'''List all tags used in current directory and subdirectories.''' import os from lib.tagfiles import TagCollection from lib.dl_editdistance import dameraulevenshtein def find_similar_tags(tagnames): '''Find tag name pairs that have edit distance of 2 or less.''' for i, tagname1 in enumerate(tagnames): for tagname2 in tagnames[i+1:]: if dameraulevenshtein(tagname1, tagname2) <= 2: yield (tagname1, tagname2) def check_files_exist(path, tagcollection): '''Check for obscure syntax errors causing links to non-existing files.''' for tagname, targets in tagcollection.tags.items(): for relpath in targets: if not os.path.exists(os.path.join(path, relpath)): yield tagname, relpath def main(path, print_all): tc = TagCollection() tc.import_recursively(path, 'Merkinnat.txt') if tc.msgs: print "Import messages (%d):" % len(tc.msgs) for msg in tc.msgs: print msg print '-' * 20 print tagnames = tc.tags.keys() tagnames.sort() tagnamewidth = max([len(t) for t in tagnames]) if print_all: print "Tag name".ljust(tagnamewidth), "Use count" use_counts = [(tagname, len(tc.tags[tagname])) for tagname in tagnames] use_counts.sort(key = lambda t: t[1]) for tagname, count in use_counts: print tagname.ljust(tagnamewidth), count print similar = find_similar_tags(tagnames) for i, (tag1, tag2) in enumerate(similar): if min(len(tc.tags[tag1]), len(tc.tags[tag2])) >= 3: continue if i == 0: print "Similar tags:" print repr(tag1).ljust(tagnamewidth), ', '.join(tc.tags[tag1]) print repr(tag2).ljust(tagnamewidth), ', '.join(tc.tags[tag2]) print for tagname, relpath in check_files_exist(path, tc): print ("ERROR: file " + relpath + " for tag " + tagname + " does not exist!") if __name__ == '__main__': import sys main(os.getcwd(), '--all' in sys.argv)