"""Copy files from a source tree into a destination tree, skipping duplicates.

Two files count as duplicates when they have the same base name, the same
size and byte-identical content.  Files are copied into a sub-directory of
the destination named after the directory that directly contains them in the
source tree.

Usage: script [-n] source [dest]

* ``source``        copy non-duplicates into the directory of this script
* ``-n source``     only report how many unique/duplicate files exist
* ``source dest``   copy non-duplicates into ``dest``
"""

import os
import os.path
import filecmp
import shutil
import sys

# Files with these names are not copied at all when a file of the same name
# is already present at the target location (instead of being renamed).
_SKIP_IF_PRESENT = ('Thumbs.db', '.picasa.ini')


class Image(object):
    """A file on disk, compared by base name, size and content."""

    def __init__(self, path):
        """Wrap the existing regular file at *path*."""
        assert os.path.isfile(path)
        self.path = path

    def last_dir_name(self):
        """Return the name of the directory that directly contains the file."""
        dirname = os.path.dirname(self.path)
        return os.path.basename(dirname)

    def filename(self):
        """Return the base name of the file."""
        return os.path.basename(self.path)

    def size(self):
        """Return the current size of the file in bytes (read from disk)."""
        return os.path.getsize(self.path)

    def __eq__(self, other):
        """Return True when *other* has the same name, size and content."""
        if not isinstance(other, Image):
            # Let Python try the reflected comparison instead of failing
            # with AttributeError on unrelated objects.
            return NotImplemented
        # Cheap checks first; the byte-by-byte comparison (shallow=False)
        # only runs when name and size already match.
        return (self.filename() == other.filename()
                and self.size() == other.size()
                and filecmp.cmp(self.path, other.path, shallow=False))

    def __ne__(self, other):
        """Return the negation of ``__eq__`` (not automatic on Python 2)."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        """Hash on name and size, both of which are equal for equal images."""
        return hash(self.filename()) ^ hash(self.size())


def collect_all_files(path, existing=None):
    """Walk *path* and split the regular files found into unique and dupes.

    Directories and files are visited in sorted order, so which copy of a
    set of duplicates ends up in the unique set is deterministic.  The name
    of the directory being scanned is written to stdout as a progress line.

    Args:
        path: root directory of the tree to scan.
        existing: optional set of ``Image`` objects that are already known;
            a file equal to one of them is reported as a duplicate.

    Returns:
        A ``(results, dupes)`` tuple: ``results`` is a set of ``Image``
        objects that equal neither each other nor anything in *existing*;
        ``dupes`` is a list of the remaining ``Image`` objects, in the order
        they were found.
    """
    if existing is None:
        existing = set()
    results = set()
    dupes = []

    for dirname, subdirs, fnames in os.walk(path):
        # Sorting in place also fixes the order in which os.walk descends.
        subdirs.sort()
        fnames.sort()

        # "\r" returns to the start of the line so the next progress
        # message overwrites this one; flush because no newline is written.
        sys.stdout.write("Current: %20s\r" % os.path.basename(dirname)[:20])
        sys.stdout.flush()

        for filename in fnames:
            full_path = os.path.join(dirname, filename)
            if not os.path.isfile(full_path):
                continue
            image = Image(full_path)
            if image in results or image in existing:
                dupes.append(image)
            else:
                results.add(image)

    return results, dupes


def copy_files_to(path, images):
    """Copy every image into ``path/<name of its containing directory>/``.

    The sub-directory is created when missing.  When the target file name is
    already taken, files named in ``_SKIP_IF_PRESENT`` are skipped; any other
    file gets "x" appended to its name (before the extension) until the name
    is free.

    Args:
        path: existing destination directory.
        images: iterable of ``Image`` objects to copy.
    """
    for image in images:
        this_dir = os.path.join(path, image.last_dir_name())
        if not os.path.isdir(this_dir):
            os.mkdir(this_dir)

        dest_path = os.path.join(this_dir, image.filename())
        if os.path.exists(dest_path) and image.filename() in _SKIP_IF_PRESENT:
            continue

        while os.path.exists(dest_path):
            print("File " + dest_path + " already exists, changing name.")
            root, ext = os.path.splitext(dest_path)
            dest_path = root + "x" + ext

        # copy2 also copies file metadata such as modification times.
        shutil.copy2(image.path, dest_path)


def _require_dir(path):
    """Exit with an error message unless *path* is an existing directory."""
    if not os.path.isdir(path):
        sys.stderr.write("Error: %s is not a directory\n" % path)
        raise SystemExit(1)


def main(argv=None):
    """Run the command line interface; *argv* defaults to ``sys.argv``."""
    if argv is None:
        argv = sys.argv

    if len(argv) == 2:
        _require_dir(argv[1])
        source = argv[1]
        # No destination given: copy next to this script.
        dest = os.path.dirname(os.path.abspath(__file__))
        print("Copying non-dupes to " + dest)
    elif len(argv) == 3 and argv[1] == '-n':
        _require_dir(argv[2])
        print("Checking only")
        source = argv[2]
        dest = None
    elif len(argv) == 3:
        _require_dir(argv[1])
        _require_dir(argv[2])
        source = argv[1]
        dest = argv[2]
        print("Copying non-dupes to " + dest)
    else:
        print("Usage: " + argv[0] + " [-n] source [dest]")
        raise SystemExit(2)

    oldset = None
    if dest is not None:
        # Files already in the destination must not be copied again.
        oldset, dupes = collect_all_files(dest)
        if dupes:
            print("Note: there are %d duplicates in destination path."
                  % len(dupes))

    results, dupes = collect_all_files(source, oldset)
    print("Found %d unique files and %d duplicates."
          % (len(results), len(dupes)))

    if dest is not None:
        print("Copying...")
        copy_files_to(dest, results)
        print("Done.")


if __name__ == '__main__':
    main()