import os
import os.path
import pyexiv2
import datetime
import re
from pymisc import tupleslices

# Date prediction format:
#   (from, to, probability)
# The last value is the probability that the actual date lies within
# the defined range.

# Half-width of the window placed around a clock-derived timestamp.
# DST easily gives +- 3 hours.
_CLOCK_SLACK = datetime.timedelta(hours=4)
# Probability assigned to a clock-derived window.
_CLOCK_PROBABILITY = 0.6
# Probability assigned to a range derived from a directory name.
_PATH_PROBABILITY = 0.8


def _window_around(date):
    '''Return a one-element estimate list centred on date.'''
    return [(date - _CLOCK_SLACK, date + _CLOCK_SLACK, _CLOCK_PROBABILITY)]


def dp_file(path):
    '''Estimate the date from the file's modification time.

    Returns a list with a single (from, to, probability) tuple.
    Raises whatever os.path.getmtime raises if path cannot be stat'ed.
    '''
    return _window_around(
        datetime.datetime.fromtimestamp(os.path.getmtime(path)))


def dp_exif(path):
    '''Estimate the date from the 'Exif.Image.DateTime' tag.

    Returns an empty list when the tag is missing or is not a
    datetime.datetime instance, otherwise a one-element estimate list.
    '''
    image = pyexiv2.Image(path)
    image.readMetadata()
    if 'Exif.Image.DateTime' not in image.exifKeys():
        return []
    date = image['Exif.Image.DateTime']
    if not isinstance(date, datetime.datetime):
        return []
    return _window_around(date)


def dp_olympus(path):
    '''Estimate the date from an old Olympus "TimeDate=<epoch>" line.

    Only the first 16384 bytes of the file are inspected. Returns an
    empty list when no OLYMPUS marker or no matching TimeDate line is
    found, otherwise a one-element estimate list.
    '''
    # The file is image data, so read raw bytes: text mode would translate
    # newlines (or fail to decode) and corrupt the search.
    with open(path, 'rb') as handle:
        data = handle.read(16384)
    if b"OLYMPUS" not in data:
        return []
    for line in data.split(b'\n'):
        if b'TimeDate' not in line:
            continue
        match = re.match(br"^TimeDate=([0-9]+)\s*$", line)
        if match:
            return _window_around(
                datetime.datetime.fromtimestamp(int(match.group(1))))
    return []


def dp_path(path):
    '''Estimate the date from the directory names in path.

    The file name itself is ignored. Known directory names map to fixed
    ranges, and every directory component containing a year between 1997
    and the current year adds a range covering that whole year.
    Returns a (possibly empty) list of (from, to, probability) tuples.
    '''
    result = []
    # Drop the last component (the file name); only directories count.
    parts = os.path.normpath(path).split(os.path.sep)[:-1]

    if 'uudet' in parts:
        result.append((datetime.datetime(2003, 1, 1),
                       datetime.datetime.now(),
                       _PATH_PROBABILITY))
    elif 'vanhat1' in parts:
        result.append((datetime.datetime(2001, 1, 1),
                       datetime.datetime(2004, 1, 1),
                       _PATH_PROBABILITY))
    elif 'vanhat2' in parts:
        result.append((datetime.datetime(1997, 1, 1),
                       datetime.datetime(2002, 1, 1),
                       _PATH_PROBABILITY))

    for year in range(1997, datetime.datetime.now().year + 1):
        year_text = str(year)
        for part in parts:
            if year_text in part:
                # End at the start of the next year so that Dec 31 is
                # inside the range, like the fixed ranges above.
                result.append((datetime.datetime(year, 1, 1),
                               datetime.datetime(year + 1, 1, 1),
                               _PATH_PROBABILITY))
    return result


def predict_date(path):
    '''Return the most probable date for the file at path.

    All dp_* predictors are run and their range boundaries split the
    timeline into segments. Each segment is scored by multiplying, for
    every estimate, its probability when the segment lies inside the
    estimate's range and (1 - probability) otherwise. The midpoint of
    the best-scoring segment is returned (the first one wins on ties),
    or None when no segment could be formed.
    '''
    # Collect the estimates of every predictor.
    estimates = []
    for predictor in (dp_file, dp_exif, dp_olympus, dp_path):
        estimates.extend(predictor(path))

    # Split the timeline into non-overlapping parts.
    boundaries = []
    for efrom, eto, _ in estimates:
        boundaries.append(efrom)
        boundaries.append(eto)
    boundaries.sort()

    best = None
    # NOTE(review): tupleslices is presumably yielding consecutive
    # (from, to) pairs of the sorted boundaries - confirm against pymisc.
    for pfrom, pto in tupleslices(boundaries, 2):
        pprob = 1.0
        for efrom, eto, eprob in estimates:
            if pfrom >= efrom and pto <= eto:
                pprob *= eprob
            else:
                pprob *= 1 - eprob
        if best is None or pprob > best[0]:
            best = (pprob, pfrom + (pto - pfrom) / 2)

    if best is None:
        return None
    return best[1]


if __name__ == '__main__':
    import sys
    print(predict_date(sys.argv[1]))