import os
import os.path
import pyexiv2
import datetime
import re
from pymisc import tupleslices

# Date prediction format:
# (from, to, probability)
# Last value is the probability that the actual date lies within
# the defined range.

def dp_file(path):
    '''Estimate the date from the file's modification time.

    Returns a one-element list holding a (from, to, probability) tuple
    centred on the mtime, widened by four hours in each direction.
    '''
    modified = datetime.datetime.fromtimestamp(os.path.getmtime(path))
    # Generous margin: clock/DST differences can shift the stamp by hours.
    margin = datetime.timedelta(hours=4)
    window = (modified - margin, modified + margin, 0.6)
    return [window]

def dp_exif(path):
    '''Estimate the date from the Exif.Image.DateTime tag.

    Returns a one-element list holding a (from, to, probability) tuple
    spanning four hours either side of the tag value, or an empty list
    when the tag is missing or its value is not a datetime object.
    '''
    tag = 'Exif.Image.DateTime'

    picture = pyexiv2.Image(path)
    picture.readMetadata()

    if tag in picture.exifKeys():
        stamp = picture[tag]
        # Only a real datetime value can be used for the arithmetic below.
        if isinstance(stamp, datetime.datetime):
            margin = datetime.timedelta(hours=4)
            return [(stamp - margin, stamp + margin, 0.6)]

    return []

def dp_olympus(path):
    '''Check for old Olympus data embedded in the file.

    Reads the first 16384 bytes of the file and, if they contain the
    marker "OLYMPUS", looks for a line of the form "TimeDate=<digits>",
    where the digits are interpreted as a Unix timestamp.

    Returns a one-element list holding a (from, to, probability) tuple
    spanning four hours either side of that timestamp, or an empty list
    when no usable entry is found.
    '''
    # Binary mode: the file is image data, not text, so decoding it could
    # fail or alter bytes. The with-block guarantees the handle is closed.
    with open(path, 'rb') as stream:
        data = stream.read(16384)

    if b"OLYMPUS" not in data:
        return []

    pattern = re.compile(br"^TimeDate=([0-9]+)\s*$")
    for line in data.split(b'\n'):
        # Cheap substring test first; most lines are unrelated binary junk.
        if b'TimeDate' not in line:
            continue

        match = pattern.match(line)
        if not match:
            continue

        try:
            date = datetime.datetime.fromtimestamp(int(match.group(1)))
        except (ValueError, OverflowError, OSError):
            # Timestamp outside the platform's supported range: treat the
            # entry as unusable instead of aborting the whole prediction.
            continue

        delta = datetime.timedelta(hours=4)
        return [(date - delta, date + delta, 0.6)]

    return []

def dp_path(path):
    '''Estimate the date from the directory names in the path.

    The filename itself is ignored. Two kinds of hints are collected:

    * the special directory names 'uudet', 'vanhat1' and 'vanhat2'
      (at most one of them, checked in that order), each mapped to a
      fixed date range;
    * any directory name that contains a year between 1997 and the
      current year as a substring, mapped to that whole calendar year.
      One estimate is added per matching (year, directory) pair.

    Returns a list of (from, to, probability) tuples, possibly empty.
    '''
    # Keep only the directory components; the last element is the filename.
    parts = os.path.normpath(path).split(os.path.sep)[:-1]

    now = datetime.datetime.now()
    result = []

    if 'uudet' in parts:
        result.append((datetime.datetime(2003, 1, 1), now, 0.8))
    elif 'vanhat1' in parts:
        result.append((datetime.datetime(2001, 1, 1), datetime.datetime(2004, 1, 1), 0.8))
    elif 'vanhat2' in parts:
        result.append((datetime.datetime(1997, 1, 1), datetime.datetime(2002, 1, 1), 0.8))

    for year in range(1997, now.year + 1):
        year_text = str(year)
        for part in parts:
            if year_text in part:
                # End at the start of the next year so that December 31
                # is inside the range, matching the Jan-1 boundaries used
                # for the named directories above.
                result.append((datetime.datetime(year, 1, 1),
                               datetime.datetime(year + 1, 1, 1), 0.8))

    return result

def predict_date(path):
    '''Combine all date estimators into a single predicted datetime.

    Every estimator contributes (from, to, probability) tuples. The
    endpoints of all estimates are sorted and fed to tupleslices() to
    obtain candidate segments (presumably consecutive pairs of split
    points -- confirm against pymisc). Each segment is scored by
    multiplying, for every estimate, its probability when the segment
    lies inside the estimate's range and the complement otherwise.

    Returns the midpoint of the highest-scoring segment; on ties the
    earliest such segment wins.
    '''
    # Collect estimates from each predictor, in a fixed order.
    estimates = []
    for predictor in (dp_file, dp_exif, dp_olympus, dp_path):
        estimates.extend(predictor(path))

    # Every range endpoint is a potential boundary on the timeline.
    boundaries = sorted(
        point
        for start, end, _ in estimates
        for point in (start, end)
    )

    best = None
    for seg_start, seg_end in tupleslices(boundaries, 2):
        score = 1.0
        for start, end, prob in estimates:
            inside = start <= seg_start and seg_end <= end
            score *= (prob if inside else 1 - prob)

        if best is None or score > best[0]:
            midpoint = seg_start + (seg_end - seg_start) / 2
            best = (score, midpoint)

    return best[1]

if __name__ == '__main__':
    # Command-line entry point: print the predicted date for one file.
    import sys
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit('usage: %s IMAGE_PATH' % sys.argv[0])
    # Parenthesised single-argument form prints identically on Python 2
    # and is valid syntax on Python 3.
    print(predict_date(sys.argv[1]))