#!/usr/bin/python2.4
# Parsing of TV programme listings from the YLE web pages.
#
# Run as a CGI script, this prints one "channel<TAB>programme" line per
# channel followed by an "Update<TAB>seconds" line giving the time until the
# earliest upcoming programme start found.
import re
import urllib
import datetime
import time
import sys

try:
    urlopen = urllib.urlopen  # Python 2
except AttributeError:
    from urllib.request import urlopen  # Python 3

# Character encoding used both for decoding the fetched page and for output.
PAGE_ENCODING = "iso8859-15"

# Placeholder programme name ("no programme") used when nothing is on air.
NO_PROGRAM = u"Ei ohjelmaa"

# Delay reported on the "Update" line when no channel yields a next start time.
FALLBACK_UPDATE_SECONDS = 300

# The patterns are compiled as byte patterns because the page is read as raw
# bytes and only the captured programme name is decoded.
#
# NOTE(review): both patterns look as if literal HTML tags were lost from
# them: the programme pattern contains empty "(?:)?" groups, and the start
# pattern requires two consecutive newlines, which a line obtained by
# iterating over a file object never contains. The string values are kept
# exactly as found; confirm them against the real page markup.
_START_MASK = re.compile((r'^\s*.*' '\n\n').encode("ascii"))
_PROGRAM_MASK = re.compile(
    (r'(\d\d).(\d\d)\s\s(?:)?(?:)?([^<]+)(?:)?(?:)?' '\n').encode("ascii"))


def yle_url(date, channel):
    """Return the listing URL for *channel* on *date*.

    date    -- object with a strftime() method (date or datetime)
    channel -- channel identifier string, inserted into the URL verbatim
    """
    return ("http://www.yle.fi/ohjelmat/index.php?co%5B%5D=" + channel
            + "&date=" + date.strftime("%Y%m%d")
            + "&l=f&verbose=no&mode=pda")


def yle_parse(file, date):
    """Extract (start, name) pairs from a listing page.

    file -- iterable of byte-string lines (e.g. the object from urlopen)
    date -- supplies the year, month and day for every start time

    Lines are ignored until one matches the start pattern.  After that,
    every line matching the programme pattern contributes one tuple of a
    datetime.datetime and the programme name as a unicode string.

    Returns the list of tuples in page order.
    """
    ohjelmat = []
    in_listing = False
    for line in file:
        if not in_listing:
            # Still in the page header: only look for the start marker.
            if _START_MASK.match(line):
                in_listing = True
            continue
        m = _PROGRAM_MASK.match(line)
        if m:
            start = datetime.datetime(date.year, date.month, date.day,
                                      int(m.group(1)), int(m.group(2)))
            name = m.group(3).strip().decode(PAGE_ENCODING)
            ohjelmat.append((start, name))
    return ohjelmat


def yle_get(date, channel):
    """Fetch the listing for *channel* on *date* and return it parsed.

    Returns the list produced by yle_parse().  The HTTP response is closed
    even when parsing raises.
    """
    page = urlopen(yle_url(date, channel))
    try:
        return yle_parse(page, date)
    finally:
        page.close()


def yle_minimal(channel):
    """Return (name, nextupdate) for *channel* based on today's listing.

    name       -- name of the first listed programme if it has already
                  started, otherwise the "Ei ohjelmaa" placeholder
    nextupdate -- start time of the next programme to begin, or None when
                  the listing holds no such entry

    An empty listing yields the placeholder and None instead of raising
    IndexError.
    """
    ohjelmat = yle_get(datetime.datetime.today(), channel)
    now = datetime.datetime.today()
    if not ohjelmat:
        return NO_PROGRAM, None
    first_start, first_name = ohjelmat[0]
    if first_start > now:
        # First programme not yet started.
        return NO_PROGRAM, first_start
    if len(ohjelmat) > 1:
        return first_name, ohjelmat[1][0]
    return first_name, None


def main():
    """Write the CGI response for all channels to standard output."""
    # Write encoded bytes directly so the output matches the declared
    # charset regardless of the interpreter's default stdout encoding.
    out = getattr(sys.stdout, "buffer", sys.stdout)

    def emit(text):
        out.write(text.encode(PAGE_ENCODING))

    emit(u"Content-type: text/plain; charset=ISO-8859-15\n")
    emit(u"\n")
    channels = ['tv1', 'tv2', 'mtv', 'nel', 'sub']
    nextupdate = None
    for channel in channels:
        name, nu = yle_minimal(channel)
        emit(u"%s\t%s\n" % (channel, name))
        # Keep the earliest upcoming start time over all channels.
        if nu and (not nextupdate or nextupdate > nu):
            nextupdate = nu
    if nextupdate is None:
        # No channel reported an upcoming programme.
        delta = FALLBACK_UPDATE_SECONDS
    else:
        # mktime() interprets the naive datetime as local time; it replaces
        # the platform-specific strftime("%s").
        delta = time.mktime(nextupdate.timetuple()) - time.time()
    emit(u"Update\t%s\n" % str(int(delta)))


if __name__ == "__main__":
    main()