#!/usr/bin/python2.4
# Parsing of YLE's web pages
import re
import urllib
import datetime
import time
def yle_url(date, channel):
    """Build the listing URL for one channel on one day.

    date    -- object with strftime(); formatted as YYYYMMDD in the query
    channel -- channel identifier string, inserted verbatim into the query

    Returns the URL as a string.
    """
    day = date.strftime("%Y%m%d")
    pieces = [
        "http://www.yle.fi/ohjelmat/index.php?co%5B%5D=",
        channel,
        "&date=",
        day,
        "&l=f&verbose=no&mode=pda",
    ]
    return "".join(pieces)
def yle_parse(file, date):
    """Extract (start time, programme name) pairs from a listing page.

    file -- iterable of lines (for example an open URL handle)
    date -- object with year/month/day; combined with each parsed HH/MM

    Lines are ignored until one matches the start pattern; that line itself
    is skipped too.  Every later line matching the entry pattern yields a
    tuple (datetime.datetime, unicode name), the name being decoded from
    ISO-8859-15.  Returns the tuples as a list in page order.
    """
    # NOTE(review): both patterns contain empty groups and look as if literal
    # markup was lost from them -- confirm against the real page format.
    # The closing quotes stay at column 0 so each pattern ends in a newline.
    header_re = re.compile(r'''^\s*.*
''')
    entry_re = re.compile(r'''(\d\d).(\d\d)\s\s(?:)?(?:)?([^<]+)(?:)?(?:)?
''')
    entries = []
    lines = iter(file)
    # Phase 1: consume everything up to and including the start marker.
    for line in lines:
        if header_re.match(line):
            break
    # Phase 2: collect programme entries from the remaining lines.
    for line in lines:
        found = entry_re.match(line)
        if not found:
            continue
        hour = int(found.group(1))
        minute = int(found.group(2))
        starts = datetime.datetime(date.year, date.month, date.day, hour, minute)
        title = unicode(found.group(3).strip(), "iso8859-15")
        entries.append((starts, title))
    return entries
def yle_get(date, channel):
    """Fetch and parse the listing page for one channel on one day.

    date    -- passed to yle_url() and yle_parse()
    channel -- channel identifier passed to yle_url()

    Returns whatever yle_parse() returns for the fetched page.  The
    connection is always closed, even if parsing raises.
    """
    url = yle_url(date, channel)
    page = urllib.urlopen(url)
    try:
        return yle_parse(page, date)
    finally:
        # Release the socket instead of relying on garbage collection.
        page.close()
def yle_minimal(channel):
    """Return (name, nextupdate) for a channel based on today's listing.

    name       -- unicode name of the first listed programme if it has
                  already started, otherwise u"Ei ohjelmaa" (Finnish for
                  "no programme")
    nextupdate -- datetime at which the answer changes (start of the first
                  programme if it has not begun yet, else start of the
                  second one), or None when that is unknown

    An empty listing gives (u"Ei ohjelmaa", None) instead of raising
    IndexError.
    """
    # NOTE(review): this treats the first listed entry as the current
    # programme -- presumably the page starts at the current one; confirm.
    ohjelmat = yle_get(datetime.datetime.today(), channel)
    now = datetime.datetime.today()
    if not ohjelmat:
        # Nothing was parsed for this channel.
        return u"Ei ohjelmaa", None
    if ohjelmat[0][0] > now:
        # First program not yet started
        return u"Ei ohjelmaa", ohjelmat[0][0]
    name = ohjelmat[0][1]
    if len(ohjelmat) > 1:
        nextupdate = ohjelmat[1][0]
    else:
        # Only one programme listed, so there is no known next start time.
        nextupdate = None
    return name, nextupdate
if __name__ == "__main__":
    # CGI-style output: header line, blank separator line, then one
    # "channel<TAB>name" line per channel and a final "Update<TAB>seconds".
    # Single-argument parenthesised prints behave exactly like the Python 2
    # print statement.
    print("Content-type: text/plain; charset=ISO-8859-15")
    print("")
    channels = ['tv1', 'tv2', 'mtv', 'nel', 'sub']
    nextupdate = None
    for channel in channels:
        name, nu = yle_minimal(channel)
        print(channel + "\t" + name.encode("iso8859-15"))
        # Keep the earliest next-programme start over all channels.
        if nu and (not nextupdate or nextupdate > nu):
            nextupdate = nu
    # If no channel reported a next update there is nothing to compute, so
    # the Update line is skipped rather than crashing on None.
    if nextupdate is not None:
        # mktime() reads the naive datetime as local time, like the
        # platform-specific strftime("%s") did, but works everywhere.
        delta = time.mktime(nextupdate.timetuple()) - time.time()
        print("Update\t" + str(int(delta)))