import os.path
import sys


class SpellChecker:
    """Look words up in a sorted, one-word-per-line dictionary file.

    The file is never loaded into memory.  A lookup jumps to a guessed byte
    offset, brackets the word with coarse and then fine strides, and finally
    scans a short run of lines.

    NOTE(review): the search assumes the lines, once decoded and lower-cased,
    are in ascending code-point order -- confirm for the dictionary in use.
    The file object should be opened in binary mode, because it is addressed
    by arbitrary byte offsets.
    """

    # Byte strides used to bracket a word: coarse first, then fine.
    COARSE_STEP = 16384
    FINE_STEP = 256
    # Encoding used to decode dictionary lines that are read as bytes.
    ENCODING = 'iso8859-15'

    def __init__(self, file):
        """Wrap *file*, a seekable file object, and record its size."""
        self.file = file
        self.cache = {}          # word -> bool, results of earlier lookups
        self.positioncache = {}  # 3-letter prefix -> byte offset to start at
        self.file.seek(0, 2)
        self.filesize = self.file.tell()

    def check(self, word):
        """Return True if *word* is in the dictionary (memoised)."""
        if word not in self.cache:
            self.cache[word] = self.check_from_file(word)
        return self.cache[word]

    def _decode(self, raw):
        """Turn one raw line into a stripped, lower-cased text word."""
        if isinstance(raw, bytes):
            raw = raw.decode(self.ENCODING)
        return raw.strip().lower()

    def _seek_to_line(self, position):
        """Place the file at the first line starting after *position*.

        For a positive offset the line containing that offset is discarded,
        since the offset may fall in the middle of it.  Offset 0 is the start
        of the first line, so nothing is discarded there.
        """
        if position > 0:
            self.file.seek(position, 0)
            self.file.readline()
        else:
            self.file.seek(0, 0)

    def _word_after(self, position):
        """Return the word on the first full line after *position*.

        Returns None when there is no such line (end of file), so callers
        can tell "past the last word" apart from a blank line.
        """
        self._seek_to_line(position)
        raw = self.file.readline()
        if not raw:
            return None
        return self._decode(raw)

    def check_from_file(self, word):
        """Search the dictionary file for *word*, ignoring case.

        Returns True if a line equal to the lower-cased word is found,
        otherwise False.  An empty word is never found.
        """
        lword = word.lower()
        if not lword:
            return False

        def at_or_past(position):
            # End of file counts as "past the word"; otherwise a search for
            # a word near the end would run off the file and give up.
            candidate = self._word_after(position)
            return candidate is None or candidate >= lword

        prefix = lword[:3]
        if prefix in self.positioncache:
            position = self.positioncache[prefix]
        else:
            # First guess: interpolate on the first letter between 'a' and
            # 'z'.  Letters outside that range are clamped just below.
            position = ((ord(lword[0]) - ord('a')) * self.filesize
                        // (ord('z') - ord('a')))
        position = min(max(position, 0), self.filesize)

        # Coarse strides back until we are before the word (or at the start).
        while position > 0 and at_or_past(position):
            position = max(0, position - self.COARSE_STEP)
        # Coarse strides forward until we are at/after it (or at the end).
        while position < self.filesize and not at_or_past(position):
            position = min(self.filesize, position + self.COARSE_STEP)
        # Fine strides back so the linear scan below starts just before it.
        while position > 0 and at_or_past(position):
            position = max(0, position - self.FINE_STEP)
        self.positioncache[prefix] = position

        # Linear scan.  readline() is used rather than iterating the file so
        # that no read-ahead buffering is mixed with the seeks above.
        self._seek_to_line(position)
        while True:
            raw = self.file.readline()
            if not raw:
                return False
            candidate = self._decode(raw)
            if candidate == lword:
                return True
            if candidate > lword:
                return False


dictfile = '/usr/share/dict/finnish'
if os.path.exists(dictfile):
    # Binary mode: the checker seeks to arbitrary byte offsets and decodes
    # each line itself.  The handle stays open for the life of the process.
    checker = SpellChecker(open(dictfile, 'rb'))
else:
    checker = None


def split_word(word):
    """Split a compound *word* into a list of dictionary words.

    The shortest leading part of at least three characters that is a known
    word and whose remainder can itself be split (or is empty) wins.
    Returns an empty list when no split exists or when no dictionary is
    available.
    """
    if checker is None:
        return []
    for end in range(3, len(word) + 1):
        head, tail = word[:end], word[end:]
        if checker.check(head):
            rest = split_word(tail)
            if not tail or rest:
                return [head] + rest
    return []


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit("usage: %s WORD" % sys.argv[0])
    word = sys.argv[1]
    if isinstance(word, bytes):
        # Python 2 hands over raw bytes; Python 3 has already decoded them.
        word = word.decode('utf-8')
    results = split_word(word)
    if not results:
        print("Unknown word")
    else:
        print(u', '.join(results))