#! /usr/bin/env python

"""Compile film title lists from several text databases.

Each database is fetched from a URL with urllib2 and parsed by the
project-local txtdb module.  A pickled copy of every database is kept
in a per-user cache directory and is used when the remote copy has not
changed or cannot be read.  The combined title list can be sorted and
rendered as plain text or as HTML table rows.
"""

__author__ = "Thomas Bellman "
__rcsId__ = """$Id: filmlistor.py,v 1.5 2001/12/29 21:46:16 bellman Exp $"""

import sys
import string
import time
import os
import stat
import urllib2
import cgi
import traceback
import md5
import cPickle

import txtdb


# Sample lists for testing.
# NOTE(review): these entries are 3-tuples, apparently (local file or
# None, URL, owner), whereas TitleCompilation iterates over (url, owner)
# pairs -- confirm how the code using this list unpacks it.
_testlists = [
    ( None, # "/home/bellman/.public/filmer.txt",
      "http://www.lysator.liu.se/~bellman/filmer.txt",
      "Bellman" ),
    ( "/tmp/trh/min-anime.txt",
      "http://www.lysator.liu.se/~bellman/anime.txt",
      "TRH" ),
]


class TitleMangler:
    """Rewrite titles so that a leading ignore-word is moved out of the way.

    For example, with "The" in the ignore list, "The Matrix: Reloaded"
    is normalized to "Matrix, The: Reloaded".
    """

    # Strings that terminate the first phrase of a title.
    phrase_separators = [ ":", "--", " --", " - ", ", " ]

    def __init__(self, ignorelist):
        """IGNORELIST is a sequence of words to move from the title start.

        The words are matched case-insensitively and must be followed
        by a space in the title.
        """
        self.ignores = [w.lower() + " " for w in ignorelist]

    def find_phrase_end(self, title):
        """Return the index of the earliest phrase separator in TITLE.

        Returns len(TITLE) if no separator occurs.
        """
        end = len(title)
        for sep in self.phrase_separators:
            i = title.find(sep)
            if 0 <= i < end:
                end = i
        return end

    def normalize(self, title):
        """Return TITLE with a leading ignore-word moved after the first phrase.

        The title is returned unchanged if it does not start with an
        ignore-word, or if nothing is left of the first phrase once
        the ignore-word has been removed.
        """
        lowtit = title.lower()
        phrase_end = self.find_phrase_end(title)
        for w in self.ignores:
            if lowtit[:len(w)] == w:
                # Only rewrite when the first phrase has text beyond
                # the ignore-word; otherwise a separator overlapping
                # the word (as in "A - B") would give ", A - B".
                if phrase_end > len(w):
                    title = (title[len(w):phrase_end] + ", "
                             + title[:len(w)-1] + title[phrase_end:])
                break
        return title

    def compare(self, t1, t2):
        """cmp()-style comparison of two titles, normalized and lowercased."""
        t1 = self.normalize(t1).lower()
        t2 = self.normalize(t2).lower()
        return cmp(t1, t2)


standard_ignorewords = [
    "The", "A", "An",
    "Den", "Det",
    "Der", "Das", "Die", "Dem",
]

standard_mangler = TitleMangler(standard_ignorewords)


class TitleCompilation:
    """A combined list of the titles in several remote databases.

    After reload(), the `titles` attribute is a list of
    (entry, owner, url) tuples, where entry is whatever iterating over
    a txtdb.Txtdb object yields.
    """

    # expanduser() consults $HOME first but, unlike os.getenv("HOME"),
    # does not hand None to os.path.join() when the variable is unset.
    cachedir = os.path.join(os.path.expanduser("~"), ".filmlistecache")

    def __init__(self, urllist, titlemangler=standard_mangler):
        """URLLIST is a sequence of (url, owner) pairs.

        TITLEMANGLER supplies normalize() and compare() for titles.
        """
        self.__lists = urllist
        self.__mangler = titlemangler
        # Start out empty so that sorting or printing before the first
        # reload() works on an empty list instead of failing.
        self.__dbs = []
        self.titles = []

    def reload(self):
        """Load all databases anew and rebuild the `titles` list.

        Databases that can be read neither from the network nor from
        the cache are reported on stderr and left out.
        """
        dblist = []
        titles = []
        if not os.path.isdir(self.cachedir):
            # Owner-only access (octal 700).
            os.mkdir(self.cachedir, stat.S_IRWXU)
        for url, owner in self.__lists:
            db = self.__load_db(url, owner)
            if db is None:
                continue
            dblist.append((db, owner))
            for t in db:
                titles.append((t, owner, url))
        self.__dbs = dblist
        self.titles = titles

    def __load_db(self, url, owner):
        """Return the database for URL, or None if it must be skipped.

        The network is tried first; if that fails for any reason, the
        cached copy (if there is one) is used instead.
        """
        cachefile = self.__cachefile(url, owner)
        copyfp, cachet = self.__open_cache(cachefile)
        try:
            try:
                return self.__fetch(url, owner, cachefile, copyfp, cachet)
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                sys.stderr.write("\n\nFailed reading %s's database \"%s\":\n"
                                 % (owner, url))
                traceback.print_exc(1, sys.stderr)
            if not copyfp:
                sys.stderr.write(
                    "No cache file, skipping this database.\n")
                return None
            sys.stderr.write("Using cache file %s instead.\n"
                             % (cachefile,))
            try:
                # Use a fresh object, so that nothing from a
                # half-finished network load is mixed in.
                db = txtdb.Txtdb(None, autoreload=0)
                db.loadpickle(copyfp)
                return db
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                sys.stderr.write("Failed reading old cache copy:\n")
                traceback.print_exc(1, sys.stderr)
                sys.stderr.write("Skipping this database.\n")
                return None
        finally:
            if copyfp:
                copyfp.close()

    def __open_cache(self, cachefile):
        """Open CACHEFILE and read its header.

        Returns (fp, timestamp), with fp positioned directly after the
        header so that the pickled database can be read from it.  If
        the file is missing or unreadable, returns (None, -sys.maxint).
        """
        # NOTE: unpickling executes whatever the file says; the cache
        # directory is created owner-only, and the files in it are
        # trusted on that ground.
        try:
            fp = open(cachefile, "rb")
        except (IOError, OSError):
            return None, -sys.maxint
        try:
            cachet, curl, cowner = cPickle.load(fp)
        except (KeyboardInterrupt, SystemExit):
            fp.close()
            raise
        except:
            fp.close()
            return None, -sys.maxint
        return fp, cachet

    def __fetch(self, url, owner, cachefile, copyfp, cachet):
        """Return an up-to-date database for URL.

        The remote copy is loaded if there is no usable cache (COPYFP
        is None), if the remote copy is newer than CACHET, or if the
        server gives no modification time; the cache file is then
        rewritten.  Otherwise the database is read from COPYFP.
        Errors are propagated to the caller.
        """
        db = txtdb.Txtdb(None, autoreload=0)
        ufp = urllib2.urlopen(url)
        try:
            modt = self.__modtime(ufp)
            if copyfp and modt is not None and modt <= cachet:
                db.loadpickle(copyfp)
            else:
                db.load(ufp)
                if modt is None:
                    modt = time.time()
                self.__save_cache(cachefile, modt, url, owner, db)
        finally:
            ufp.close()
        return db

    def __modtime(self, ufp):
        """Return the Last-Modified time of the response UFP, or None."""
        stamp = ufp.info().getdate('last-modified')
        if stamp is None:
            return None
        return time.mktime(stamp)

    def __save_cache(self, cachefile, modt, url, owner, db):
        """Write the header and DB to CACHEFILE, as a best effort.

        The data is written to a temporary file which is then renamed,
        so that a failed write leaves the previous cache file intact.
        Failures are reported on stderr but are otherwise ignored,
        since the caller already holds the freshly loaded database.
        """
        tmpfile = "%s.%d.tmp" % (cachefile, os.getpid())
        try:
            # Binary mode: the header is pickled with a binary protocol.
            fp = open(tmpfile, "wb")
            try:
                cPickle.dump((modt, url, owner), fp, 1)
                db.savepickle(fp)
            finally:
                fp.close()
            os.rename(tmpfile, cachefile)
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            sys.stderr.write("\n\nFailed writing cache file %s:\n"
                             % (cachefile,))
            traceback.print_exc(1, sys.stderr)
            try:
                os.remove(tmpfile)
            except OSError:
                pass

    def __cachefile(self, url, owner):
        """Return the name of the cache file for the database at URL.

        The name consists of OWNER, a dash, the MD5 checksum of URL in
        hexadecimal, and the suffix ",p".
        """
        cachefile = os.path.join(
            self.cachedir,
            owner + '-' + md5.new(url).hexdigest()) + ",p"
        return cachefile

    def sort(self, cmpfunc):
        """Sort `titles` in place, comparing entry titles with CMPFUNC.

        CMPFUNC takes two title strings and returns a negative number,
        zero or a positive number, like cmp().
        """
        # cmpfunc is bound as a default argument so that this works
        # without nested scopes.
        def cmpf(a, b, cmpfunc=cmpfunc):
            return cmpfunc(a[0].title, b[0].title)
        self.titles.sort(cmpf)

    def sort_by_title(self):
        """Sort `titles` using the title mangler's comparison."""
        self.sort(self.__mangler.compare)

    def print_list(self):
        """Print one line per title: the normalized title and its owner."""
        normalizer = self.__mangler.normalize
        for t,o,s in self.titles:
            print("%-60.60s %s" % (normalizer(t.title), o))

    def htmltable_list(self):
        """Return the titles as HTML table rows, one row per line.

        Each row holds the normalized title (linked to the entry's
        `url` attribute, or failing that its `imdb` attribute, when
        non-empty), the first word of the entry's `media` attribute,
        and the owner linked to the source database.
        """
        # NOTE(review): the markup in the format strings below is
        # reconstructed.  The strings had lost their tags, leaving
        # fewer placeholders than arguments, so that formatting raised
        # TypeError.  Confirm the exact tags against the original.
        # NOTE(review): only URLs are HTML-escaped; titles, media and
        # owner names are inserted as they are, as before.  Confirm
        # whether the databases may contain markup characters.
        rows = []
        normalizer = self.__mangler.normalize
        for t,owner,srcurl in self.titles:
            url = ahref = aend = ""
            if hasattr(t, "url"):
                url = t.url
            elif hasattr(t, "imdb"):
                url = t.imdb
            if url:
                ahref = '<a href="%s">' % (cgi.escape(url, 1),)
                aend = "</a>"
            # An empty media field gives an empty cell, rather than
            # an IndexError.
            words = t.media.split()
            if words:
                media = words[0]
            else:
                media = ""
            r = ((' <tr><td>%s%s%s</td>'
                  ' <td>%s</td>'
                  ' <td><a href="%s">%s</a></td></tr>')
                 % (ahref, normalizer(t.title), aend,
                    media,
                    cgi.escape(srcurl, 1), owner))
            rows.append(r)
        return "\n".join(rows)