#!/usr/bin/env python # -*- coding: iso-8859-1 -*- # Maildird - Copyright (C) 2005 Jörgen Cederlöf version = "0.9" # Maildird attempts to solve the problem of sorting mail that has # already been delivered to a Maildir. When Procmail wants to be # inserted in the middle of the delivery chain, Maildird calmly waits # until the mail is delivered, notices that a new mail has arrived in # the Maildir it monitors, writes something useful in the log that # (hopefully) is connected to stdout, runs some tests and moves the # mail safely to the correct Maildir. On the other hand it is nothing # more than a few lines of Python, so it performs no magic. # Safety is first priority. Maildird is designed to avoid getting in # situations where mail might be lost. But of course it comes WITHOUT # ANY WARRANTY, see below. # This is currently something of a hack. It should work safely and # reliably, but there is no separate configuration file, command line # arguments or even external documentation. Configure by changing the # source. # I use Maildird like this: # .forward directs mail to /home/jc/mail/unsorted/ # Maildird is running on another computer which mounts that directory # through NFS. A cron job runs once every hour and runs # exec /home/jc/config/maildird >>/home/jc/logs/maildird.log 2>&1 & # if it notices that Maildird is not running. # If a mail is received, Maildird notices within a second, logs and # moves the mail to the right place. # There is not much that can go wrong. If the mail server ignores # .forward every mail is stored in the inbox. If Maildird is not # running every mail is left in the unsorted folder until Maildird is # restarted. # If a mail is sorted incorrectly, I teach Bogofilter or update the # rules and then simply move the mail back to the unsorted mailbox to # let Maildird process it again. 
# Mostly relevant for Lysator users: The script I use to start # maildird looks like this: # #!/bin/bash # uname -n |grep -v koeberg && echo "Run on koeberg." && exit 1 # ps -ef |egrep '^ *jc .*/home/jc/config/maildir[d]' >/dev/null && exit 0 # export PATH=/home/jc/bin/koeberg/bin/:$PATH # Contains Bogofilter # exec /home/jc/config/maildird >>/home/jc/logs/maildird.log 2>&1 & # and my crontab on koeberg contains # 05 * * * * /home/jc/config/startmaildird # . /var/mail/jc/.forward contains the single line # /var/mail/jc/Maildir/.unsorted/ # . # Note: Some programs like MUAs and mail syncing programs might add # headers, which will make the mail's SHA-1 not match the ones in the # log. For me, running the mail through # sed '1,/^$/{/^\(X-OfflineIMAP\|Content-Length:\)/d}' # before calculating the SHA-1 makes the hash identical to the logged # one. YMMV. # The latest version of maildird is probably available somewhere # around http://www.lysator.liu.se/~jc/hacks/maildird . # Changelog: # Version 0.9: First public release # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA # 02111-1307, USA. 
import os
import socket
import sys
import time
from stat import ST_MTIME

try:
    from hashlib import sha1 as _new_sha1
except ImportError:
    # Very old interpreters only ship the stand-alone "sha" module.
    from sha import new as _new_sha1

try:
    import email.parser as _email_parser
except ImportError:
    # Very old interpreters only know the capitalised module name.
    import email.Parser as _email_parser

# Mail files are opened in binary mode so the SHA-1 covers the exact
# bytes on disk. Use the bytes-aware header parser where one exists,
# otherwise the classic one (which reads byte strings on Python 2).
_HeaderParser = getattr(_email_parser, "BytesHeaderParser", None) \
    or _email_parser.HeaderParser

# Names exported by "from <module> import *". The test_* rule
# callbacks are registered in testtable and are reached through it.
__all__ = ["version", "srcmaildir", "inbox", "maybespambox",
           "warning", "error", "testtable", "createmaildirname",
           "movemail", "sha1sum", "examine_mail", "printable_metadata",
           "handlemail", "getmtime", "checkformail", "main"]

# Release identifier printed in the startup banner.
version = "0.9"

srcmaildir = "/home/jc/mail/unsorted/"
inbox = "/home/jc/mail/INBOX/"
maybespambox = "/home/jc/mail/maybe_spam/"


def _log(line=""):
    """Write one line to stdout and flush so the log stays ordered."""
    sys.stdout.write("%s\n" % (line,))
    sys.stdout.flush()


def warning(s):
    """Write the message s as one line on stderr and flush."""
    sys.stderr.write("%s\n" % (s,))
    sys.stderr.flush()


def error(s, r=3):
    """Log s (followed by a blank line) and exit with status r."""
    warning(s + "\n")
    sys.exit(r)


def _header_text(headers, name):
    """Return header `name` as a string, or None if it is absent."""
    value = headers[name]
    if value is None:
        return None
    return str(value)


# These are the tests that decide where mail goes and why.
# Configure at will. Each entry is (name, test function, destination).
# A test returns True (matched), False (no match) or None (retry).

testtable = []


# Crypto-Gram looks like spam according to Bogofilter, so we classify
# it as ham and don't show it to Bogofilter.
def test_cryptogram(mail):
    """Match mail addressed to one of the Crypto-Gram list addresses."""
    return _header_text(mail['headers'], 'To') in (
        'crypto-gram@chaparraltree.com',
        'crypto-gram-list@schneier.com')

testtable.append(("crypto-gram", test_cryptogram, inbox))


# Administrative mail containing spam is bad for statistics.
def test_listadmin(mail):
    """Match list-administration mail from lists.lysator.liu.se."""
    sender = _header_text(mail['headers'], 'From')
    if sender is None:
        # A mail without a From header cannot be list admin mail; a
        # substring test against None would raise TypeError.
        return False
    for marker in ('-owner@lists.lysator.liu.se',
                   '-bounces@lists.lysator.liu.se'):
        if marker in sender:
            return True
    return False

testtable.append(("listadmin", test_listadmin, inbox))


def test_spam(mail):
    """Ask bogofilter about the mail.

    Returns True for spam, False for non-spam and None (retry) for
    any other exit status. Exits the program if bogofilter was killed
    by a signal.
    """
    # -u updates database. With ham-cutoff == spam-cutoff we enter
    # two-state mode and will never be unsure. 0.51 seems to be right
    # for my mail. YMMV.
    cmd = "bogofilter -v -u -o 0.51,0.51 -I"
    argv = cmd.split() + [mail['fullpath']]
    _log("Running %s %s" % (cmd, mail['fullpath']))
    # spawnvp() uses $PATH to find program.
    ret = os.spawnvp(os.P_WAIT, argv[0], argv)
    if ret < 0:
        error("bogofilter was killed by signal %d." % -ret)
    # From bogofilter manual:
    # 0 for spam; 1 for non-spam; 2 for unsure ; 3 for I/O or other errors.
    if ret == 0:
        return True
    if ret == 1:
        return False
    warning("Bogofilter returned %d. It shouldn't." % ret)
    return None  # Retry.

testtable.append(("spam", test_spam, maybespambox))

testtable.append(("DEFAULT", lambda mail: True, inbox))


# **************** No more tests. The real code starts here. ****************


# There is a more modern and more complicated way to create these, but
# most implementations seem to use this method. See
# http://cr.yp.to/proto/maildir.html for details.
def createmaildirname(__deliveries=[0]):
    """Create and return a unique name for a Maildir message.

    The name is "<seconds>.<pid>_<counter>.<hostname>". The mutable
    default argument is deliberate: it is the per-process delivery
    counter and must persist between calls.
    """
    hostname = socket.gethostname().replace('/', '\\057') \
                                   .replace(':', '\\072')
    delivery_identifier = "%d_%d" % (os.getpid(), __deliveries[0])
    seconds = int(time.time())
    __deliveries[0] += 1
    return "%s.%s.%s" % (seconds, delivery_identifier, hostname)


def movemail(srcfile, dstmaildir):
    """Move a mail into the new/ directory of dstmaildir.

    Does not overwrite: the mail is hard-linked to a fresh name and
    the source is unlinked afterwards. Failures of either step are
    logged as warnings and the function returns; nothing is raised
    for them. Works only inside a single filesystem.
    """
    dstfile = dstmaildir + "/new/" + createmaildirname()
    _log("Moving %s to %s" % (srcfile, dstfile))
    # os.rename() silently overwrites files. os.link() doesn't, and
    # throws exception on error.
    try:
        os.link(srcfile, dstfile)
    except OSError:
        # The file was probably moved.
        warning("Error linking %s to %s: %s: %s" %
                (srcfile, dstfile, sys.exc_info()[0], sys.exc_info()[1]))
        return
    try:
        os.unlink(srcfile)
    except OSError:
        # The file was probably moved by someone else. I think
        # deleting the destination file now should be safe, but I'll
        # let it remain until I am completely convinced it is
        # perfectly safe. The same mail twice is very cheap, a deleted
        # mail can be very expensive.
        warning("Error unlinking %s: %s: %s" %
                (srcfile, sys.exc_info()[0], sys.exc_info()[1]))


def sha1sum(f):
    """Return the hex SHA-1 of everything left to read from f.

    f must yield byte strings, i.e. be opened in binary mode.
    """
    h = _new_sha1()
    chunk = f.read(4096)
    while chunk:
        h.update(chunk)
        chunk = f.read(4096)
    return h.hexdigest()


def examine_mail(mailfile):
    """Hash a mail file and parse its headers.

    Returns a dict with the keys 'filename', 'srcdir', 'fullpath',
    'id', 'hash' and 'headers', or None (after logging a warning) if
    the file cannot be opened.
    """
    try:
        f = open(mailfile, "rb")
    except (IOError, OSError):
        # The file was probably moved by someone else.
        warning("Error opening %s: %s: %s" %
                (mailfile, sys.exc_info()[0], sys.exc_info()[1]))
        return None
    try:
        hexdigest = sha1sum(f)
        f.seek(0)
        headers = _HeaderParser().parse(f, headersonly=True)
    finally:
        # Close the handle even if hashing or parsing fails.
        f.close()
    parts = mailfile.split('/')
    return {'filename': parts[-1],
            'srcdir': '/'.join(parts[:-1]),
            'fullpath': mailfile,
            'id': repr(_header_text(headers, 'Message-ID')),
            'hash': hexdigest,
            'headers': headers}


def printable_metadata(mail):
    """Return a multi-line log summary of a mail dict.

    Header values are shown through repr(), so a missing header is
    printed as None and odd characters are escaped.
    """
    lines = ['Filename: %s' % mail['filename'],
             'SHA-1: %s' % mail['hash']]
    for name in ('Message-ID', 'Subject', 'Date', 'From', 'To', 'Cc',
                 'X-Spam-Status'):
        lines.append('%s: %s' % (name,
                                 repr(_header_text(mail['headers'], name))))
    return '\n'.join(lines)


def handlemail(mail):
    """Log a mail and move it to the box of the first matching test.

    Tests run in testtable order. A test returning None leaves the
    mail where it is, so it is looked at again the next time
    checkformail() runs. If no test matches at all the program exits.
    """
    _log("Current time: %s" % time.strftime("%Y-%m-%d %H:%M:%S %Z"))
    _log(printable_metadata(mail))
    for name, f, dstmaildir in testtable:
        match = f(mail)
        if match is True:
            _log("Mail %s matched test %s. Moving to %s ." %
                 (mail['id'], name, dstmaildir))
            movemail(mail['fullpath'], dstmaildir)
            _log("Done.")
            break
        elif match is None:
            warning("Test %s thinks we should retry later." % name)
            return
    else:
        error("Mail %s matched no rule. This shouldn't happen." % mail['id'])


def getmtime(directory, __fds={}):
    """Return the modification time of directory.

    The mutable default argument is deliberate: it caches one open
    file descriptor per directory so each poll is a single fstat().
    """
    if directory not in __fds:
        __fds[directory] = os.open(directory, os.O_RDONLY)
    return os.fstat(__fds[directory])[ST_MTIME]


def checkformail(srcmaildir):
    """Examine and handle every file in new/ and cur/ of srcmaildir."""
    for d in ("/new/", "/cur/"):
        for filename in os.listdir(srcmaildir + d):
            mail = examine_mail(srcmaildir + d + filename)
            if mail:
                handlemail(mail)
                # Blank line between the log entries of separate mails.
                _log()


def main(srcmaildir):
    """Print a startup banner, then poll srcmaildir once a second."""
    # Hash our own source so the log records exactly which code ran.
    own_source = open(sys.argv[0], "rb")
    try:
        own_hash = sha1sum(own_source)
    finally:
        own_source.close()

    _log()
    _log("* Starting.")
    _log("* Time is: %s" % time.strftime("%Y-%m-%d %H:%M:%S %Z"))
    _log("* My version is: %s" % version)
    _log("* My SHA-1 is: %s" % own_hash)
    _log()

    # Yes, this will cause checkformail() to be run twice with a
    # second in between. That is a feature, not a bug. Consider the
    # case when a mail is moved from new to cur, or simply renamed, by
    # another process between os.listdir() and handlemail(); we will
    # fail but retry the next second. Similar tricky situations will
    # be automagically dealt with this way. The unconditional
    # time.sleep() guarantees that no CPU-hogging loops occur.
    mtime = (0, 0)
    while True:
        newmtime = (getmtime(srcmaildir + "/new/"),
                    getmtime(srcmaildir + "/cur/"))
        if newmtime != mtime:
            checkformail(srcmaildir)
            mtime = newmtime
        time.sleep(1)


if __name__ == "__main__":
    main(srcmaildir)