home *** CD-ROM | disk | FTP | other *** search
- #
- # EmailDB.py
- # JunkMatcher
- #
- # Created by Benjamin Han on 2/1/05.
- # Copyright (c) 2005 Benjamin Han. All rights reserved.
- #
-
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
-
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
-
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
- #!/usr/bin/env python
-
- # IMPORTANT: EmailDB can have only a single instance!
-
- from consts import *
- from utilities import *
- from rwlock import * # to ensure thread-safety, since we will have only one global SiteDB
-
- # so the reference to libgdbm.3.dylib would work in gdbm.so
- # because Panther doesn't ship Python with gdbm module so I include my own
- os.chdir('%slib' % ROOT_PATH)
-
- import gdbm, fcntl, md5, cPickle, zlib
-
- # EmailDB is implemented to accommodate multiple readers and (xor) a single writer
- _emailDBRWLock = RWLock()
-
-
- class EmailDB:
- """A compressed database of emails
- -------------------------------
- """
- def __init__ (self, fn, readOnly = False):
- self.fn = fn
- self.readOnly = readOnly
- self.open()
-
- def open (self):
- # we only allows one thread to open the db, regardless whether it's read-only or not
- _emailDBRWLock.acquire_write()
-
- if hasattr(self, 'db'):
- # another thread already opened it
- return
-
- if self.readOnly:
- self.db = gdbm.open(self.fn, 'ru')
- else:
- if os.path.exists(self.fn):
- self.db = gdbm.open(self.fn, 'ws')
- else:
- self.db = gdbm.open(self.fn, 'cs')
-
- _emailDBRWLock.release()
-
- def reOpen (self):
- """Will close the db first if it's already opened.
-
- NOTE: NOT RECOMMENDED FOR MULTIPLE THREADS! (can open the db multiple times)"""
- if hasattr(self, 'db'):
- self.db.close()
- del self.db
-
- self.open()
-
- def addEntry (self, msgSrc):
- k = md5.new(msgSrc).hexdigest()
-
- _emailDBRWLock.acquire_write()
-
- try:
- # use version 2 of pickle protocol
- self.db[k] = zlib.compress(cPickle.dumps(msgSrc, cPickle.HIGHEST_PROTOCOL))
- except Exception, e:
- printException(u'Exception in EmailDB.addEntry()', e)
-
- _emailDBRWLock.release()
-
- return k
-
- def getEntry (self, md5Key):
- _emailDBRWLock.acquire_read()
-
- try:
- ret = cPickle.loads(zlib.decompress(self.db[md5Key]))
- except Exception, e:
- printException(u'Exception in EmailDB.getEntry(); key = %s' % md5Key, e)
- return None
- _emailDBRWLock.release()
-
- return ret
-
- def recycle (self):
- # don't recycle it if it's in read-only mode
- if not self.readOnly:
- _emailDBRWLock.acquire_write()
-
- try:
- self.db.close()
- os.remove(self.fn)
- try:
- self.db = gdbm.open(self.fn, 'ws')
- except:
- self.db = gdbm.open(self.fn, 'cs')
- except Exception, e:
- printException('Exception in EmailDB.recycle()', e)
-
- _emailDBRWLock.release()
-
- def close (self):
- """Close the database. WARNING: don't try to do anything afterwards!"""
- self.db.close()
- del self.db
-
-
- if __name__ == '__main__':
- import datetime
-
- def usage ():
- print 'Usage: ./EmailDB.py <action> <dbFN> [arg]'
- print ' * action can be one of "add", "get", and "list" (no double quotes);'
- print ' * dbFN is the name of the email database;'
- print ' * if action is "add", arg is the name of the file containing email raw source;'
- print ' if action is "get", it\'s a key to retrieve a message.'
-
- argc = len(sys.argv)
- if argc < 3:
- usage()
- sys.exit(1)
-
- action = sys.argv[1]
- dbFN = sys.argv[2]
-
- emailDB = EmailDB(dbFN, True)
- if action == 'add':
- if argc < 4:
- usage()
- sys.exit(1)
-
- arg = sys.argv[3]
-
- # for testing purposes, we pass None as the matchResult
- print '* Added', emailDB.addEntry(open(arg).read())
- print '* In %s:' % dbFN
- for k in emailDB.db.keys():
- print ' ', k
-
- elif action == 'get':
- if argc < 4:
- usage()
- sys.exit(1)
-
- arg = sys.argv[3]
-
- print '* Getting %s:' % arg
- msgSrc = emailDB.getEntry(arg)
-
- print '* Message source:'
- print msgSrc
-
- elif action == 'list':
- print '* In %s:' % dbFN
- for k in emailDB.db.keys():
- print ' ', k
-
- else:
- print '* Unknown action...'
-