home *** CD-ROM | disk | FTP | other *** search
- #
- # MetaPatterns.py
- # JunkMatcher
- #
- # Created by Benjamin Han on 2/1/05.
- # Copyright (c) 2005 Benjamin Han. All rights reserved.
- #
-
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
-
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
-
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
- #!/usr/bin/env python
-
- import string
-
- from consts import *
- from utilities import *
- from emailAddress import *
-
-
# Captures the parenthesised body of every "EmailAddresses = (...);" entry
# found in the settings read from com.apple.mail.plist.
_epPat = re.compile(r'EmailAddresses\s*=\s*\(([^;]*)\);')

# A much simplified version of the RFC 2822 address grammar, assembled from
# its repeating pieces so that the structure stays readable:
#   group 1 -- the part before '@' (dot-separated atoms)
#   group 2 -- the part after '@'  (dot-separated atoms)
_atomChar = r'[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]'
# one or more atom characters, never ending in the middle of a run of them
_atom = r'(?:%s+(?!%s))' % (_atomChar, _atomChar)
# optional spaces/tabs tolerated around atoms, dots and the '@'
_lws = r'[\040\t]*'
_dotAtoms = r'%s%s(?:\.%s%s%s)*' % (_atom, _lws, _lws, _atom, _lws)
_emailPat = re.compile(r'(%s)@%s(%s)' % (_dotAtoms, _lws, _dotAtoms))
-
-
def getReservedMPs ():
    """(MAC OS X ONLY) Getting names, domains and emails from Mail's account settings.

    Runs ``defaults read com.apple.mail MailAccounts``, collects every entry
    of the ``EmailAddresses = (...);`` lists in its output and turns each
    entry matched by _emailPat into regex-escaped meta patterns.

    Returns a list of (name, pattern) tuples.  For the i-th listed entry
    (counting from 1; entries that do not parse keep their number but yield
    nothing) the list holds 'myName<i>', 'myDomain<i>' and 'myEmail<i>'.
    If at least one entry parsed, the list ends with the alternations
    'myNames', 'myDomains' and 'myEmails'.  The list is empty when nothing
    could be parsed.
    """
    # Flatten the command output into a single line so that _epPat can match
    # entries spanning several lines.  The pipe is closed explicitly instead
    # of being left to the garbage collector.
    pipe = os.popen('defaults read com.apple.mail MailAccounts')
    try:
        pLines = ' '.join([l.strip() for l in pipe])
    finally:
        pipe.close()

    srcEList = []
    for m in _epPat.finditer(pLines):
        # entries are comma separated; [1:-1] drops the first and last
        # character of each one (presumably the surrounding quotes)
        srcEList.extend([e.strip()[1:-1] for e in m.group(1).split(',')])

    reservedMPs = []
    nList = []
    dList = []
    eList = []
    for i, e in enumerate(srcEList):
        m = _emailPat.search(e)
        if not m:
            continue

        # escape everything: the values end up embedded in regular expressions
        n = re.escape(m.group(1).strip())
        d = re.escape(m.group(2).strip())
        e = re.escape(e)

        idx = i + 1
        reservedMPs.append(('myName%d' % idx, r'\b%s\b' % n))
        reservedMPs.append(('myDomain%d' % idx, r'\b%s\b' % d))
        reservedMPs.append(('myEmail%d' % idx, r'\b%s\b' % e))

        nList.append(n)
        dList.append(d)
        eList.append(e)

    if nList:
        # one alternation per category, covering every parsed address
        for mpName, parts in (('myNames', nList),
                              ('myDomains', dList),
                              ('myEmails', eList)):
            alternatives = '|'.join(['(?:%s)' % p for p in parts])
            reservedMPs.append((mpName, r'\b(?:%s)\b' % alternatives))

    return reservedMPs
-
-
class _ReplaceMP (object):
    """Substitution callback that expands a meta pattern reference.

    Instances are callable with a regex match object, so they can be passed
    as the replacement to a compiled pattern's sub().  The matched text with
    its first three characters and its last character removed is used as the
    meta pattern name and looked up in the mapping given at construction.
    """
    __slots__ = 'metaPatterns'

    def __init__ (self, metaPatterns):
        # mapping: name -> sequence whose first item is the pattern string
        self.metaPatterns = metaPatterns

    def __call__ (self, mo):
        """Return '(?:<pattern>)' for the meta pattern referenced by mo.

        Raises JMExceptionMetaPattern (given the name) when the name is unknown.
        """
        name = mo.group(0)[3:-1]
        entry = self.metaPatterns.get(name)
        if entry is None:
            raise JMExceptionMetaPattern(name)
        return '(?:%s)' % entry[0]
-
-
class MetaPatterns (dict):
    """A dictionary of meta patterns
    -----------------------------
    fn: file name

    NOTE: the key is the name of the pattern, and the value is a tuple (str, bool, bool)
          where the first bool == True means it's a reserved MP, the second bool == True
          iff it's a managed meta pattern.
    """
    def __init__ (self, fn):
        self.fn = fn
        self._replaceMP = _ReplaceMP(self)   # callback used by instantiate()

        self.load()

    def load (self):
        """Drop the current contents and reload everything.

        User-defined meta patterns are read from the file self.fn; the
        reserved ones returned by getReservedMPs() are added afterwards (and
        therefore override file entries of the same name).
        """
        self.clear()

        # pattern file format: odd lines are patterns, even lines are names;
        # blank lines and lines starting with '#' are skipped and not counted.
        #   pattern line: "<pattern>"
        #   name line:    "<name>" M   (managed)  or  "<name>" U
        # NOTE(review): assumes openFile returns a file-like object that
        # supports close() -- confirm against its definition.
        f = openFile(self.fn)
        try:
            pat = None   # pattern still waiting for its name line
            for l in f:
                l = l.strip()
                if not l or l[0] == '#': continue

                if pat is None:
                    pat = l[1:-1]       # strip the enclosing quotes
                else:
                    # l[1:-3] removes the opening quote and the trailing '" M' / '" U'
                    self[l[1:-3]] = (pat, False, l[-1] == 'M')
                    pat = None
        finally:
            f.close()

        # add the reserved meta patterns
        for k, v in getReservedMPs(): self[k] = (v, True, True)

    def writeToFile (self):
        """Save the non-reserved meta patterns to self.fn in the format load() reads."""
        strList = []
        for k, v in self.items():
            if v[1]: continue    # don't save the reserved meta patterns
            if v[2]: mStr = 'M'
            else: mStr = 'U'
            strList.append('"%s"\n"%s" %s' % (v[0], k, mStr))

        # The content is built before the file is opened for writing, and the
        # file is closed explicitly so the data is flushed deterministically.
        f = openFile(self.fn, 'w')
        try:
            f.write('\n'.join(strList))
        finally:
            f.close()

    def instantiate (self, pat):
        """Return pat with every match of mpPat replaced by '(?:<pattern>)'.

        The replacement is produced by the _ReplaceMP callback, which raises
        JMExceptionMetaPattern when a referenced name is not in this dictionary.
        """
        return mpPat.sub(self._replaceMP, pat)

    def findAll (self, pat):
        """Returns a set containing the names of the meta patterns used in pat."""
        used = sets.Set()
        for mo in mpPat.finditer(pat):
            # same slicing as _ReplaceMP: drop the first three characters and the last one
            name = mo.group(0)[3:-1]
            if self.get(name) is not None:   # ignore references to unknown names
                used.add(name)
        return used
-
-
if __name__ == '__main__':
    # Self-test: list every meta pattern ordered by name, then show how a
    # sample pattern is expanded and which meta patterns it refers to.
    mps = MetaPatterns('%smetaPatterns' % CONF_PATH)
    names = mps.keys()
    names.sort()
    for name in names:
        line = '%s: %s' % (name, mps[name][0])
        print(line.encode('utf8'))

    origPattern = u'(?i)(?#v-)(?#i-)agra'
    realPattern = encodeText(mps.instantiate(origPattern))
    usedNames = encodeText(', '.join(mps.findAll(origPattern)))
    print('')
    print('* original pattern: %s' % origPattern)
    print('* real pattern: %s' % realPattern)
    print('* meta patterns used: %s' % usedNames)
-