home *** CD-ROM | disk | FTP | other *** search
- #
- # Tests.py
- # JunkMatcher
- #
- # Created by Benjamin Han on 2/1/05.
- # Copyright (c) 2005 Benjamin Han. All rights reserved.
- #
-
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
-
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
-
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
- #!/usr/bin/env python
-
- # IMPORTANT: Properties, Patterns and Tests can have only a single instance!
-
- import string
-
- from consts import *
- from Property import *
- from Pattern import *
- from Message import *
-
# At module level locals() is the module namespace. Properties.load() uses
# this mapping to look up a Property class by the class name read from the
# properties file (presumably the classes arrive via the star imports above).
_locals = locals()
-
-
class Properties (dict):
    """A dictionary of properties
    --------------------------
    fn: file name

    Note: key is class names (string), value is instances of Property.
    """
    def __init__ (self, propertiesFN):
        """Remember the properties file name and load the file right away."""
        self.fn = propertiesFN
        self.load()

    def load (self):
        """(Re)load every property from self.fn, dropping the current content.

        Blank lines and lines starting with '#' are ignored.  The remaining
        lines come in pairs:
          1. the recipient pattern, with one delimiter character at each end
             (writeToFile() uses double quotes)
          2. 'ClassName cpuTime n1 n2 n3 n4 [extra constructor arguments]'
             (the four integers are presumably the counters listed in
             getStatsString(), in the same order)
        """
        self.clear()

        # NOTE(review): assumes openFile() returns a file-like object with close()
        f = openFile(self.fn)
        try:
            lines = [line.strip() for line in f]
        finally:
            f.close()

        expectRecord = False
        recipientPattern = None
        for l in lines:
            if not l or l[0] == '#':
                continue

            if not expectRecord:
                # first line of a pair: strip the surrounding delimiters
                recipientPattern = l[1:-1]
                expectRecord = True
                continue

            # second line of a pair
            expectRecord = False
            fields = l.split(' ')
            classID = fields[0]
            testRecord = TestRecord(float(fields[1]),
                                    int(fields[2]), int(fields[3]),
                                    int(fields[4]), int(fields[5]))

            if classID == 'PropertyDateInFuture':
                r = PropertyDateInFuture(testRecord, recipientPattern, int(fields[6]))
            elif classID == 'PropertyDateInThePast':
                r = PropertyDateInThePast(testRecord, recipientPattern, int(fields[6]))
            elif classID == 'PropertyRecipientsMismatch':
                r = PropertyRecipientsMismatch(testRecord, recipientPattern)
            elif classID == 'PropertyOpenRelay':
                r = PropertyOpenRelay(testRecord, recipientPattern,
                                      float(fields[6]), int(fields[7]), fields[8:])
            elif classID == 'PropertyPhishingURL':
                r = PropertyPhishingURL(testRecord, recipientPattern,
                                        bool(int(fields[6])))
            elif len(fields) > 6:
                # any other property taking a single integer argument
                r = _locals[classID](testRecord, recipientPattern, int(fields[6]))
            else:
                r = _locals[classID](testRecord, recipientPattern)

            self[classID] = r

    def writeToFile (self):
        """Write all properties to self.fn in the format load() reads."""
        strList = []
        for classID, r in self.items():
            if r.recipientPattern:
                recipientPattern = r.recipientPattern.pattern
            else:
                recipientPattern = ''

            s = '"%s"\n%s %s' % (recipientPattern, classID, r.testRecord)
            if classID == 'PropertyDateInFuture':
                # timedelta.seconds excludes whole days, so fold the days in;
                # otherwise a delta of one day or more is written truncated.
                delta = r.timeDelta
                minutes = (delta.days * 86400 + delta.seconds) // 60
                strList.append('%s %d\n' % (s, minutes))
            elif classID == 'PropertyDateInThePast':
                strList.append('%s %d\n' % (s, -r.timeDelta.days))
            elif classID == 'PropertyOpenRelay':
                strList.append('%s %f %d %s\n' % (s, r.timeout, r.numLastIPs,
                                                  ' '.join(r.blackLists)))
            elif classID == 'PropertyPhishingURL':
                strList.append('%s %d\n' % (s, int(r.checkWhitelistedEmail)))
            elif hasattr(r, 'intArg'):
                strList.append('%s %d\n' % (s, r.intArg))
            else:
                strList.append('%s\n' % s)

        # close explicitly so the data is flushed without relying on garbage
        # collection (assumes openFile() returns a file-like object)
        f = openFile(self.fn, 'w')
        try:
            f.write('\n'.join(strList))
        finally:
            f.close()

    def getStatsString (self):
        """Returns a string containing the statistics of all properties. The format:
        'name1 cpuTime1 numTruePositive1 numTrueNegative1 numFalsePositive1 numFalseNegative1\n
        name2 cpuTime2 ...'."""
        return '\n'.join(['%s %s' % (name, ' '.join([str(v) for v in r.testRecord.readAll()]))
                          for name, r in self.items()])

    def setStats (self, statsString):
        """Update the statistics of the properties. The statsString is produced from
        getStatsString().  Entries naming a property that is no longer in this
        dictionary are skipped."""
        if len(statsString) == 0: return
        for line in statsString.split('\n'):
            fields = line.split(' ')
            r = self.get(fields[0])     # a property could be gone at this point
            if r is None:
                continue
            r.testRecord.setAll(float(fields[1]), int(fields[2]), int(fields[3]),
                                int(fields[4]), int(fields[5]))
-
-
class Patterns (dict):
    """A dictionary of patterns
    --------------------------
    fn: file name

    Note: key is the pattern itself (Unicode string), value is instances of Pattern.
    """
    def __init__ (self, patternsFN):
        """Remember the patterns file name and load the file right away."""
        self.fn = patternsFN
        self.load()

    def load (self):
        """(Re)load every pattern from self.fn, dropping the current content.

        Blank lines and lines starting with '#' are ignored.  Each pattern
        occupies the following lines, in order:
          1. the pattern, with one delimiter character at each end
          2. the pattern name, delimited the same way
          3. a state line: first character 'M' means managed
          4. the recipient pattern, delimited
          5. the encoding pattern, delimited
          6+ one 'viewID cpuTime n1 n2 n3 n4' line per view (possibly none)
        A line starting with a double quote after line 5 begins the next
        pattern.  A truncated trailing record (fewer than 5 lines) is dropped.
        """
        self.clear()

        # NOTE(review): assumes openFile() returns a file-like object with close()
        f = openFile(self.fn)
        try:
            lines = [line.strip() for line in f]
        finally:
            f.close()

        mode = 0
        for l in lines:
            if not l or l[0] == '#':
                continue

            # '>= 5' (not '> 5') so a pattern without any view line is still
            # terminated by the next pattern's opening quote; this assumes a
            # view ID never starts with a double quote.
            if mode >= 5 and l[0] == '"':
                self[pattern] = Pattern(patternName, testRecords, pattern, isManaged,
                                        recipientPattern, encodingPattern)
                mode = 0

            if mode == 0:
                pattern = l[1:-1]
                # start fresh so records never leak from the previous pattern
                testRecords = {}
            elif mode == 1:
                patternName = l[1:-1]
            elif mode == 2:
                isManaged = (l[0] == 'M')
            elif mode == 3:
                recipientPattern = l[1:-1]
            elif mode == 4:
                encodingPattern = l[1:-1]
            else:
                fields = l.split(' ')
                testRecords[fields[0]] = TestRecord(float(fields[1]),
                                                    int(fields[2]), int(fields[3]),
                                                    int(fields[4]), int(fields[5]))

            mode += 1

        # don't forget the last one (only if the file held a complete record)
        if mode >= 5:
            self[pattern] = Pattern(patternName, testRecords, pattern, isManaged,
                                    recipientPattern, encodingPattern)

    def writeToFile (self):
        """So we can print all patterns into a file."""
        strList = []
        for patternID, p in self.items():
            if p.isManaged: stateStr = 'M'
            else: stateStr = 'U'
            strList.append('"%s"\n"%s"\n%s' %
                           (p.origPattern, p.name, stateStr))

            if p.recipientPattern: recipientPattern = p.recipientPattern.pattern
            else: recipientPattern = ''
            if p.encodingPattern: encodingPattern = p.encodingPattern.pattern
            else: encodingPattern = ''

            strList.append('"%s"\n"%s"' % (recipientPattern, encodingPattern))

            for viewID, testRecord in p.testRecords.items():
                strList.append('%s %s' % (viewID, testRecord))

            # blank line between two patterns
            strList[-1] = '%s\n' % strList[-1]

        # close explicitly so the data is flushed without relying on garbage
        # collection (assumes openFile() returns a file-like object)
        f = openFile(self.fn, 'w')
        try:
            f.write('\n'.join(strList))
        finally:
            f.close()

    def getStatsString (self):
        """Returns a string containing the statistics of all properties. The format:
        'name1 numViews1\n
        view11 cpuTime11 numTruePositive11 numTrueNegative11 numFalsePositive11 numFalseNegative11\n
        view12 ...\n
        name2 numViews2\n ...', where the names are double quoted.

        A pattern without views contributes only its header line (no blank
        line follows it), so the result can always be fed to setStats()."""
        lines = []
        for name, p in self.items():
            lines.append('"%s" %d' % (name, len(p.testRecords)))
            for view, testRecord in p.testRecords.items():
                lines.append('%s %s' % (view, ' '.join([str(v) for v in testRecord.readAll()])))
        return '\n'.join(lines)

    def setStats (self, statsString):
        """Update the statistics of the properties. The statsString is produced from
        getStatsString().  Patterns or views that no longer exist are skipped."""
        if len(statsString) == 0: return

        p = None
        remaining = 0       # number of view lines still expected for p
        for line in statsString.split('\n'):
            if remaining == 0:
                if not line:
                    # tolerate the blank line that used to follow a pattern
                    # with zero views
                    continue
                # header: '"name" numViews'; the name itself may contain spaces
                idx = line.rfind(' ')
                p = self.get(line[1:idx - 1])   # patterns could be gone at this point
                remaining = int(line[idx + 1:])
            else:
                if p:
                    fields = line.split(' ')
                    testRecord = p.testRecords.get(fields[0])
                    if testRecord:          # a view can be gone at this point
                        testRecord.setAll(float(fields[1]), int(fields[2]), int(fields[3]),
                                          int(fields[4]), int(fields[5]))
                remaining -= 1
-
-
class Test (object):
    """A single test - could be a property or a pattern
    ------------------------------------------------
    propertyOrPattern: an instance of Property or Pattern.
    isPattern: True iff it's a pattern.
    isOn: True iff it's on.
    isHard: True iff it's a hard test.
    isHTML: True iff it's only applicable to an HTML message (exists only when isPattern is True).
    view: a string indicating which view we're interested in (exists only when isPattern is True).
    """
    __slots__ = ['propertyOrPattern', 'isPattern', 'isOn', 'isHard', 'isHTML', 'view']

    def __init__ (self, propertyOrPattern, parameters):
        """Build a test from one parsed line of the tests file.

        parameters is the space-split line: for a pattern it is
        [view, stateString, onFlag], for a property [className, stateString,
        onFlag] (see Tests.load()).  The values are copied out here; the
        parameters list is never consulted or updated afterwards.
        """
        flags = parameters[1]
        patternTest = isinstance(propertyOrPattern, Pattern)

        self.propertyOrPattern = propertyOrPattern
        self.isPattern = patternTest
        self.isOn = (parameters[-1] == '1')
        # the last state character marks a hard test
        self.isHard = (flags[-1] == 'H')

        if not patternTest:
            return

        # pattern-only attributes: the first state character marks HTML-only
        self.isHTML = (flags[0] == 'H')
        self.view = parameters[0]

    def getAttribute_ (self, name):
        """Accessor for the Obj-C side of the PyObjC bridge: read an instance variable by name."""
        value = getattr(self, name)
        return value

    def setAttribute_withValue_ (self, name, value):
        """Mutator for the Obj-C side of the PyObjC bridge: assign an instance variable by name."""
        setattr(self, name, value)
-
-
class Tests (list):
    """A list of tests (including both properties and patterns)
    --------------------------------------------------------
    properties: an instance of Properties
    patterns: an instance of Patterns
    fn: file name
    """
    def __init__ (self, properties, patterns, testsFN):
        """Keep the property/pattern dictionaries and load the tests file right away."""
        self.properties = properties
        self.patterns = patterns
        self.fn = testsFN
        self.load()

    def load (self):
        """(Re)load the test ordering from self.fn.

        This only loads in the tests file (not including properties and
        patterns).  Blank lines and lines starting with '#' are ignored.  A
        property takes one line starting with 'P' ('ClassName state on'); a
        pattern takes two lines (the delimited pattern, then 'view state on').
        A test whose property/pattern cannot be found is logged and skipped.
        """
        del self[:]

        # NOTE(review): assumes openFile() returns a file-like object with close()
        f = openFile(self.fn)
        try:
            lines = [line.strip() for line in f]
        finally:
            f.close()

        expectPatternParams = False
        pattern = None
        for l in lines:
            if not l or l[0] == '#':
                continue

            if expectPatternParams:
                # second line of a pattern
                parameters = l.split(' ')
                propertyOrPattern = self.patterns.get(pattern)
                expectPatternParams = False
            elif l[0] == 'P':
                # property
                parameters = l.split(' ')
                propertyOrPattern = self.properties.get(parameters[0])
            else:
                # first line of a pattern
                pattern = l[1:-1]
                expectPatternParams = True
                continue

            if propertyOrPattern:
                self.append(Test(propertyOrPattern, parameters))
            else:
                NSLog(u'A test is AWOL.')

    def patchPropertiesAgainstDefaults (self):
        """Patch the tests so they contain exactly the same set of properties as those in
        the Defaults properties file, then write properties and tests back to disk.

        Properties missing from the defaults are removed (together with their
        tests); properties only present in the defaults are added, and their
        tests are inserted at the position suggested by the Defaults tests file.

        ASSUMPTION: CALL load() BEFORE YOU CALL THIS!"""
        # sets and DEFAULTS_PATH are not defined in this file; presumably they
        # come in through one of the star imports.
        newProperties = Properties('%sproperties' % DEFAULTS_PATH)
        newPropertySet = sets.Set(newProperties.keys())
        oldPropertySet = sets.Set(self.properties.keys())

        addSet = newPropertySet - oldPropertySet
        removeSet = oldPropertySet - newPropertySet

        # ====== updating self.properties ======

        for name in removeSet:
            del self.properties[name]

        for name in addSet:
            self.properties[name] = newProperties[name]

        # ====== updating self ======

        # STEP 1: remove the tests of removed properties, from the end of the
        # list so that the remaining indexes stay valid
        removeIdxList = [i for i, t in enumerate(self)
                         if not t.isPattern
                         and t.propertyOrPattern.__class__.__name__ in removeSet]
        removeIdxList.reverse()
        for i in removeIdxList:
            del self[i]

        # STEP 2: add tests; newTests is used to determine the insertion
        # points for the new properties
        newTests = Tests(newProperties, Patterns('%spatterns' % DEFAULTS_PATH),
                         '%stests' % DEFAULTS_PATH)
        self._insertNewPropertyTests(newTests, addSet)

        self.properties.writeToFile()
        self.writeToFile()

    def _insertNewPropertyTests (self, newTests, addSet):
        """Insert into self the tests from newTests whose property class name is in addSet.

        Every new test is anchored to the closest preceding property test in
        newTests that is NOT new, and is inserted right after the first test
        in self having that property class, keeping the newTests order.  New
        tests preceding every old property go to the front of the list; new
        tests whose anchor is absent from self are appended at the end.
        """
        anchorOrder = []    # anchors in the order they were first seen
        groups = {}         # anchor class name (or None) -> [new tests]
        anchor = None
        for t in newTests:
            if t.isPattern:
                continue
            name = t.propertyOrPattern.__class__.__name__
            if name in addSet:
                if anchor not in groups:
                    groups[anchor] = []
                    anchorOrder.append(anchor)
                groups[anchor].append(t)
            else:
                anchor = name

        if not groups:
            return

        idx = 0
        if None in groups:
            # no old property comes before these in the defaults
            head = groups[None]
            del groups[None]
            self[0:0] = head
            idx = len(head)

        # bounded by the list length: an anchor may be missing from self
        while groups and idx < len(self):
            t = self[idx]
            if not t.isPattern:
                name = t.propertyOrPattern.__class__.__name__
                newGroup = groups.get(name)
                if newGroup:
                    self[idx + 1:idx + 1] = newGroup
                    idx += len(newGroup)    # skip what was just inserted
                    del groups[name]
            idx += 1

        # whatever is left had no anchor in self
        for name in anchorOrder:
            if name in groups:
                self.extend(groups[name])

    def writeToFile (self):
        """So that we can print the specification of test ordering into a file."""
        strList = []
        for test in self:
            if test.isHard: hardStr = 'H'
            else: hardStr = 'S'

            if test.isPattern:
                if test.isHTML: htmlStr = 'H'
                else: htmlStr = '_'
                strList.append(u'"%s"\n%s %s%s %s\n' % (test.propertyOrPattern.origPattern,
                                                        test.view, htmlStr, hardStr, int(test.isOn)))
            else:
                strList.append(u'%s %s %s\n' % (test.propertyOrPattern.__class__.__name__,
                                                hardStr, int(test.isOn)))

        # close explicitly so the data is flushed without relying on garbage
        # collection (assumes openFile() returns a file-like object)
        f = openFile(self.fn, 'w')
        try:
            f.write('\n'.join(strList))
        finally:
            f.close()
-
-
if __name__ == '__main__':
    # Command-line driver: load the configured tests and run every enabled
    # one against the raw email source given on the command line.
    # NOTE(review): sys, time, CONF_PATH and encodeText are not defined in
    # this file; presumably they come in through the star imports.
    if len(sys.argv) == 1:
        print('Usage: ./Tests.py <filename>')
        print('  * filename is the name of the file containing email raw source.')
        sys.exit(1)

    # init tests
    oldTime = time.time()
    tests = Tests(Properties('%sproperties' % CONF_PATH),
                  Patterns('%spatterns' % CONF_PATH),
                  '%stests' % CONF_PATH)
    print('Initialization time: %s' % (time.time() - oldTime))

    #tests.patchPropertiesAgainstDefaults()

    #print '* All of the tests:'
    #print tests
    #print

    # read the message source and release the file handle right away
    msgFile = open(sys.argv[1])
    try:
        msgSource = msgFile.read()
    finally:
        msgFile.close()
    msg = Message(msgSource)

    # execution here doesn't distinguish hard/soft tests
    print('* Executing the tests:')
    for test in tests:
        if not test.isOn:
            continue

        propertyOrPattern = test.propertyOrPattern
        if test.isPattern:
            # a pattern yields a match object (or a false value) plus its CPU time
            mo, cpuTime = propertyOrPattern.run(msg, test.view)
            if mo:
                print(encodeText('- Pattern "%s" matches "%s": %f usec(s)' % (propertyOrPattern.name,
                                                                               mo.group(0),
                                                                               cpuTime)))
        else:
            # a property yields False, True, or some other value describing the hit
            result, cpuTime = propertyOrPattern.run(msg)
            if result is not False:
                if result is True:
                    s = '- %s' % propertyOrPattern.name
                else:
                    try:
                        s = encodeText('- %s (%s)' % (propertyOrPattern.name, result))
                    except Exception:
                        # the result could not be formatted/encoded: fall back
                        # to the bare name (no longer a bare except, so newer
                        # interpreters let KeyboardInterrupt through)
                        s = encodeText('- %s' % propertyOrPattern.name)

                print('%s: %f usec(s)' % (s, cpuTime))
-