home *** CD-ROM | disk | FTP | other *** search
- #
- # comparePatterns.py
- # JunkMatcher
- #
- # Created by Benjamin Han on 5/1/05.
- # Copyright (c) 2005 Benjamin Han. All rights reserved.
- #
-
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
-
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
-
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
- #!/usr/bin/env python
-
- import sys, os.path, difflib
-
- from consts import *
- from Tests import *
- from MetaPatterns import *
-
-
- DIFF_SCORE_THRESHOLD = 0.6
- rangePat = re.compile(r'-(\d+),(\d+) \+(\d+),(\d+)')
- _seqMatcher = difflib.SequenceMatcher()
-
-
- class PatternChange (object):
- """A single pattern change
- -----------------------
- action: '+' (addition), '-' (deletion), 'n' (name change), 'p' (pattern change)
- 'b' (both name and pattern change);
- new: a string or a tuple of strings (when action == 'b': the first is name and the
- second is the pattern);
- opcodes: a list or a tuple of lists describing how to make the old pattern into
- the new one (tuple if action == 'b');
- """
- __slots__ = ('action', 'new', 'opcodes')
-
- def __init__ (self, action, new, opcodes):
- self.action = action
- self.new = new
- self.opcodes = opcodes
-
- def __repr__ (self):
- return '[%s, "%s", %s]' % (self.action, self.new, self.opcodes)
-
-
- def _whatKindOfChange (old, new, opcodes):
- """Returns a PatternChange instance describing the changes necessary to make 'old'
- into 'new'.
-
- ASSUMPTION: there must be at least one non-equal opcode in opcodes!"""
- oldPat, oldName = old.split('\n')
- newPat, newName = new.split('\n')
-
- oldPatSize = len(oldPat)
- newPatSize = len(newPat)
-
- patOpcodes = []
- nameOpcodes = []
-
- for op in filter(lambda i: i[0] != 'equal', opcodes):
- oldIdx = op[1]
- if oldIdx < oldPatSize:
- patOpcodes.append(op)
- else:
- nameOpcodes.append((op[0],
- op[1] - oldPatSize - 1,
- op[2] - oldPatSize - 1,
- op[3] - newPatSize - 1,
- op[4] - newPatSize - 1))
-
- if patOpcodes and nameOpcodes:
- return PatternChange('b', (newPat, newName), (patOpcodes, nameOpcodes))
- elif patOpcodes:
- return PatternChange('p', newPat, patOpcodes)
- else:
- return PatternChange('n', newName, nameOpcodes)
-
-
- def _diffPatterns (origList, newList, idIsPattern = True):
- """Compare two lists of patterns (strings) and return a tuple (changeDelta,
- removeDelta, addDelta) that can change the origList into newList:
-
- changeDelta: a dictionary with pattern IDs as keys and PatternChange instances as values;
- removeDelta: a set of pattern IDs for removal;
- addDelta: a set of tuple pattern IDs for addition.
-
- For the regular patterns the IDs are the regex themselves; for the meta patterns the
- IDs are the meta patterns' names.
-
- When comparing meta patterns, set idIsPattern to False."""
-
- origList.sort()
- newList.sort()
-
- changeDelta = {}
-
- # we need to find the closest matches for the patterns in the origPatterns against the
- # newPatterns
- # NOTE here the term "pattern" refers to a regex/name combo
- origPatterns = sets.Set()
- newPatterns = sets.Set()
-
- for diffLine in map(string.strip,
- difflib.unified_diff(origList, newList, n = 0))[2:]:
- c = diffLine[0]
- if c == '@':
- mo = rangePat.search(diffLine[3:-3])
- origIdx, newIdx = int(mo.group(1)) - 1, int(mo.group(3)) - 1
- else:
- if c == '+':
- newPatterns.add(newList[newIdx])
- newIdx += 1
-
- elif c == '-':
- origPatterns.add(origList[origIdx])
- origIdx += 1
-
- else:
- origIdx += 1
- newIdx += 1
-
- newPatternsToBeRemoved = sets.Set()
-
- # now origPatterns and newPatterns contain only the leftover patterns for alignment
- # p2 is a new pattern (cached by SequenceMatcher)
- for p2 in newPatterns:
- _seqMatcher.set_seq2(p2) # seq 2 is cached in SequenceMatcher
- matchesForOnePattern = []
- for p1 in origPatterns:
- _seqMatcher.set_seq1(p1)
- opcodes = _seqMatcher.get_opcodes()
- ratio = _seqMatcher.ratio()
- if ratio == 1.0:
- # identical "change" is found - take p1 and p2 out of changeDelta
- origPatterns.remove(p1)
- newPatternsToBeRemoved.add(p2)
- break
- elif ratio > DIFF_SCORE_THRESHOLD:
- matchesForOnePattern.append((ratio, p1, opcodes))
-
- if matchesForOnePattern:
- match, maxIdx = max(zip(matchesForOnePattern, range(len(matchesForOnePattern))))
- origPattern = match[1]
- origPatterns.remove(origPattern)
- newPatternsToBeRemoved.add(p2) # p2 is taken - remove it from newPatterns later
-
- if idIsPattern: pcID = origPattern.split('\n')[0]
- else: pcID = origPattern.split('\n')[1]
-
- pc = _whatKindOfChange(origPattern, p2, match[2])
- changeDelta[pcID] = pc
-
- newPatterns -= newPatternsToBeRemoved
-
- if idIsPattern:
- return (changeDelta,
- sets.Set(map(lambda i: i.split('\n')[0], origPatterns)),
- sets.Set(map(lambda i: i.split('\n')[0], newPatterns)))
- else:
- return (changeDelta,
- sets.Set(map(lambda i: i.split('\n')[1], origPatterns)),
- sets.Set(map(lambda i: i.split('\n')[1], newPatterns)))
-
-
- def compareMetaPatterns (origMetaPatterns, newMetaPatterns):
- """Compare two meta patterns and return a tuple (changeDelta, removeDelta, addDelta) that
- can change the origMetaPatterns into newMetaPatterns; see _diffPatterns() for comments on
- the three deltas.
- """
- # we only compare managed non-reserved meta patterns
- origMetaList = map(lambda i:'%s\n%s' % (i[1][0], i[0]),
- filter(lambda i:not i[1][1] and i[1][2], origMetaPatterns.items()))
- newMetaList = map(lambda i:'%s\n%s' % (i[1][0], i[0]),
- filter(lambda i:not i[1][1] and i[1][2], newMetaPatterns.items()))
-
- return _diffPatterns(origMetaList, newMetaList, False)
-
-
- def comparePatterns (origPatterns, newPatterns, origTests, newTests):
- """Compare two patterns and return a tuple (changeDelta, removeDelta, addDelta) that
- can change the origPatterns into newPatterns; see _diffPatterns() for comments on
- changeDelta; removeDelta is a dictionary using regexes as keys and test indices as
- values; addDelta is a dictionary using regexes as keys and Test instances as values.
- """
- # we only compare managed patterns
- origList = map(lambda i:'%s\n%s' % (i.origPattern, i.name),
- filter(lambda i:i.isManaged, origPatterns.values()))
- newList = map(lambda i:'%s\n%s' % (i.origPattern, i.name),
- filter(lambda i:i.isManaged, newPatterns.values()))
-
- changeDelta, removeDelta, addDelta = _diffPatterns(origList, newList, origTests)
-
- # turn removeDelta into a dictionary, with values being a list of indices into origTests
- # None means the pattern isn't used in origTests (normally this can't happen)
- removeDelta = dict.fromkeys(removeDelta, None)
-
- for idx, t in filter(lambda i: i[1].isPattern, enumerate(origTests)):
- p = t.propertyOrPattern.origPattern
- if p in removeDelta:
- if removeDelta[p] is None: removeDelta[p] = [idx]
- else: removeDelta[p].append(idx)
-
- # turn addDelta into a dictionary, with values being a list of Test instances in newTests
- # None means the pattern isn't used in newTests (normally this can't happen)
- addDelta = dict.fromkeys(addDelta, None)
-
- for t in filter(lambda t: t.isPattern, newTests):
- p = t.propertyOrPattern.origPattern
- if p in addDelta:
- if addDelta[p] is None: addDelta[p] = [t]
- else: addDelta[p].append(t)
-
- return (changeDelta, removeDelta, addDelta)
-
-
- if __name__ == '__main__':
- if len(sys.argv) != 3:
- print 'Usage: ./comparePatterns.py <path to the old patterns> <path to the new patterns>'
- sys.exit(1)
-
- import pprint
-
- origPath = sys.argv[1]
- newPath = sys.argv[2]
-
- # comparing meta patterns
- origMetaPatterns = MetaPatterns(os.path.join(origPath, 'metaPatterns'))
- newMetaPatterns = MetaPatterns(os.path.join(newPath, 'metaPatterns'))
-
- changeDelta, removeDelta, addDelta = compareMetaPatterns(origMetaPatterns, newMetaPatterns)
-
- print '========== meta patterns deltas =========='
- print '* changeDelta:'
- pprint.pprint(changeDelta)
- print '\n* removeDelta:'
- pprint.pprint(removeDelta)
- print '\n* addDelta:'
- pprint.pprint(addDelta)
- print
-
- # comparing patterns
- origPatterns = Patterns(os.path.join(origPath, 'patterns'))
- newPatterns = Patterns(os.path.join(newPath, 'patterns'))
- origTests = Tests(Properties(os.path.join(origPath, 'properties')),
- origPatterns,
- os.path.join(origPath, 'tests'))
- newTests = Tests(Properties(os.path.join(newPath, 'properties')),
- newPatterns,
- os.path.join(newPath, 'tests'))
-
- changeDelta, removeDelta, addDelta = comparePatterns(origPatterns, newPatterns, origTests, newTests)
-
- print '========== patterns deltas =========='
- print '* changeDelta:'
- pprint.pprint(changeDelta)
- print '\n* removeDelta:'
- pprint.pprint(removeDelta)
- print '\n* addDelta:'
- pprint.pprint(addDelta)
-