Chip 2003 January

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2003 January / Chip_2003-01_cd2.bin / convert / eJayMp3Pro / mp3pro_demo.exe / RE.PY < prev next >

Wrap

Python Source | 1998-08-28 | 11.7 KB | 370 lines

import sys import string from pcre import * # # First, the public part of the interface: # # pcre.error and re.error should be the same, since exceptions can be # raised from either module. # compilation flags I = IGNORECASE L = LOCALE M = MULTILINE S = DOTALL X = VERBOSE # # # _cache = {} _MAXCACHE = 20 def _cachecompile(pattern, flags=0): key = (pattern, flags) try: return _cache[key] except KeyError: pass value = compile(pattern, flags) if len(_cache) >= _MAXCACHE: _cache.clear() _cache[key] = value return value def match(pattern, string, flags=0): return _cachecompile(pattern, flags).match(string) def search(pattern, string, flags=0): return _cachecompile(pattern, flags).search(string) def sub(pattern, repl, string, count=0): if type(pattern) == type(''): pattern = _cachecompile(pattern) return pattern.sub(repl, string, count) def subn(pattern, repl, string, count=0): if type(pattern) == type(''): pattern = _cachecompile(pattern) return pattern.subn(repl, string, count) def split(pattern, string, maxsplit=0): if type(pattern) == type(''): pattern = _cachecompile(pattern) return pattern.split(string, maxsplit) def findall(pattern, string): if type(pattern) == type(''): pattern = _cachecompile(pattern) return pattern.findall(string) def escape(pattern): "Escape all non-alphanumeric characters in pattern." result = list(pattern) alphanum=string.letters+'_'+string.digits for i in range(len(pattern)): char = pattern[i] if char not in alphanum: if char=='\000': result[i] = '\\000' else: result[i] = '\\'+char return string.join(result, '') def compile(pattern, flags=0): "Compile a regular expression pattern, returning a RegexObject." groupindex={} code=pcre_compile(pattern, flags, groupindex) return RegexObject(pattern, flags, code, groupindex) # # Class definitions # class RegexObject: def __init__(self, pattern, flags, code, groupindex): self.code = code self.flags = flags self.pattern = pattern self.groupindex = groupindex def search(self, string, pos=0, endpos=None): """Scan through string looking for a match to the pattern, returning a MatchObject instance, or None if no match was found.""" if endpos is None or endpos>len(string): endpos=len(string) if endpos<pos: endpos=pos regs = self.code.match(string, pos, endpos, 0) if regs is None: return None self._num_regs=len(regs) return MatchObject(self, string, pos, endpos, regs) def match(self, string, pos=0, endpos=None): """Try to apply the pattern at the start of the string, returning a MatchObject instance, or None if no match was found.""" if endpos is None or endpos>len(string): endpos=len(string) if endpos<pos: endpos=pos regs = self.code.match(string, pos, endpos, ANCHORED) if regs is None: return None self._num_regs=len(regs) return MatchObject(self, string, pos, endpos, regs) def sub(self, repl, string, count=0): """Return the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in string by the replacement repl""" return self.subn(repl, string, count)[0] def subn(self, repl, source, count=0): """Return a 2-tuple containing (new_string, number). new_string is the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in the source string by the replacement repl. number is the number of substitutions that were made.""" if count < 0: raise error, "negative substitution count" if count == 0: count = sys.maxint n = 0 # Number of matches pos = 0 # Where to start searching lastmatch = -1 # End of last match results = [] # Substrings making up the result end = len(source) if type(repl) is type(''): # See if repl contains group references try: repl = pcre_expand(_Dummy, repl) except: m = MatchObject(self, source, 0, end, []) repl = lambda m, repl=repl, expand=pcre_expand: expand(m, repl) else: m = None else: m = MatchObject(self, source, 0, end, []) match = self.code.match append = results.append while n < count and pos <= end: regs = match(source, pos, end, 0) if not regs: break self._num_regs = len(regs) i, j = regs[0] if i == j == lastmatch: # Empty match adjacent to previous match pos = pos + 1 append(source[lastmatch:pos]) continue if pos < i: append(source[pos:i]) if m: m.pos = pos m.regs = regs append(repl(m)) else: append(repl) pos = lastmatch = j if i == j: # Last match was empty; don't try here again pos = pos + 1 append(source[lastmatch:pos]) n = n + 1 append(source[pos:]) return (string.join(results, ''), n) def split(self, source, maxsplit=0): """Split the source string by the occurrences of the pattern, returning a list containing the resulting substrings.""" if maxsplit < 0: raise error, "negative split count" if maxsplit == 0: maxsplit = sys.maxint n = 0 pos = 0 lastmatch = 0 results = [] end = len(source) match = self.code.match append = results.append while n < maxsplit: regs = match(source, pos, end, 0) if not regs: break i, j = regs[0] if i == j: # Empty match if pos >= end: break pos = pos+1 continue append(source[lastmatch:i]) rest = regs[1:] if rest: for a, b in rest: if a == -1 or b == -1: group = None else: group = source[a:b] append(group) pos = lastmatch = j n = n + 1 append(source[lastmatch:]) return results def findall(self, source): """Return a list of all non-overlapping matches in the string. If one or more groups are present in the pattern, return a list of groups; this will be a list of tuples if the pattern has more than one group. Empty matches are included in the result. """ pos = 0 end = len(source) results = [] match = self.code.match append = results.append while pos <= end: regs = match(source, pos, end, 0) if not regs: break i, j = regs[0] rest = regs[1:] if not rest: gr = source[i:j] elif len(rest) == 1: a, b = rest[0] gr = source[a:b] else: gr = [] for (a, b) in rest: gr.append(source[a:b]) gr = tuple(gr) append(gr) pos = max(j, pos+1) return results # The following 3 functions were contributed by Mike Fletcher, and # allow pickling and unpickling of RegexObject instances. def __getinitargs__(self): return (None,None,None,None) # any 4 elements, to work around # problems with the # pickle/cPickle modules not yet # ignoring the __init__ function def __getstate__(self): return self.pattern, self.flags, self.groupindex def __setstate__(self, statetuple): self.pattern = statetuple[0] self.flags = statetuple[1] self.groupindex = statetuple[2] self.code = apply(pcre_compile, statetuple) class _Dummy: # Dummy class used by _subn_string(). Has 'group' to avoid core dump. group = None class MatchObject: def __init__(self, re, string, pos, endpos, regs): self.re = re self.string = string self.pos = pos self.endpos = endpos self.regs = regs def start(self, g = 0): "Return the start of the substring matched by group g" if type(g) == type(''): try: g = self.re.groupindex[g] except (KeyError, TypeError): raise IndexError, 'group %s is undefined' % `g` return self.regs[g][0] def end(self, g = 0): "Return the end of the substring matched by group g" if type(g) == type(''): try: g = self.re.groupindex[g] except (KeyError, TypeError): raise IndexError, 'group %s is undefined' % `g` return self.regs[g][1] def span(self, g = 0): "Return (start, end) of the substring matched by group g" if type(g) == type(''): try: g = self.re.groupindex[g] except (KeyError, TypeError): raise IndexError, 'group %s is undefined' % `g` return self.regs[g] def groups(self, default=None): "Return a tuple containing all subgroups of the match object" result = [] for g in range(1, self.re._num_regs): a, b = self.regs[g] if a == -1 or b == -1: result.append(default) else: result.append(self.string[a:b]) return tuple(result) def group(self, *groups): "Return one or more groups of the match" if len(groups) == 0: groups = (0,) result = [] for g in groups: if type(g) == type(''): try: g = self.re.groupindex[g] except (KeyError, TypeError): raise IndexError, 'group %s is undefined' % `g` if g >= len(self.regs): raise IndexError, 'group %s is undefined' % `g` a, b = self.regs[g] if a == -1 or b == -1: result.append(None) else: result.append(self.string[a:b]) if len(result) > 1: return tuple(result) elif len(result) == 1: return result[0] else: return () def groupdict(self, default=None): "Return a dictionary containing all named subgroups of the match" dict = {} for name, index in self.re.groupindex.items(): a, b = self.regs[index] if a == -1 or b == -1: dict[name] = default else: dict[name] = self.string[a:b] return dict