home *** CD-ROM | disk | FTP | other *** search
/ Netrunner 2004 October / NETRUNNER0410.ISO / regular / iria107a.lzh / script / sre_compile.pyc (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2000-11-17  |  10.1 KB  |  375 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.0)
  3.  
  4. import _sre
  5. from sre_constants import *
  6. MAXCODE = 65535
  7.  
  8. def _compile(code, pattern, flags):
  9.     emit = code.append
  10.     for op, av in pattern:
  11.         if op in (LITERAL, NOT_LITERAL):
  12.             emit(av)
  13.         elif op is IN:
  14.             if flags & SRE_FLAG_IGNORECASE:
  15.                 emit(OPCODES[OP_IGNORE[op]])
  16.                 
  17.                 def fixup(literal, flags = flags):
  18.                     return _sre.getlower(literal, flags)
  19.  
  20.             else:
  21.                 emit(OPCODES[op])
  22.                 
  23.                 fixup = lambda x: x
  24.             skip = len(code)
  25.             emit(0)
  26.             _compile_charset(av, flags, code, fixup)
  27.             code[skip] = len(code) - skip
  28.         elif op is ANY:
  29.             if flags & SRE_FLAG_DOTALL:
  30.                 emit(OPCODES[ANY_ALL])
  31.             else:
  32.                 emit(OPCODES[ANY])
  33.         elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
  34.             if flags & SRE_FLAG_TEMPLATE:
  35.                 raise error, 'internal: unsupported template operator'
  36.                 emit(OPCODES[REPEAT])
  37.                 skip = len(code)
  38.                 emit(0)
  39.                 emit(av[0])
  40.                 emit(av[1])
  41.                 _compile(code, av[2], flags)
  42.                 emit(OPCODES[SUCCESS])
  43.                 code[skip] = len(code) - skip
  44.             elif _simple(av) and op == MAX_REPEAT:
  45.                 emit(OPCODES[REPEAT_ONE])
  46.                 skip = len(code)
  47.                 emit(0)
  48.                 emit(av[0])
  49.                 emit(av[1])
  50.                 _compile(code, av[2], flags)
  51.                 emit(OPCODES[SUCCESS])
  52.                 code[skip] = len(code) - skip
  53.             else:
  54.                 emit(OPCODES[REPEAT])
  55.                 skip = len(code)
  56.                 emit(0)
  57.                 emit(av[0])
  58.                 emit(av[1])
  59.                 _compile(code, av[2], flags)
  60.                 code[skip] = len(code) - skip
  61.                 if op == MAX_REPEAT:
  62.                     emit(OPCODES[MAX_UNTIL])
  63.                 else:
  64.                     emit(OPCODES[MIN_UNTIL])
  65.         elif op is SUBPATTERN:
  66.             if av[0]:
  67.                 emit(OPCODES[MARK])
  68.                 emit((av[0] - 1) * 2)
  69.             
  70.             _compile(code, av[1], flags)
  71.             if av[0]:
  72.                 emit(OPCODES[MARK])
  73.                 emit((av[0] - 1) * 2 + 1)
  74.             
  75.         elif op in (SUCCESS, FAILURE):
  76.             emit(OPCODES[op])
  77.         elif op in (ASSERT, ASSERT_NOT):
  78.             emit(OPCODES[op])
  79.             skip = len(code)
  80.             emit(0)
  81.             if av[0] >= 0:
  82.                 emit(0)
  83.             else:
  84.                 (lo, hi) = av[1].getwidth()
  85.                 if lo != hi:
  86.                     raise error, 'look-behind requires fixed-width pattern'
  87.                 
  88.                 emit(lo)
  89.             _compile(code, av[1], flags)
  90.             emit(OPCODES[SUCCESS])
  91.             code[skip] = len(code) - skip
  92.         elif op is CALL:
  93.             emit(OPCODES[op])
  94.             skip = len(code)
  95.             emit(0)
  96.             _compile(code, av, flags)
  97.             emit(OPCODES[SUCCESS])
  98.             code[skip] = len(code) - skip
  99.         elif op is AT:
  100.             emit(OPCODES[op])
  101.             if flags & SRE_FLAG_MULTILINE:
  102.                 emit(ATCODES[AT_MULTILINE.get(av, av)])
  103.             else:
  104.                 emit(ATCODES[av])
  105.         elif op is BRANCH:
  106.             emit(OPCODES[op])
  107.             tail = []
  108.             for av in av[1]:
  109.                 skip = len(code)
  110.                 emit(0)
  111.                 _compile(code, av, flags)
  112.                 emit(OPCODES[JUMP])
  113.                 tail.append(len(code))
  114.                 emit(0)
  115.                 code[skip] = len(code) - skip
  116.             
  117.             emit(0)
  118.             for tail in tail:
  119.                 code[tail] = len(code) - tail
  120.             
  121.         elif op is CATEGORY:
  122.             emit(OPCODES[op])
  123.             if flags & SRE_FLAG_LOCALE:
  124.                 emit(CHCODES[CH_LOCALE[av]])
  125.             elif flags & SRE_FLAG_UNICODE:
  126.                 emit(CHCODES[CH_UNICODE[av]])
  127.             else:
  128.                 emit(CHCODES[av])
  129.         elif op is GROUPREF:
  130.             if flags & SRE_FLAG_IGNORECASE:
  131.                 emit(OPCODES[OP_IGNORE[op]])
  132.             else:
  133.                 emit(OPCODES[op])
  134.             emit(av - 1)
  135.         else:
  136.             raise ValueError, ('unsupported operand type', op)
  137.     
  138.  
  139.  
  140. def _compile_charset(charset, flags, code, fixup = None):
  141.     emit = code.append
  142.     if not fixup:
  143.         
  144.         fixup = lambda x: x
  145.     
  146.     for op, av in _optimize_charset(charset, fixup):
  147.         emit(OPCODES[op])
  148.         if op is NEGATE:
  149.             pass
  150.         elif op is LITERAL:
  151.             emit(fixup(av))
  152.         elif op is RANGE:
  153.             emit(fixup(av[0]))
  154.             emit(fixup(av[1]))
  155.         elif op is CHARSET:
  156.             code.extend(av)
  157.         elif op is CATEGORY:
  158.             if flags & SRE_FLAG_LOCALE:
  159.                 emit(CHCODES[CH_LOCALE[av]])
  160.             elif flags & SRE_FLAG_UNICODE:
  161.                 emit(CHCODES[CH_UNICODE[av]])
  162.             else:
  163.                 emit(CHCODES[av])
  164.         else:
  165.             raise error, 'internal: unsupported set operator'
  166.     
  167.     emit(OPCODES[FAILURE])
  168.  
  169.  
  170. def _optimize_charset(charset, fixup):
  171.     out = []
  172.     charmap = [
  173.         0] * 256
  174.     
  175.     try:
  176.         for op, av in charset:
  177.             if op is NEGATE:
  178.                 out.append((op, av))
  179.             elif op is LITERAL:
  180.                 charmap[fixup(av)] = 1
  181.             elif op is RANGE:
  182.                 for i in range(fixup(av[0]), fixup(av[1]) + 1):
  183.                     charmap[i] = 1
  184.                 
  185.             elif op is CATEGORY:
  186.                 return charset
  187.             
  188.     except IndexError:
  189.         return charset
  190.  
  191.     i = p = n = 0
  192.     runs = []
  193.     for c in charmap:
  194.         if c:
  195.             n = n + 1
  196.         elif n:
  197.             runs.append((p, n))
  198.             n = 0
  199.         
  200.         i = i + 1
  201.     
  202.     if n:
  203.         runs.append((p, n))
  204.     
  205.     if len(runs) <= 2:
  206.         for p, n in runs:
  207.             pass
  208.         
  209.         if len(out) < len(charset):
  210.             return out
  211.         
  212.     else:
  213.         data = []
  214.         m = 1
  215.         v = 0
  216.         for c in charmap:
  217.             m = m << 1
  218.             if m > MAXCODE:
  219.                 data.append(v)
  220.                 m = 1
  221.                 v = 0
  222.             
  223.         
  224.         out.append((CHARSET, data))
  225.         return out
  226.     return charset
  227.  
  228.  
  229. def _simple(av):
  230.     (lo, hi) = av[2].getwidth()
  231.     if lo == 0 and hi == MAXREPEAT:
  232.         raise error, 'nothing to repeat'
  233.     
  234.     if hi == hi:
  235.         pass
  236.     elif hi == 1:
  237.         pass
  238.     return av[2][0][0] != SUBPATTERN
  239.  
  240.  
  241. def _compile_info(code, pattern, flags):
  242.     (lo, hi) = pattern.getwidth()
  243.     if lo == 0:
  244.         return None
  245.     
  246.     prefix = []
  247.     prefix_skip = 0
  248.     charset = []
  249.     if not (flags & SRE_FLAG_IGNORECASE):
  250.         for op, av in pattern.data:
  251.             if op is LITERAL:
  252.                 prefix.append(av)
  253.             elif op is SUBPATTERN and len(av[1]) == 1:
  254.                 (op, av) = av[1][0]
  255.                 if op is LITERAL:
  256.                     prefix.append(av)
  257.                 else:
  258.                     break
  259.             else:
  260.                 break
  261.         
  262.         if not prefix and pattern.data:
  263.             (op, av) = pattern.data[0]
  264.             if op is SUBPATTERN and av[1]:
  265.                 (op, av) = av[1][0]
  266.                 if op is LITERAL:
  267.                     charset.append((op, av))
  268.                 elif op is BRANCH:
  269.                     c = []
  270.                     for p in av[1]:
  271.                         (op, av) = p[0]
  272.                         if op is LITERAL:
  273.                             c.append((op, av))
  274.                         else:
  275.                             break
  276.                     else:
  277.                         charset = c
  278.                 
  279.             elif op is BRANCH:
  280.                 c = []
  281.                 for p in av[1]:
  282.                     (op, av) = p[0]
  283.                     if op is LITERAL:
  284.                         c.append((op, av))
  285.                     else:
  286.                         break
  287.                 else:
  288.                     charset = c
  289.             elif op is IN:
  290.                 charset = av
  291.             
  292.         
  293.     
  294.     emit = code.append
  295.     emit(OPCODES[INFO])
  296.     skip = len(code)
  297.     emit(0)
  298.     mask = 0
  299.     if prefix:
  300.         mask = SRE_INFO_PREFIX
  301.         if prefix_skip == prefix_skip:
  302.             pass
  303.         elif prefix_skip == len(pattern.data):
  304.             mask = mask + SRE_INFO_LITERAL
  305.         
  306.     elif charset:
  307.         mask = mask + SRE_INFO_CHARSET
  308.     
  309.     emit(mask)
  310.     if lo < MAXCODE:
  311.         emit(lo)
  312.     else:
  313.         emit(MAXCODE)
  314.         prefix = prefix[:MAXCODE]
  315.     if hi < MAXCODE:
  316.         emit(hi)
  317.     else:
  318.         emit(0)
  319.     if prefix:
  320.         emit(len(prefix))
  321.         emit(prefix_skip)
  322.         code.extend(prefix)
  323.         table = [
  324.             -1] + [
  325.             0] * len(prefix)
  326.         for i in range(len(prefix)):
  327.             table[i + 1] = table[i] + 1
  328.             while table[i + 1] > 0 and prefix[i] != prefix[table[i + 1] - 1]:
  329.                 table[i + 1] = table[table[i + 1] - 1] + 1
  330.                 continue
  331.                 0
  332.         
  333.         code.extend(table[1:])
  334.     elif charset:
  335.         _compile_charset(charset, 0, code)
  336.     
  337.     code[skip] = len(code) - skip
  338.  
  339. STRING_TYPES = [
  340.     type('')]
  341.  
  342. try:
  343.     STRING_TYPES.append(type(unicode('')))
  344. except NameError:
  345.     pass
  346.  
  347.  
  348. def _code(p, flags):
  349.     flags = p.pattern.flags | flags
  350.     code = []
  351.     _compile_info(code, p, flags)
  352.     _compile(code, p.data, flags)
  353.     code.append(OPCODES[SUCCESS])
  354.     return code
  355.  
  356.  
  357. def compile(p, flags = 0):
  358.     if type(p) in STRING_TYPES:
  359.         import sre_parse
  360.         pattern = p
  361.         p = sre_parse.parse(p, flags)
  362.     else:
  363.         pattern = None
  364.     code = _code(p, flags)
  365.     if not __debug__ and p.pattern.groups <= 100:
  366.         raise AssertionError, 'sorry, but this version only supports 100 named groups'
  367.     groupindex = p.pattern.groupdict
  368.     indexgroup = [
  369.         None] * p.pattern.groups
  370.     for k, i in groupindex.items():
  371.         indexgroup[i] = k
  372.     
  373.     return _sre.compile(pattern, flags, code, p.pattern.groups - 1, groupindex, indexgroup)
  374.  
  375.