home *** CD-ROM | disk | FTP | other *** search
/ PC World Komputer 1999 March B / SCO_CASTOR4RRT.iso / uccs / root.13 / usr / include / regexp.h < prev    next >
C/C++ Source or Header  |  1998-08-19  |  9KB  |  518 lines

  1. /*
  2.  * Copyright (c) 1998 The Santa Cruz Operation, Inc.. All Rights Reserved. 
  3.  *                                                                         
  4.  *        THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF THE               
  5.  *                   SANTA CRUZ OPERATION INC.                             
  6.  *                                                                         
  7.  *   The copyright notice above does not evidence any actual or intended   
  8.  *   publication of such source code.                                      
  9.  */
  10.  
  11. #ifndef _REGEXP_H
  12. #define _REGEXP_H
  13. #ident    "@(#)sgs-head:common/head/regexp.h    1.9.1.4"
  14.  
  15. #include <string.h>
  16.  
  17. #define    CBRA    2
  18. #define    CCHR    4
  19. #define    CDOT    8
  20. #define    CCL    12
  21. #define    CXCL    16
  22. #define    CDOL    20
  23. #define    CCEOF    22
  24. #define    CKET    24
  25. #define    CBACK    36
  26. #define NCCL    40
  27.  
  28. #define    STAR    01
  29. #define RNGE    03
  30.  
  31. #define    NBRA    9
  32.  
  33. #define PLACE(c)    ep[c >> 3] |= bittab[c & 07]
  34. #define ISTHERE(c)    (ep[c >> 3] & bittab[c & 07])
  35. #define ecmp(s1, s2, n)    (!strncmp(s1, s2, n))
  36.  
  37. #ifdef __cplusplus
  38. #define __STATIC static
  39. #else
  40. #define __STATIC
  41. #endif
  42.  
  43. static unsigned char    bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
  44. static char    *braslist[NBRA];
  45. static char    *braelist[NBRA];
  46. static int    nodelim, low, size;
  47.  
  48. __STATIC int    sed, nbra;
  49. __STATIC char    *loc1, *loc2, *locs;
  50. __STATIC int    circf;
  51.  
  52. #ifdef __cplusplus
  53. typedef int (*__LOC1)(char* = loc1);
  54. typedef int (*__LOC2)(char* = loc2);
  55. typedef int (*__NODELIM)(int =  nodelim);
  56. #endif
  57.  
  58. __STATIC char *
  59. compile(char *instring, register char *ep, const char *endbuf, int seof)
  60. {
  61. #ifdef __cplusplus
  62.     enum __s {_INSTRING = sizeof(instring)}; 
  63. #endif
  64.     INIT    /* Dependent declarations and initializations */
  65.     register int c;
  66.     register int eof = seof;
  67.     char *lastep = instring;
  68.     int cclcnt;
  69.     char bracket[NBRA], *bracketp;
  70.     int closed;
  71.     int neg;
  72.     int lc;
  73.     int i, cflg;
  74.     int iflag; /* used for non-ascii characters in brackets */
  75.  
  76.     lastep = 0;
  77.     if((c = GETC()) == eof || c == '\n') {
  78.         if(c == '\n') {
  79.             UNGETC(c);
  80.             nodelim = 1;
  81.         }
  82.         if(*ep == 0 && !sed)
  83.             ERROR(41);
  84.         RETURN(ep);
  85.     }
  86.     bracketp = bracket;
  87.     circf = closed = nbra = 0;
  88.     if(c == '^')
  89.         circf++;
  90.     else
  91.         UNGETC(c);
  92.     while(1) {
  93.         if(ep >= endbuf)
  94.             ERROR(50);
  95.         c = GETC();
  96.         if(c != '*' && ((c != '\\') || (PEEKC() != '{')))
  97.             lastep = ep;
  98.         if(c == eof) {
  99.             *ep++ = CCEOF;
  100.             if (bracketp != bracket)
  101.                 ERROR(42);
  102.             RETURN(ep);
  103.         }
  104.         switch(c) {
  105.  
  106.         case '.':
  107.             *ep++ = CDOT;
  108.             continue;
  109.  
  110.         case '\n':
  111.             if(!sed) {
  112.                 UNGETC(c);
  113.                 *ep++ = CCEOF;
  114.                 nodelim = 1;
  115.                 if(bracketp != bracket)
  116.                     ERROR(42);
  117.                 RETURN(ep);
  118.             }
  119.             else ERROR(36);
  120.         case '*':
  121.             if(lastep == 0 || *lastep == CBRA || *lastep == CKET)
  122.                 goto defchar;
  123.             *lastep |= STAR;
  124.             continue;
  125.  
  126.         case '$':
  127.             if(PEEKC() != eof && PEEKC() != '\n')
  128.                 goto defchar;
  129.             *ep++ = CDOL;
  130.             continue;
  131.  
  132.         case '[':
  133.             if(&ep[17] >= endbuf)
  134.                 ERROR(50);
  135.  
  136.             *ep++ = CCL;
  137.             lc = 0;
  138.             for(i = 0; i < 16; i++)
  139.                 ep[i] = 0;
  140.  
  141.             neg = 0;
  142.             if((c = GETC()) == '^') {
  143.                 neg = 1;
  144.                 c = GETC();
  145.             }
  146.             iflag = 1;
  147.             do {
  148.                 c &= 0377;
  149.                 if(c == '\0' || c == '\n')
  150.                     ERROR(49);
  151.                 if((c & 0200) && iflag) {
  152.                     iflag = 0;
  153.                     if(&ep[32] >= endbuf)
  154.                         ERROR(50);
  155.                     ep[-1] = CXCL;
  156.                     for(i = 16; i < 32; i++)
  157.                         ep[i] = 0;
  158.                 }
  159.                 if(c == '-' && lc != 0) {
  160.                     if((c = GETC()) == ']') {
  161.                         PLACE('-');
  162.                         break;
  163.                     }
  164.                     if((c & 0200) && iflag) {
  165.                         iflag = 0;
  166.                         if(&ep[32] >= endbuf)
  167.                             ERROR(50);
  168.                         ep[-1] = CXCL;
  169.                         for(i = 16; i < 32; i++)
  170.                             ep[i] = 0;
  171.                     }
  172.                     while(lc < c ) {
  173.                         PLACE(lc);
  174.                         lc++;
  175.                     }
  176.                 }
  177.                 lc = c;
  178.                 PLACE(c);
  179.             } while((c = GETC()) != ']');
  180.             
  181.             if(iflag)
  182.                 iflag = 16;
  183.             else
  184.                 iflag = 32;
  185.             
  186.             if(neg) {
  187.                 if(iflag == 32) {
  188.                     for(cclcnt = 0; cclcnt < iflag; cclcnt++)
  189.                         ep[cclcnt] ^= 0377;
  190.                     ep[0] &= 0376;
  191.                 } else {
  192.                     ep[-1] = NCCL;
  193.                     /* make nulls match so test fails */
  194.                     ep[0] |= 01;
  195.                 }
  196.             }
  197.  
  198.             ep += iflag;
  199.  
  200.             continue;
  201.  
  202.         case '\\':
  203.             switch(c = GETC()) {
  204.  
  205.             case '(':
  206.                 if(nbra >= NBRA)
  207.                     ERROR(43);
  208.                 *bracketp++ = nbra;
  209.                 *ep++ = CBRA;
  210.                 *ep++ = nbra++;
  211.                 continue;
  212.  
  213.             case ')':
  214.                 if(bracketp <= bracket) 
  215.                     ERROR(42);
  216.                 *ep++ = CKET;
  217.                 *ep++ = *--bracketp;
  218.                 closed++;
  219.                 continue;
  220.  
  221.             case '{':
  222.                 if(lastep == (char *) 0)
  223.                     goto defchar;
  224.                 *lastep |= RNGE;
  225.                 cflg = 0;
  226.             nlim:
  227.                 c = GETC();
  228.                 i = 0;
  229.                 do {
  230.                     if('0' <= c && c <= '9')
  231.                         i = 10 * i + c - '0';
  232.                     else
  233.                         ERROR(16);
  234.                 } while(((c = GETC()) != '\\') && (c != ','));
  235.                 if(i >= 255)
  236.                     ERROR(11);
  237.                 *ep++ = i;
  238.                 if(c == ',') {
  239.                     if(cflg++)
  240.                         ERROR(44);
  241.                     if((c = GETC()) == '\\')
  242.                         *ep++ = (char)255;
  243.                     else {
  244.                         UNGETC(c);
  245.                         goto nlim;
  246.                         /* get 2'nd number */
  247.                     }
  248.                 }
  249.                 if(GETC() != '}')
  250.                     ERROR(45);
  251.                 if(!cflg)    /* one number */
  252.                     *ep++ = i;
  253.                 else if((ep[-1] & 0377) < (ep[-2] & 0377))
  254.                     ERROR(46);
  255.                 continue;
  256.  
  257.             case '\n':
  258.                 ERROR(36);
  259.  
  260.             case 'n':
  261.                 c = '\n';
  262.                 goto defchar;
  263.  
  264.             default:
  265.                 if(c >= '1' && c <= '9') {
  266.                     if((c -= '1') >= closed)
  267.                         ERROR(25);
  268.                     *ep++ = CBACK;
  269.                     *ep++ = c;
  270.                     continue;
  271.                 }
  272.             }
  273.     /* Drop through to default to use \ to turn off special chars */
  274.  
  275.         defchar:
  276.         default:
  277.             lastep = ep;
  278.             *ep++ = CCHR;
  279.             *ep++ = c;
  280.         }
  281.     }
  282. }
  283.  
  284. static void
  285. getrnge(register const char *str)
  286. {
  287.     low = *str++ & 0377;
  288.     size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
  289. }
  290.  
  291. __STATIC int
  292. advance(register const char *lp, register const char *ep)
  293. {
  294.     register const char *curlp;
  295.     int c;
  296.     char *bbeg; 
  297.     register char neg;
  298.     int ct;
  299.  
  300.     while(1) {
  301.         neg = 0;
  302.         switch(*ep++) {
  303.  
  304.         case CCHR:
  305.             if(*ep++ == *lp++)
  306.                 continue;
  307.             return(0);
  308.     
  309.         case CDOT:
  310.             if(*lp++)
  311.                 continue;
  312.             return(0);
  313.     
  314.         case CDOL:
  315.             if(*lp == 0)
  316.                 continue;
  317.             return(0);
  318.     
  319.         case CCEOF:
  320.             loc2 = (char *)lp;
  321.             return(1);
  322.     
  323.         case CXCL: 
  324.             c = (unsigned char)*lp++;
  325.             if(ISTHERE(c)) {
  326.                 ep += 32;
  327.                 continue;
  328.             }
  329.             return(0);
  330.         
  331.         case NCCL:    
  332.             neg = 1;
  333.  
  334.         case CCL: 
  335.             c = *lp++;
  336.             if(((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
  337.                 ep += 16;
  338.                 continue;
  339.             }
  340.             return(0);
  341.         
  342.         case CBRA:
  343.             braslist[*ep++] = (char *)lp;
  344.             continue;
  345.     
  346.         case CKET:
  347.             braelist[*ep++] = (char *)lp;
  348.             continue;
  349.     
  350.         case CCHR | RNGE:
  351.             c = *ep++;
  352.             getrnge(ep);
  353.             while(low--)
  354.                 if(*lp++ != c)
  355.                     return(0);
  356.             curlp = lp;
  357.             while(size--) 
  358.                 if(*lp++ != c)
  359.                     break;
  360.             if(size < 0)
  361.                 lp++;
  362.             ep += 2;
  363.             goto star;
  364.     
  365.         case CDOT | RNGE:
  366.             getrnge(ep);
  367.             while(low--)
  368.                 if(*lp++ == '\0')
  369.                     return(0);
  370.             curlp = lp;
  371.             while(size--)
  372.                 if(*lp++ == '\0')
  373.                     break;
  374.             if(size < 0)
  375.                 lp++;
  376.             ep += 2;
  377.             goto star;
  378.     
  379.         case CXCL | RNGE:
  380.             getrnge(ep + 32);
  381.             while(low--) {
  382.                 c = (unsigned char)*lp++;
  383.                 if(!ISTHERE(c))
  384.                     return(0);
  385.             }
  386.             curlp = lp;
  387.             while(size--) {
  388.                 c = (unsigned char)*lp++;
  389.                 if(!ISTHERE(c))
  390.                     break;
  391.             }
  392.             if(size < 0)
  393.                 lp++;
  394.             ep += 34;        /* 32 + 2 */
  395.             goto star;
  396.         
  397.         case NCCL | RNGE:
  398.             neg = 1;
  399.         
  400.         case CCL | RNGE:
  401.             getrnge(ep + 16);
  402.             while(low--) {
  403.                 c = *lp++;
  404.                 if(((c & 0200) || !ISTHERE(c)) ^ neg)
  405.                     return(0);
  406.             }
  407.             curlp = lp;
  408.             while(size--) {
  409.                 c = *lp++;
  410.                 if(((c & 0200) || !ISTHERE(c)) ^ neg)
  411.                     break;
  412.             }
  413.             if(size < 0)
  414.                 lp++;
  415.             ep += 18;         /* 16 + 2 */
  416.             goto star;
  417.     
  418.         case CBACK:
  419.             bbeg = braslist[*ep];
  420.             ct = braelist[*ep++] - bbeg;
  421.     
  422.             if(ecmp(bbeg, lp, ct)) {
  423.                 lp += ct;
  424.                 continue;
  425.             }
  426.             return(0);
  427.     
  428.         case CBACK | STAR:
  429.             bbeg = braslist[*ep];
  430.             ct = braelist[*ep++] - bbeg;
  431.             curlp = lp;
  432.             while(ecmp(bbeg, lp, ct))
  433.                 lp += ct;
  434.     
  435.             while(lp >= curlp) {
  436.                 if(advance(lp, ep))    return(1);
  437.                 lp -= ct;
  438.             }
  439.             return(0);
  440.     
  441.     
  442.         case CDOT | STAR:
  443.             curlp = lp;
  444.             while(*lp++);
  445.             goto star;
  446.     
  447.         case CCHR | STAR:
  448.             curlp = lp;
  449.             while(*lp++ == *ep);
  450.             ep++;
  451.             goto star;
  452.     
  453.         case CXCL | STAR:
  454.             curlp = lp;
  455.             do {
  456.                 c = (unsigned char)*lp++;
  457.             } while(ISTHERE(c));
  458.             ep += 32;
  459.             goto star;
  460.         
  461.         case NCCL | STAR:
  462.             neg = 1;
  463.  
  464.         case CCL | STAR:
  465.             curlp = lp;
  466.             do {
  467.                 c = *lp++;
  468.             } while(((c & 0200) == 0 && ISTHERE(c)) ^ neg);
  469.             ep += 16;
  470.             goto star;
  471.     
  472.         star:
  473.             do {
  474.                 if(--lp == locs)
  475.                     break;
  476.                 if(advance(lp, ep))
  477.                     return(1);
  478.             } while(lp > curlp);
  479.             return(0);
  480.  
  481.         }
  482.     }
  483. }
  484.  
  485. __STATIC int
  486. step(register const char *p1, register const char *p2)
  487. {
  488.     register int c;
  489.  
  490.     if(circf) {
  491.         loc1 = (char *)p1;
  492.         return(advance(p1, p2));
  493.     }
  494.     /* fast check for first character */
  495.     if(*p2 == CCHR) {
  496.         c = p2[1];
  497.         do {
  498.             if(*p1 != c)
  499.                 continue;
  500.             if(advance(p1, p2)) {
  501.                 loc1 = (char *)p1;
  502.                 return(1);
  503.             }
  504.         } while(*p1++);
  505.         return(0);
  506.     }
  507.         /* regular algorithm */
  508.     do {
  509.         if(advance(p1, p2)) {
  510.             loc1 = (char *)p1;
  511.             return(1);
  512.         }
  513.     } while(*p1++);
  514.     return(0);
  515. }
  516.  
  517. #endif /*_REGEXP_H*/
  518.