home *** CD-ROM | disk | FTP | other *** search
- ############################################################################
- #
- # File: patterns.icn
- #
- # Subject: Procedures for SNOBOL4-style pattern matching
- #
- # Author: Ralph E. Griswold
- #
- # Date: June 10, 1988
- #
- ###########################################################################
- #
- # These procedures provide procedural equivalents for most SNOBOL4
- # patterns and some extensions.
- #
- # Procedures and their pattern equivalents are:
- #
- # Any(s) ANY(S)
- #
- # Arb() ARB
- #
- # Arbno(p) ARBNO(P)
- #
- # Arbx(i) ARB(I)
- #
- # Bal() BAL
- #
- # Break(s) BREAK(S)
- #
- # Breakx(s) BREAKX(S)
- #
- # Cat(p1,p2) P1 P2
- #
- # Discard(p) /P
- #
- # Exog(s) \S
- #
- # Find(s) FIND(S)
- #
- # Len(i) LEN(I)
- #
- # Limit(p,i) P \ i
- #
- # Locate(p) LOCATE(P)
- #
- # Marb() longest-first ARB
- #
- # Notany(s) NOTANY(S)
- #
- # Pos(i) POS(I)
- #
- # Replace(p,s) P = S
- #
- # Rpos(i) RPOS(I)
- #
- # Rtab(i) RTAB(I)
- #
- # Span(s) SPAN(S)
- #
- # String(s) S
- #
- # Succeed() SUCCEED
- #
- # Tab(i) TAB(I)
- #
- # Xform(f,p) F(P)
- #
- # The following procedures relate to the application and control
- # of pattern matching:
- #
- # Apply(s,p) S ? P
- #
- # Mode() anchored or unanchored matching (see Anchor
- # and Float)
- #
- # Anchor() &ANCHOR = 1 if Mode := Anchor
- #
- # Float() &ANCHOR = 0 if Mode := Float
- #
- # In addition to the procedures above, the following expressions
- # can be used:
- #
- # p1() | p2() P1 | P2
- #
- # v <- p() P . V (approximate)
- #
- # v := p() P $ V (approximate)
- #
- # fail FAIL
- #
- # =s S (in place of String(s))
- #
- # p1() || p2() P1 P2 (in place of Cat(p1,p2))
- #
- # Using this system, most SNOBOL4 patterns can be satisfactorily
- # transliterated into Icon procedures and expressions. For example,
- # the pattern
- #
- # SPAN("0123456789") $ N "H" LEN(*N) $ LIT
- #
- # can be transliterated into
- #
- # (n <- Span('0123456789')) || ="H" ||
- # (lit <- Len(n))
- #
- # Concatenation of components is necessary to preserve the
- # pattern-matching properties of SNOBOL4.
- #
- # Caveats: Simulating SNOBOL4 pattern matching using the procedures
- # above is inefficient.
- #
- ############################################################################
-
- global Mode, Float
-
- procedure Anchor() # &ANCHOR = 1
-    #
-    #  Produces the empty string exactly once and leaves &pos untouched.
-    #  When assigned to Mode, Apply() calls it before the pattern, so the
-    #  pattern is only attempted at the starting position.
-    #
-    return ""
- end
-
- procedure Any(s) # ANY(S)
-    local after
-    #
-    #  Matches a single character of the subject at &pos if it is a
-    #  member of s, yielding that character.  tab() undoes the move when
-    #  the procedure is resumed.
-    #
-    suspend (after := any(s)) & tab(after)
- end
-
- procedure Apply(s,p) # S ? P
-    local tsubject, tpos, mpos, value
-    #
-    #  Applies pattern procedure p to subject s and produces each value
-    #  generated by p().  Mode() is evaluated in front of p(); it is Arb
-    #  (unanchored matching) unless Mode was already set by the caller.
-    #
-    #  The caller's &subject and &pos are saved in tsubject/tpos and
-    #  reinstated before every result is produced; the reversible
-    #  assignments put the scanning state of s back when Apply is
-    #  resumed, so that p() can look for another match.
-    #
-    initial {
-       Float := Arb
-       /Mode := Float # &ANCHOR = 0 if not already set
-       }
-    suspend (
-       (tsubject := &subject) &
-       (tpos := &pos) &
-       (&subject <- s) &
-       (&pos <- 1) &
-       (Mode() & (value := p())) &
-       #
-       #  Record the position reached inside s so it is restored on
-       #  backtracking.  The value assigned must be a position that is
-       #  valid in s: assigning the caller's tpos here makes the
-       #  assignment (and therefore the whole match) fail whenever
-       #  tpos > *s + 1.
-       #
-       (mpos := &pos) &
-       (&pos <- mpos) & # to restore on backtracking
-       (&subject <- tsubject) & # note this sets &pos
-       (&pos <- tpos) & # to restore on evaluation
-       value
-       )
- end
-
- procedure Arb() # ARB
-    #
-    #  Generates successively longer substrings starting at &pos: first
-    #  the empty string, last everything up to the end of the subject.
-    #  Each move is undone when the procedure is resumed.
-    #
-    suspend move(0 to *&subject - &pos + 1)
- end
-
- procedure Arbno(p) # ARBNO(P)
-    #
-    #  Matches zero or more consecutive matches of pattern procedure p.
-    #  The empty match is produced first; afterwards each match of p()
-    #  is concatenated with the results of a recursive Arbno(p).
-    #
-    suspend ""
-    suspend p() || Arbno(p)
- end
-
- procedure Arbx(i) # ARB(I)
-    local target
-    #
-    #  Like Arb(), but the candidate end positions advance in steps of i
-    #  from &pos up to the position after the last subject character.
-    #
-    every target := &pos to *&subject + 1 by i do
-       suspend tab(target)
- end
-
- procedure Bal() # BAL
-    local first
-    #
-    #  Matches one balanced element (see Bbal) followed by any number of
-    #  further balanced elements, yielding the concatenated text.
-    #
-    every first := Bbal() do
-       suspend first || Arbno(Bbal)
- end
-
- procedure Bbal() # used by Bal()
-    #
-    #  One balanced element: a parenthesized run of balanced elements
-    #  is tried first, then a single character that is neither "(" nor ")".
-    #
-    suspend ="(" || Arbno(Bbal) || =")"
-    suspend Notany("()")
- end
-
- procedure Break(s) # BREAK(S)
-    local where
-    #
-    #  Matches up to, but not including, the first character at or after
-    #  &pos that is a member of s.  Only that first position is tried.
-    #
-    suspend (where := (upto(s) \ 1)) & tab(where)
- end
-
- procedure Breakx(s) # BREAKX(S)
-    local where
-    #
-    #  Like Break(), but when resumed it extends the match to each later
-    #  position at which a character of s occurs.
-    #
-    every where := upto(s) do
-       suspend tab(where)
- end
-
- procedure Cat(p1,p2) # P1 P2
-    local left
-    #
-    #  Matches p1() followed by p2() and yields the concatenation of
-    #  their values.  All alternatives of p2() are tried for a given
-    #  match of p1() before p1() is resumed.
-    #
-    every left := p1() do
-       suspend left || p2()
- end
-
- procedure Discard(p) # /P
-    #
-    #  Matches whatever p() matches but yields the empty string instead
-    #  of the value produced by p().
-    #
-    suspend (p(), "")
- end
-
- procedure Exog(s) # \S
-    #
-    #  Yields s once, without looking at or moving in the subject.
-    #
-    return s
- end
-
- procedure Find(s) # FIND(S)
-    local start
-    #
-    #  For each position at which s occurs at or after &pos, moves to the
-    #  position one past the start of that occurrence and yields the
-    #  text passed over.
-    #
-    #  NOTE(review): find() already produces Icon (1-based) positions, so
-    #  the "+ 1" ends the match after the first character of s rather
-    #  than in front of s -- confirm that this is the intended FIND(S).
-    #
-    every start := find(s) do
-       suspend tab(start + 1)
- end
-
- procedure Len(i) # LEN(I)
-    local piece
-    #
-    #  Matches the next i characters of the subject; fails if move(i)
-    #  cannot be performed.  The move is undone on resumption.
-    #
-    every piece := move(i) do
-       suspend piece
- end
-
- procedure Limit(p,i) # P \ i
-    local start, result
-    #
-    #  Produces at most i results of p().  Once the results are used up
-    #  (or the limit is reached) &pos is set back to the value it had
-    #  on entry and the procedure fails.
-    #
-    start := &pos
-    every result := p() \ i do
-       suspend result
-    &pos := start
- end
-
- procedure Locate(p) # LOCATE(P)
-    #
-    #  Tries p() at &pos and then at every later position up to the end
-    #  of the subject, yielding the values produced by p().
-    #
-    every tab(&pos to *&subject + 1) do
-       suspend p()
- end
-
- procedure Marb() # max-first ARB
-    #
-    #  Generates the same substrings as Arb() in the opposite order:
-    #  the rest of the subject first, the empty string last.
-    #
-    suspend move(*&subject - &pos + 1 to 0 by -1)
- end
-
- procedure Notany(s) # NOTANY(S)
-    #
-    #  Matches a single character at &pos provided it is not a member
-    #  of s, yielding that character.
-    #
-    suspend tab(any(&cset -- s))
- end
-
- procedure Pos(i) # POS(I)
-    #
-    #  Succeeds with the empty string when i characters precede the
-    #  current position, i.e. when &pos corresponds to position i + 1;
-    #  fails otherwise.
-    #
-    if pos(i + 1) then return ""
-    fail
- end
-
- procedure Replace(p,s) # P = S
-    #
-    #  Matches whatever p() matches and yields s in place of the value
-    #  produced by p().  The subject itself is not modified.
-    #
-    every p() do
-       suspend s
- end
-
- procedure Rpos(i) # RPOS(I)
-    #
-    #  Succeeds with the empty string when the current position is the
-    #  one designated by -i (position 0 being the end of the subject);
-    #  fails otherwise.
-    #
-    if pos(-i) then return ""
-    fail
- end
-
- procedure Rtab(i) # RTAB(I)
-    local target
-    #
-    #  Moves to the position designated by -i (position 0 being the end
-    #  of the subject) and yields the text between the old and the new
-    #  position.  The move is undone on resumption.
-    #
-    target := -i
-    suspend tab(target)
- end
-
- procedure Span(s) # SPAN(S)
-    local stop_at
-    #
-    #  Matches the longest nonempty run of characters at &pos that are
-    #  all members of s; fails if the character at &pos is not in s.
-    #
-    suspend (stop_at := many(s)) & tab(stop_at)
- end
-
- procedure String(s) # S
-    #
-    #  Matches the literal string s at the current position and yields
-    #  it; the procedural form of the prefix operation =s.
-    #
-    suspend tab(match(s))
- end
-
- procedure Succeed() # SUCCEED
-    #
-    #  Yields the empty string every time it is resumed, without end.
-    #
-    repeat
-       suspend ""
- end
-
- procedure Tab(i) # TAB(I)
-    local target
-    #
-    #  Moves to the position that has i characters in front of it
-    #  (Icon position i + 1) and yields the text between the old and the
-    #  new position.  The move is undone on resumption.
-    #
-    target := i + 1
-    suspend tab(target)
- end
-
- procedure Xform(f,p) # F(P)
-    local matched
-    #
-    #  Applies f to every value produced by pattern procedure p and
-    #  yields the results of f.
-    #
-    every matched := p() do
-       suspend f(matched)
- end
-