home *** CD-ROM | disk | FTP | other *** search
Java Source | 1995-08-11 | 6.4 KB | 225 lines |
- /*
- * @(#)RegexpPool.java 1.2 95/04/03
- *
- * Copyright (c) 1995 Sun Microsystems, Inc. All Rights reserved Permission to
- * use, copy, modify, and distribute this software and its documentation for
- * NON-COMMERCIAL purposes and without fee is hereby granted provided that
- * this copyright notice appears in all copies. Please refer to the file
- * copyright.html for further important copyright and licensing information.
- *
- * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
- * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
- * OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY
- * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR
- * ITS DERIVATIVES.
- */
-
- package java.util;
-
- /**
- * A class to represent a pool of regular expressions. A string
- * can be matched against the whole pool all at once. It is much
- * faster than doing individual regular expression matches one-by-one.
- * @see java.util.RegexpTarget
- * @author James Gosling
- */
-
- public class RegexpPool {
- private RegexpNode prefixMachine = new RegexpNode(0);
- private RegexpNode suffixMachine = new RegexpNode(0);
- private static final int BIG = 0x7FFFFFFF;
- private int lastBestRank = BIG;
- public RegexpPool () {
- prefixMachine.rank = BIG;
- suffixMachine.rank = BIG;
- }
- /** Add a regular expression to the pool of regular expressions.
- @param re The regular expression to add to the pool.
- For now, only handles strings that either begin or end with
- a '*'.
- @param ret The object to be returned when this regular expression is
- matched. If ret is an instance of the RegexpTarget class, ret.found
- is called with the string fragment that matched the '*' as its parameter.
- */
- public void add(String re, Object ret) {
- int len = re.length();
- RegexpNode p;
- if (re.charAt(0) == '*') {
- p = suffixMachine;
- while (len > 1)
- p = p.add(re.charAt(--len));
- } else {
- boolean exact = false;
- if (re.charAt(len - 1) == '*')
- len--;
- else
- exact = true;
- p = prefixMachine;
- for (int i = 0; i < len; i++)
- p = p.add(re.charAt(i));
- p.exact = exact;
- }
- if (p.result != null)
- throw new REException(re + " is a duplicate");
- p.result = ret;
- p.rank = RegexpNode.masterrank++;
-
- }
-
- /** Search for a match to a string & return the object associated
- with it with the match. When multiple regular expressions
- would match the string, the one returned is the one first added
- to the pool.
- @param s The string to match against the regular expressions
- in the pool.
- @return null on failure, otherwise the object associated with
- the regular expression when it was added to the pool.
- If the object is an instance of RegexpTarget, then
- the return value is the result from calling
- return.found(string_that_matched_wildcard).
- */
- public Object match(String s) {
- return matchAfter(s, BIG);
- }
-
- /** Identical to match except that it will only find matches to
- regular expressions that were added to the pool <i>after<i>
- the last regular expression that matched in the last call
- to match() or matchNext() */
- public Object matchNext(String s) {
- return matchAfter(s, lastBestRank);
- }
-
- private Object matchAfter(String s, int bestRank) {
- RegexpNode p = prefixMachine;
- RegexpNode best = p;
- int bst = 0;
- int bend = 0;
- int len = s.length();
- int i;
- if (len <= 0)
- return null;
- /* March forward through the prefix machine */
- for (i = 0; p != null; i++) {
- if (p.result != null && p.rank < bestRank
- && (!p.exact || i == len)) {
- bestRank = p.rank;
- best = p;
- bst = i;
- bend = len;
- }
- if (i >= len)
- break;
- p = p.find(s.charAt(i));
- }
- /* march backward through the suffix machine */
- p = suffixMachine;
- for (i = len; --i >= 0 && p != null;) {
- if (p.result != null && p.rank < bestRank) {
- bestRank = p.rank;
- best = p;
- bst = 0;
- bend = i+1;
- }
- p = p.find(s.charAt(i));
- }
- Object o = best.result;
- if (o != null && o instanceof RegexpTarget)
- o = ((RegexpTarget) o).found(s.substring(bst, bend));
- lastBestRank = bestRank;
- return o;
- }
-
- /** Resets the pool so that the next call to matchNext looks
- at all regular expressions in the pool. match(s); is equivalent
- to reset(); matchNext(s);
- <p><b>Multithreading note:</b> reset/nextMatch leave state in the
- regular expression pool. If multiple threads could be using this
- pool this way, they should be syncronized to avoid race hazards.
- match() was done in such a way that there are no such race
- hazards: multiple threads can be matching in the same pool
- simultaneously. */
- public void reset() {
- lastBestRank = BIG;
- }
-
- static public void main(String argv[]) {
- RegexpPool rp = new RegexpPool ();
- for (int i = 1; i < argv.length; i++)
- rp.add(argv[i], argv[i]);
- rp.print();
- }
-
- /** Print this pool to standard output */
- public void print() {
- System.out.print("Regexp pool:\n");
- if (suffixMachine.firstchild != null) {
- System.out.print(" Suffix machine: ");
- suffixMachine.firstchild.print();
- System.out.print("\n");
- }
- if (prefixMachine.firstchild != null) {
- System.out.print(" Prefix machine: ");
- prefixMachine.firstchild.print();
- System.out.print("\n");
- }
- }
- }
-
- /* A node in a regular expression finite state machine. */
- class RegexpNode {
- char c;
- RegexpNode firstchild;
- RegexpNode nextsibling;
- int rank;
- boolean exact;
- static int masterrank;
- Object result;
- RegexpNode (char C) {
- c = C;
- }
- RegexpNode add(char C) {
- RegexpNode p = firstchild;
- if (p == null)
- p = new RegexpNode (C);
- else {
- while (p != null)
- if (p.c == C)
- return p;
- else
- p = p.nextsibling;
- p = new RegexpNode (C);
- p.nextsibling = firstchild;
- }
- firstchild = p;
- return p;
- }
- RegexpNode find(char C) {
- for (RegexpNode p = firstchild;
- p != null;
- p = p.nextsibling)
- if (p.c == C)
- return p;
- return null;
- }
- void print() {
- if (nextsibling != null) {
- RegexpNode p = this;
- System.out.print("(");
- while (p != null) {
- System.out.write(p.c);
- if (p.firstchild != null)
- p.firstchild.print();
- p = p.nextsibling;
- System.out.write(p != null ? '|' : ')');
- }
- } else {
- System.out.write(c);
- if (firstchild != null)
- firstchild.print();
- }
- }
-
- }
-