home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PC World Komputer 1995 November
/
PCWK1195.iso
/
inne
/
win95
/
sieciowe
/
hotja32.lzh
/
hotjava
/
classsrc
/
java
/
util
/
regexppool.java
< prev
next >
Wrap
Text File
|
1995-08-11
|
7KB
|
225 lines
/*
* @(#)RegexpPool.java 1.2 95/04/03
*
* Copyright (c) 1995 Sun Microsystems, Inc. All Rights reserved Permission to
* use, copy, modify, and distribute this software and its documentation for
* NON-COMMERCIAL purposes and without fee is hereby granted provided that
* this copyright notice appears in all copies. Please refer to the file
* copyright.html for further important copyright and licensing information.
*
* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
* SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
* IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
* OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY
* LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR
* ITS DERIVATIVES.
*/
package java.util;
/**
* A class to represent a pool of regular expressions. A string
* can be matched against the whole pool all at once. It is much
* faster than doing individual regular expression matches one-by-one.
* @see java.util.RegexpTarget
* @author James Gosling
*/
public class RegexpPool {
private RegexpNode prefixMachine = new RegexpNode(0);
private RegexpNode suffixMachine = new RegexpNode(0);
private static final int BIG = 0x7FFFFFFF;
private int lastBestRank = BIG;
public RegexpPool () {
prefixMachine.rank = BIG;
suffixMachine.rank = BIG;
}
/** Add a regular expression to the pool of regular expressions.
@param re The regular expression to add to the pool.
For now, only handles strings that either begin or end with
a '*'.
@param ret The object to be returned when this regular expression is
matched. If ret is an instance of the RegexpTarget class, ret.found
is called with the string fragment that matched the '*' as its parameter.
*/
public void add(String re, Object ret) {
int len = re.length();
RegexpNode p;
if (re.charAt(0) == '*') {
p = suffixMachine;
while (len > 1)
p = p.add(re.charAt(--len));
} else {
boolean exact = false;
if (re.charAt(len - 1) == '*')
len--;
else
exact = true;
p = prefixMachine;
for (int i = 0; i < len; i++)
p = p.add(re.charAt(i));
p.exact = exact;
}
if (p.result != null)
throw new REException(re + " is a duplicate");
p.result = ret;
p.rank = RegexpNode.masterrank++;
}
/** Search for a match to a string & return the object associated
with it with the match. When multiple regular expressions
would match the string, the one returned is the one first added
to the pool.
@param s The string to match against the regular expressions
in the pool.
@return null on failure, otherwise the object associated with
the regular expression when it was added to the pool.
If the object is an instance of RegexpTarget, then
the return value is the result from calling
return.found(string_that_matched_wildcard).
*/
public Object match(String s) {
return matchAfter(s, BIG);
}
/** Identical to match except that it will only find matches to
regular expressions that were added to the pool <i>after<i>
the last regular expression that matched in the last call
to match() or matchNext() */
public Object matchNext(String s) {
return matchAfter(s, lastBestRank);
}
private Object matchAfter(String s, int bestRank) {
RegexpNode p = prefixMachine;
RegexpNode best = p;
int bst = 0;
int bend = 0;
int len = s.length();
int i;
if (len <= 0)
return null;
/* March forward through the prefix machine */
for (i = 0; p != null; i++) {
if (p.result != null && p.rank < bestRank
&& (!p.exact || i == len)) {
bestRank = p.rank;
best = p;
bst = i;
bend = len;
}
if (i >= len)
break;
p = p.find(s.charAt(i));
}
/* march backward through the suffix machine */
p = suffixMachine;
for (i = len; --i >= 0 && p != null;) {
if (p.result != null && p.rank < bestRank) {
bestRank = p.rank;
best = p;
bst = 0;
bend = i+1;
}
p = p.find(s.charAt(i));
}
Object o = best.result;
if (o != null && o instanceof RegexpTarget)
o = ((RegexpTarget) o).found(s.substring(bst, bend));
lastBestRank = bestRank;
return o;
}
/** Resets the pool so that the next call to matchNext looks
at all regular expressions in the pool. match(s); is equivalent
to reset(); matchNext(s);
<p><b>Multithreading note:</b> reset/nextMatch leave state in the
regular expression pool. If multiple threads could be using this
pool this way, they should be syncronized to avoid race hazards.
match() was done in such a way that there are no such race
hazards: multiple threads can be matching in the same pool
simultaneously. */
public void reset() {
lastBestRank = BIG;
}
static public void main(String argv[]) {
RegexpPool rp = new RegexpPool ();
for (int i = 1; i < argv.length; i++)
rp.add(argv[i], argv[i]);
rp.print();
}
/** Print this pool to standard output */
public void print() {
System.out.print("Regexp pool:\n");
if (suffixMachine.firstchild != null) {
System.out.print(" Suffix machine: ");
suffixMachine.firstchild.print();
System.out.print("\n");
}
if (prefixMachine.firstchild != null) {
System.out.print(" Prefix machine: ");
prefixMachine.firstchild.print();
System.out.print("\n");
}
}
}
/* A node in a regular expression finite state machine. */
class RegexpNode {
char c;
RegexpNode firstchild;
RegexpNode nextsibling;
int rank;
boolean exact;
static int masterrank;
Object result;
RegexpNode (char C) {
c = C;
}
RegexpNode add(char C) {
RegexpNode p = firstchild;
if (p == null)
p = new RegexpNode (C);
else {
while (p != null)
if (p.c == C)
return p;
else
p = p.nextsibling;
p = new RegexpNode (C);
p.nextsibling = firstchild;
}
firstchild = p;
return p;
}
RegexpNode find(char C) {
for (RegexpNode p = firstchild;
p != null;
p = p.nextsibling)
if (p.c == C)
return p;
return null;
}
void print() {
if (nextsibling != null) {
RegexpNode p = this;
System.out.print("(");
while (p != null) {
System.out.write(p.c);
if (p.firstchild != null)
p.firstchild.print();
p = p.nextsibling;
System.out.write(p != null ? '|' : ')');
}
} else {
System.out.write(c);
if (firstchild != null)
firstchild.print();
}
}
}