home *** CD-ROM | disk | FTP | other *** search
Java Source | 2000-08-18 | 9.4 KB | 299 lines |
- //AdvSiteSearcher c1999 The Gilbert Post by David Faden
- //The applet and code are distributed as linkware...
- //If you use this applet or a variant on its code,
- //include a link to The Gilbert Post,
- //http://www.geocities.com/Athens/Parthenon/1911
- //The Gilbert Post and David Faden take no responsibility
- //for anything bad that happens as a result of using this applet
- //or a derivative based on its code. USE AT YOUR OWN RISK. (big letters)
- //Please send reports of problems to gilbertnews@hotmail.com, anyway, though.
-
- //begin DocSearcher.java
- import java.io.*;
- import java.net.*;
- import java.util.*;
-
- // A bug fix:
- //
- // 4/12/2000 fixed a "Y2K" bug reported by several alert users... I am not sure what
- // I was thinking when I wrote the portion of code calling Date.getYear()... Perhaps that it
- // returns the year within the century? Anyway, in reality, getYear() returns the number of
- // years since 1900. Files with modification dates beyond 1999 were listed with years greater
- // than 99 (100 for 2000).
- // Note: most of the Date class's methods (including getYear()) are deprecated as of JDK 1.1
- // The code actually changed is found in HDocSearcher.java.
- //
- // 4/12/2000 added code that causes the DocSearcher's runner Thread to wait
- // when it is not "doing anything." This should be more efficient than in the
- // previous incarnation, where runner would sleep, then periodically wake up to
- // see if there was anything to search.
-
- class DocSearcher implements Runnable {
- private static final String[] months ={"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
- private BufferedInputStream bis=null;
- private AdvSiteSearcher parent;
-
- private volatile boolean searching;
- private volatile boolean running=false;//should the main loop be running?
- //if there is a horrible error such as a MalformedURLException
- //or a SecurityException refuse to search anymore...
- private volatile boolean noHorribleError;
-
- private URL url;
- private int index;//our index according to parent
- private SearchSieve[] searchers;
- private boolean addedinfo=false;
- private Thread runner;//this DocSearcher's Thread
- private char[] cache;
- private int cachelength=0;
- private boolean allCached=false;//Is the whole file stored?
- private boolean cutHTML=false;
- private boolean bexact=false;
- private int cachesize;
- private long filelength=0;//Do not rely on what the server says
-
- /**
- * Used to synchronize search methods.
- */
- private final Object searchLock=new Object();
-
- public DocSearcher(AdvSiteSearcher parent,URL url,int index,int cachesize) {
- searching=false;
- this.parent=parent;
- this.url=url;
- noHorribleError=true;
- this.index=index;
- this.cachesize=cachesize;
- }
-
- public void searchFor(String[] s, boolean bexact, boolean cutHTML) {
- if(!noHorribleError)
- return;
- stopSearch();
- synchronized (searchLock) { //wait for runner to wait
- this.bexact=bexact;
- this.cutHTML=cutHTML;
-
- //It's not efficient to create new SearchSieves each time.
- //SearchSieve already provides a method, setKey(char[],boolean),
- //but I'm not sure how I'd like to implement the SearchSieve pool.
- searchers=new SearchSieve[s.length];
- for(int i=0;i<s.length;i++) {
- searchers[i]=new SearchSieve(s[i].toLowerCase().toCharArray(),bexact);
- }
-
- if(cache==null && cachesize>0)
- cache=new char[cachesize];
- searching=true;
- if(!running) {
- running=true;
- runner=new Thread(this,"DocSearcher"+index+" runner");
- runner.start();
- }
- try {
- searchLock.notify();
- }
- catch (IllegalMonitorStateException imse) {
- imse.printStackTrace(System.err);
- }
- }
- }
-
- //do not use this method
- //it is a crutch to deal with a problem in the Applet's init
- public void setErrored() {
- noHorribleError=false;
- //System.out.println("Errored");
- }
-
- public boolean isErrored() {
- return !noHorribleError;
- }
-
- //This might not lead to a graceful close of bis.
- //I think I've fixed the above problem, though.
- public void stopSearch() {
- searching=false;
- }
-
-
-
- public void stopRunning() {
- stopSearch();
- synchronized (searchLock) {
- try {
- searchLock.notify(); //runner will be free to run once we
- //the calling Thread exits this synchronized block
- }
- catch (IllegalMonitorStateException imse) {
- imse.printStackTrace(System.err);
- }
- running=false;
- }
- }
-
- //this could cause NullPointerException if called while a search is in progress
- public void trashCache() {
- stopSearch();
- synchronized (searchLock) {
- cache=null;
- }
- //Here were are assuming that only one Thread at a time is calling the public
- //methods of DocSearcher.
- //Otherwise, it would be possible for another search to start in the interval
- //between the two lines of this method
- }
-
-
- public void run() {
- synchronized (searchLock) {
- while (running && noHorribleError) {
- if (searching) {
- dosearch();
- searching=false;
- }
- try {
- searchLock.wait();
- }
- catch (InterruptedException e) {
- e.printStackTrace(System.err);
- running=false;
- }
- } //end of while
- } //end of synchronized block
- }
-
- private boolean openConnection() {
- //setup connection
- try{
- URLConnection uc=url.openConnection();
- if(!addedinfo) {
- addedinfo=true;
- Date d=new Date(uc.getLastModified());
- int length=uc.getContentLength()/1024;
- parent.addInfo(index,new String(length+"k "+months[d.getMonth()]+" "+
- d.getDate()+" "+(d.getYear()+1900)));
- }
- bis=new BufferedInputStream(uc.getInputStream());
- }
- catch(FileNotFoundException fnfe) {
- System.out.println(fnfe);
- closeConnection();
- noHorribleError=false;
- parent.foundNoMatch(index);
- return false;
- }
- catch(IOException e) {
- System.out.println(e);
- closeConnection();
- parent.foundNoMatch(index);
- return false;
- }
- catch(SecurityException se) {
- System.out.println(se);
- noHorribleError=false;
- closeConnection();
- parent.foundNoMatch(index);
- return false;
- }
- return true;
- }
-
- private void closeConnection() {
- try {
- if (bis!=null)
- bis.close();
- bis=null;
- }
- catch (IOException e) {
- System.err.println(e);
- }
- }
-
-
- //dosearch should only be called from active run
- private void dosearch() {
- //search loop
- char c;
- boolean cacheFullyRead=false;//stop cache from being read
- //twice in one session...otherwise it will be filled with itself
- boolean inTag=false;
- boolean connected=false;
- int cnumread=0;//number of characters read from cache
- for(;;) {
- if(!searching) break;
- if(cache!=null && cnumread<cachelength && !cacheFullyRead) {
- c=cache[cnumread];
- cnumread++;
- if (cnumread==cachelength)
- cacheFullyRead=true;
- }
- else {
- if(allCached) c=(char)-1;//This will only get called if
- //the cache was an exact fit for the file being searched.
- else if(!connected) {
- if (!openConnection())
- return;
- else {
- connected=true;
- //throw away cachelength chars
- try {
- for (int i=0;i<cachelength;i++) bis.read();
- }
- catch (IOException ewera) {
- parent.foundNoMatch(index);
- closeConnection();
- return;
- }
- continue;
- }
- }
- else {
- try {
- c=Character.toLowerCase((char)bis.read());
- if(cachelength==0) cacheFullyRead=true;
- if(cachelength<cachesize && !allCached) {
- cache[cachelength]=c;
- cachelength++;
- if(c==(char)-1) allCached=true;
- }
- }
- catch(IOException e67) {
- System.out.println(e67);
- parent.foundNoMatch(index);
- closeConnection();
- return;
- }
- }
- }
- //JavaScripts and comments might louse up this
- //current cheesy method for cutting HTML tags.
- if (c=='<')
- inTag=true;
- else if (c=='>') {
- if (cutHTML && inTag) {
- inTag=false;
- continue;
- }
- inTag=false;
- }
-
- if(cutHTML && inTag) {/*nada*/}
- else {
- for(int i=0;i<searchers.length;i++) {
- if(searchers[i].addChar(c)) {
- if(searching) parent.foundMatch(index);
- closeConnection();
- return;
- }
- }
- }
- if(c==(char)-1) break;
- }
- //tell the parent the bad news
- parent.foundNoMatch(index);
- closeConnection();
- }
-
- }//end DocSearcher.java
-