com!online 2002 May

home *** CD-ROM | disk | FTP | other *** search

/ com!online 2002 May / comcd0502.iso / homepage / javaspecial / 03_01 / sitesearcher / AdvSiteSearcher / DocSearcher.java < prev next >

Wrap

Java Source | 2000-08-18 | 9.4 KB | 299 lines

//AdvSiteSearcher c1999 The Gilbert Post by David Faden //The applet and code are distributed as linkware... //If you use this applet or a variant on its code, //include a link to The Gilbert Post, //http://www.geocities.com/Athens/Parthenon/1911 //The Gilbert Post and David Faden take no responsibility //for anything bad that happens as a result of using this applet //or a derivative based on its code. USE AT YOUR OWN RISK. (big letters) //Please send reports of problems to gilbertnews@hotmail.com, anyway, though. //begin DocSearcher.java import java.io.*; import java.net.*; import java.util.*; // A bug fix: // // 4/12/2000 fixed a "Y2K bug reported by several alert users... I am not sure what // I was thinking when I wrote the portion of code calling Date.getYear()... Perhaps that it // returns the decade? Anyway, in reality, getYear() returns the number of years // since 1900. Files with modification dates beyond 1999 were listed with dates greater than // 99 (100 for 2000). // Note: the whole Date class is deprecated in JDK 1.1 // The code actually changed is found in HDocSearcher.java. // // 4/12/2000 added code that causes the DocSearcher's runner Thread to wait // when it is not "doing anything." This should be more efficient than in the // previous incarnation, where runner would sleep, then periodically wake up to // see if there was anything to search. class DocSearcher implements Runnable { private static final String[] months ={"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; private BufferedInputStream bis=null; private AdvSiteSearcher parent; private volatile boolean searching; private volatile boolean running=false;//should the main loop be running? //if there is a horrible error such as a MalformedURLException //or a SecurityException refuse to search anymore... private volatile boolean noHorribleError; private URL url; private int index;//our index according to parent private SearchSieve[] searchers; private boolean addedinfo=false; private Thread runner;//this DocSearcher's Thread private char[] cache; private int cachelength=0; private boolean allCached=false;//Is the whole file stored? private boolean cutHTML=false; private boolean bexact=false; private int cachesize; private long filelength=0;//Do not rely on what the server says /** * Used to synchronize search methods. */ private final Object searchLock=new Object(); public DocSearcher(AdvSiteSearcher parent,URL url,int index,int cachesize) { searching=false; this.parent=parent; this.url=url; noHorribleError=true; this.index=index; this.cachesize=cachesize; } public void searchFor(String[] s, boolean bexact, boolean cutHTML) { if(!noHorribleError) return; stopSearch(); synchronized (searchLock) { //wait for runner to wait this.bexact=bexact; this.cutHTML=cutHTML; //It's not efficient to create new SearchSieves each time. //SearchSieve already provides a method, setKey(char[],boolean), //but I'm not sure how I'd like to implement the SearchSieve pool. searchers=new SearchSieve[s.length]; for(int i=0;i<s.length;i++) { searchers[i]=new SearchSieve(s[i].toLowerCase().toCharArray(),bexact); } if(cache==null && cachesize>0) cache=new char[cachesize]; searching=true; if(!running) { running=true; runner=new Thread(this,"DocSearcher"+index+" runner"); runner.start(); } try { searchLock.notify(); } catch (IllegalMonitorStateException imse) { imse.printStackTrace(System.err); } } } //do not use this method //it is a crutch to deal with a problem in the Applet's init public void setErrored() { noHorribleError=false; //System.out.println("Errored"); } public boolean isErrored() { return !noHorribleError; } //This might not lead to a graceful close of bis. //I think I've fixed the above problem, though. public void stopSearch() { searching=false; } public void stopRunning() { stopSearch(); synchronized (searchLock) { try { searchLock.notify(); //runner will be free to run once we //the calling Thread exits this synchronized block } catch (IllegalMonitorStateException imse) { imse.printStackTrace(System.err); } running=false; } } //this could cause NullPointerException if called while a search is in progress public void trashCache() { stopSearch(); synchronized (searchLock) { cache=null; } //Here were are assuming that only one Thread at a time is calling the public //methods of DocSearcher. //Otherwise, it would be possible for another search to start in the interval //between the two lines of this method } public void run() { synchronized (searchLock) { while (running && noHorribleError) { if (searching) { dosearch(); searching=false; } try { searchLock.wait(); } catch (InterruptedException e) { e.printStackTrace(System.err); running=false; } } //end of while } //end of synchronized block } private boolean openConnection() { //setup connection try{ URLConnection uc=url.openConnection(); if(!addedinfo) { addedinfo=true; Date d=new Date(uc.getLastModified()); int length=uc.getContentLength()/1024; parent.addInfo(index,new String(length+"k "+months[d.getMonth()]+" "+ d.getDate()+" "+(d.getYear()+1900))); } bis=new BufferedInputStream(uc.getInputStream()); } catch(FileNotFoundException fnfe) { System.out.println(fnfe); closeConnection(); noHorribleError=false; parent.foundNoMatch(index); return false; } catch(IOException e) { System.out.println(e); closeConnection(); parent.foundNoMatch(index); return false; } catch(SecurityException se) { System.out.println(se); noHorribleError=false; closeConnection(); parent.foundNoMatch(index); return false; } return true; } private void closeConnection() { try { if (bis!=null) bis.close(); bis=null; } catch (IOException e) { System.err.println(e); } } //dosearch should only be called from active run private void dosearch() { //search loop char c; boolean cacheFullyRead=false;//stop cache from being read //twice in one session...otherwise it will be filled with itself boolean inTag=false; boolean connected=false; int cnumread=0;//number of characters read from cache for(;;) { if(!searching) break; if(cache!=null && cnumread<cachelength && !cacheFullyRead) { c=cache[cnumread]; cnumread++; if (cnumread==cachelength) cacheFullyRead=true; } else { if(allCached) c=(char)-1;//This will only get called if //the cache was an exact fit for the file being searched. else if(!connected) { if (!openConnection()) return; else { connected=true; //throw away cachelength chars try { for (int i=0;i<cachelength;i++) bis.read(); } catch (IOException ewera) { parent.foundNoMatch(index); closeConnection(); return; } continue; } } else { try { c=Character.toLowerCase((char)bis.read()); if(cachelength==0) cacheFullyRead=true; if(cachelength<cachesize && !allCached) { cache[cachelength]=c; cachelength++; if(c==(char)-1) allCached=true; } } catch(IOException e67) { System.out.println(e67); parent.foundNoMatch(index); closeConnection(); return; } } } //JavaScripts and comments might louse up this //current cheesy method for cutting HTML tags. if (c=='<') inTag=true; else if (c=='>') { if (cutHTML && inTag) { inTag=false; continue; } inTag=false; } if(cutHTML && inTag) {/*nada*/} else { for(int i=0;i<searchers.length;i++) { if(searchers[i].addChar(c)) { if(searching) parent.foundMatch(index); closeConnection(); return; } } } if(c==(char)-1) break; } //tell the parent the bad news parent.foundNoMatch(index); closeConnection(); } }//end DocSearcher.java