com!online 2002 May

home *** CD-ROM | disk | FTP | other *** search

/ com!online 2002 May / comcd0502.iso / homepage / javaspecial / 03_01 / sitesearcher / AdvSiteSearcher / AdvSiteSearcher.java < prev next >

Wrap

Java Source | 2000-08-18 | 20.1 KB | 515 lines

//AdvSiteSearcher c1999 The Gilbert Post by David Faden //The applet and code are distributed as linkware... //If you use this applet or a variant on its code, //include a link to The Gilbert Post, //http://www.geocities.com/Athens/Parthenon/1911 //The Gilbert Post and David Faden take no responsibility //for anything bad that happens as a result of using this applet //or a derivative based on its code. USE AT YOUR OWN RISK. (big letters) //Please send reports of problems to gilbertnews@hotmail.com, anyway, though. //I have not commented this source especially well (for one thing, it will not play //well with JavaDoc so go ahead and send your questions about the source to me... //I may know the answer. // //It should be evident that I'm not a professional programmer...any professionals out there, //I'd like to hear your advise (as long as it's polite). Thank you. --David import java.awt.*; import java.applet.*; import java.io.*; import java.net.*; import java.util.*; //These classes are designed to work with Netscape 3.x+ and so use JDK 1.0 // 5/31/1999 fixed a bug, discovered by Dave Langers, in showPage(URL url) that // caused the page to be loaded in both // the target and top (default) windows // Also, fixed a bug that might cause the progress bar to show 100% (after an errored // SearchThread called foundNoMatch(int i)) while files were still being searched. // // 6/2/1999 fixed a very stupid bug on my part, also discovered by Dave Langers, // that made it impossible for queries // containing uppercase characters to ever be found -- the scanned lines were // converted to lowercase while the keywords/phrases were not // // 6/4/1999 renamed to AdvSiteSearcher to differentiate from older SiteSearcher // Plan to release one more version of SiteSearcher using SearchSieves // // 6/9/1999 made many major revisions: renamed SearchThread to DocSearcher because // it is no longer a subclass of Thread (it instead implements Runnable), polished // the use of SearchSieves, began tentative support for caching, added the ability // to demand exact matches, and to ignore text in between lesser than and greater than // signs (probably HTML) // // 7/27/1999 acting on user requests, updated parameters to add control over the display text... // mostly useful to non-English speakers who want to "localize" the applet... // There are a few other barriers to full internationalization: the date // format used in the search results should be changeable, and the incoming byte stream // should be converted according to the text encoding into a char stream. There is also a // problem, discovered by Guy Capra, with ALL java text components in Netscape and MSIE's VMs // on Macintoshes with AZERTY keyboards: a key press on the AZERTY keyboard seems to be being // incorrectly interpreted as a press on the corresponding QWERTY key. I have not been able // to figure out a solution...for now, you can find my present failed efforts at // http://www.geocities.com/gilbertnews/linkware/qwerty_azerty/. // Also, wrote out full name of List (java.awt.List) to hopefully make this compile with JDK1.2. // // 11/25/1999 squashed a bug (in the run method) which was messing up the "dirs" parameter... // I'm not sure how this one slipped through. Corrected the spelling of "exclude" in the default // text of the applet. Kudos to Matthew Hutton for noticing this. // // 4/12/2000 fixed a "Y2K bug reported by several alert users... I am not sure what // I was thinking when I wrote the portion of code calling Date.getYear()... Perhaps that it // returns the decade? Anyway, in reality, getYear() returns the number of years // since 1900. Files with modification dates beyond 1999 were listed with dates greater than // 99 (100 for 2000). // Note: the whole Date class is deprecated in JDK 1.1 // The code actually changed is found in DocSearcher.java. // // 4/12/2000 added code that causes the DocSearcher's runner Thread to wait // when it is not "doing anything." This should be more efficient than in the // previous incarnation, where runner would sleep, then periodically wake up to // see if there was anything to search. public class AdvSiteSearcher extends Applet implements Runnable { DocSearcher[] workers; URL[] urls; String[] pageinfo;//name, size, last modified int[] matches; int nummatches=0; int numreported=0; int numWorkers=0; java.awt.List list;//=new java.awt.List(5,false); Button search,stop; Checkbox HTMLbox,Exactbox; TextField searchbox; ProgressBar progressbar; String target; URL docbase; boolean displayMessage; String message;//Message to be displayed in applet //to let the user know what's happening before the GUI is finished being set up static final String searchTokenSeparators="\"&|+ \t\r\n,*?"; URL thegilbertpost=null; String dirs=null;//dirs is one-time-use variable used to pass //parameter info to AdvSiteSearcher's run to parse out links from dir listings. Color bordercolor=Color.darkGray; Insets insets=new Insets(5,5,5,5); public void init() { //first initialize the variables target=getParameter("target"); docbase=getDocumentBase(); try { thegilbertpost=new URL("http://www.geocities.com/Athens/Parthenon/1911/"); } catch(MalformedURLException eww) {/*should never occur*/} if((dirs=getParameter("dirs"))!=null) { Thread tempthread=new Thread(this); tempthread.start(); } else finishInit(); } //Finish initializing the applet after AdvSiteSearcher's run has possibly been run //to collect links from directory listings... private void finishInit() { int cachesize=2000; String cacheparam=getParameter("cachesize"); if (cacheparam!=null) { try { cachesize=Integer.parseInt(cacheparam); } catch(NumberFormatException e) {cachesize=2000;} } String files=getParameter("files"); if(dirs!=null) { if(files==null) files=dirs; else files=dirs+files; } if(files!=null) { StringTokenizer st=new StringTokenizer(files,"\n\r \t,",false); int num=st.countTokens(); urls=new URL[num]; workers=new DocSearcher[num]; pageinfo=new String[num]; matches=new int[num]; numWorkers=num; String currToken; for(int i=0;i<num;i++) { currToken=st.nextToken(); pageinfo[i]=currToken; matches[i]=-1; try { urls[i]=new URL(docbase,currToken); workers[i]=new DocSearcher(this,urls[i],i,cachesize); } catch(MalformedURLException mued) { urls[i]=null; //waste an Object workers[i]=new DocSearcher(this,urls[i],i,cachesize); workers[i].setErrored(); System.out.println(mued); } } } else { displayMessage=true; System.out.println("AdvSiteSearcher Applet can't start"); System.out.println("Missing required parameter(s): files/dirs"); message="Can't continue: missing both \"files\" & \"dirs\" parameters."; repaint(); return; } //Set up GUI resize(350,300); setLayout(new BorderLayout()); //get color parameters Color color=null; if((color=getColor(getParameter("bgcolor")))!=null) setBackground(color); else setBackground(Color.gray); if((bordercolor=getColor(getParameter("bordercolor")))==null) bordercolor=Color.darkGray; if((color=getColor(getParameter("fgcolor")))!=null) setForeground(color); else setForeground(Color.black); //Lots of Panels Panel ptotal=new Panel(); ptotal.setLayout(new BorderLayout()); Panel pcontrols=new Panel(); pcontrols.setLayout(new GridLayout(3,1));//searchbox,checkboxes,progressbar //Parameters to allow control over the text in the applet // Though this needed ability is very easy to implement, I'm still faced with // the dilemma of what to name the parameters. Perhaps this is a sign of my insanity, // but I worry about whether to name them for their functionality (like "search_btn_txt") // or their English versions (like "Search_en")...for now, functionality: // search_btn_txt // stop_btn_txt // xhtml_chkbx_txt // exact_chkbx_txt // pbar_msg_txt , text the progressbar displays as it progresses // searchbox_label_txt String search_btn_txt=getParameter("search_btn_txt","Search"); String stop_btn_txt=getParameter("stop_btn_txt","Stop"); String xhtml_chkbx_txt=getParameter("xhtml_chkbx_txt","Exclude HTML"); String exact_chkbx_txt=getParameter("exact_chkbx_txt","Exact matches only"); String pbar_msg_txt=getParameter("pbar_msg_txt","Searching..."); String searchbox_label_txt=getParameter("searchbox_label_txt","Search for:"); // Panel ptop=new Panel(); ptop.setLayout(new BorderLayout()); Panel p=new Panel(); p.add(new Label(searchbox_label_txt,Label.RIGHT)); p.add(searchbox=new TextField(20)); if((color=getColor(getParameter("searchboxbgcolor")))!=null) { searchbox.setBackground(color); } else searchbox.setBackground(Color.white); if((color=getColor(getParameter("searchboxfgcolor")))!=null) { searchbox.setForeground(color); } else searchbox.setForeground(Color.black); String initsearchwrds=getParameter("startwords"); if(initsearchwrds!=null) searchbox.setText(initsearchwrds); //Color buttonbgcolor,buttonfgcolor; p.add(search=new Button(search_btn_txt));//search button p.add(stop=new Button(stop_btn_txt));//stop button if((color=getColor(getParameter("buttonbgcolor")))!=null) { search.setBackground(color); stop.setBackground(color); } if((color=getColor(getParameter("buttonfgcolor")))!=null) { search.setForeground(color); stop.setForeground(color); } ptop.add("Center",p); pcontrols.add(ptop); Panel p2=new Panel(); p2.setLayout(new FlowLayout(FlowLayout.CENTER)); p2.add(HTMLbox=new Checkbox(xhtml_chkbx_txt));//Exclude HTML checkbox p2.add(Exactbox=new Checkbox(exact_chkbx_txt));//Exact matches only checkbox if((color=getColor(getParameter("checkboxbgcolor")))!=null) { HTMLbox.setBackground(color); Exactbox.setBackground(color); } //else let default thing happen if((color=getColor(getParameter("checkboxfgcolor")))!=null) { HTMLbox.setForeground(color); Exactbox.setForeground(color); } //else let default happen pcontrols.add(p2); Panel p3=new Panel(); Color pbaroncolor,pbaroffcolor; if((pbaroncolor=getColor(getParameter("pbaroncolor")))==null) pbaroncolor=Color.cyan; if((pbaroffcolor=getColor(getParameter("pbaroffcolor")))==null) pbaroffcolor=Color.blue.brighter().brighter().brighter(); p3.add(progressbar=new ProgressBar(pbaroncolor,pbaroffcolor,300,20,numWorkers,pbar_msg_txt)); pcontrols.add(p3); ptotal.add("North",pcontrols); ptotal.add("Center",list=new java.awt.List(5,false)); if((color=getColor(getParameter("listbgcolor")))==null) color=Color.lightGray; list.setBackground(color); if((color=getColor(getParameter("listfgcolor")))==null) color=Color.black; list.setForeground(color); //stop Netscape from crashing list.addItem("[AdvSiteSearcher c1999 The Gilbert Post]"); add("Center",ptotal); validate(); } //Maybe this should be changed to getParameter(String,Object) //so it can handle all of our needs? public String getParameter(String name, String alt) { String val=getParameter(name); if(val!=null) return val; return alt; } public Color getColor(String s) { if(s==null) return null; s=s.toLowerCase(); if(s.startsWith("#")) { if(s.length()!=7) return null; else { try { int num=Integer.parseInt(s.substring(1,7),16);//parse a hex. string to dec. return new Color(num); } catch(NumberFormatException e) { return null; } } } else if("black".equals(s)) return Color.black; else if("blue".equals(s)) return Color.blue; else if("darkblue".equals(s)) return Color.blue.darker().darker().darker(); else if("lightblue".equals(s)) return Color.blue.brighter().brighter().brighter(); else if("cyan".equals(s)) return Color.cyan; else if("darkgray".equals(s)) return Color.darkGray; else if("lightgray".equals(s)) return Color.lightGray; else if("green".equals(s)) return Color.green; else if("gray".equals(s)) return Color.gray; else if("magenta".equals(s)) return Color.magenta; else if("orange".equals(s)) return Color.orange; else if("pink".equals(s)) return Color.pink; else if("red".equals(s)) return Color.red; else if("white".equals(s)) return Color.white; else if("yellow".equals(s)) return Color.yellow; else return Color.getColor(s); } //This probably belongs in a separate class... //but I'd like to keep the additional downloads to a mininum. //Parse links from HTML file(s) so that people can specify a directory //from which we will pull the server's listing of files. public void run() { System.out.println("AdvSiteSearcher: Loading links from directory listings..."); displayMessage=true; message="Loading links from directory listing(s)..."; repaint(); StringBuffer sb=new StringBuffer();//collect the internal URLs then add them to //the files parameter. //This is not a robust method for picking up links... //however, it should work for the machine generated output from the server. SearchSieve linkfinder=new SearchSieve("<a href=".toCharArray(),false); StringTokenizer st=new StringTokenizer(dirs,"\n\r \t,",false); String currToken; BufferedInputStream bis; URL url; char c; char prevc=(char)-1; int i; boolean inAHREF=false; StringBuffer currlink=new StringBuffer(); String templink;//temporarily hold newly discovered links in here //to discern whether they'll do us any good. for(;;) { if(!st.hasMoreTokens()) break; currToken=st.nextToken(); try { url=new URL(docbase,currToken); System.out.println(url); bis=new BufferedInputStream(url.openStream()); } catch(Exception ewhatever) {System.out.println(ewhatever); continue;} for(;;) { try { i=bis.read(); } catch(IOException ieee) { System.out.println(ieee); try { bis.close(); bis=null; break; } catch(IOException ewetried) { System.out.println(ewetried); break;} } if(i==-1) break; if(!inAHREF) c=Character.toLowerCase((char)i); else c=(char)i;//Preserve the case if getting link if(prevc==' ' && c==' ') continue;//cut out extra spaces if(!inAHREF) inAHREF=linkfinder.addChar(c); else { if((c=='\"' || c=='\'' || c==' ' || c=='>')) { if(currlink.length()>0) { inAHREF=false; templink=currlink.toString().toLowerCase(); //Weed out links to higher/lower dirs if((!templink.startsWith("/") && !templink.startsWith("http://") && !templink.startsWith("../")) && (templink.endsWith(".html")||templink.endsWith(".htm")|| templink.endsWith(".shtml")||templink.endsWith(".txt")||templink.endsWith(".diz"))) { //The following code is a hack allowing webmasters to sneak an html file into "dirs" if(!currToken.endsWith(".html") && !currToken.endsWith(".htm")) { sb.append(currToken); if(!currToken.endsWith("/")) sb.append('/'); } sb.append(currlink.toString()); sb.append(","); System.out.println(" Added file "+currlink.toString()+" to the search."); } currlink.setLength(0); } } else currlink.append(c); } prevc=c; } } if(sb.length()>0) dirs=sb.toString(); else dirs=null; displayMessage=false; repaint(); finishInit(); } public Insets insets() { return insets; } public void paint(Graphics g) { //Don't worry too much about making this pretty... //It's just to let the viewer (probably a disappointed webmaster) //know that there's something (probably bad) going on. if(displayMessage) g.drawString(message,7,size().height/2); //draw border g.setColor(bordercolor); g.drawRect(insets.left,insets.top,size().width-2*insets.left, size().height-2*insets.top); } public void reset() { stopAllSearches(); progressbar.reset(); list.clear(); //stop Netscape from crashing list.addItem("[AdvSiteSearcher c1999 The Gilbert Post]"); nummatches=0; } public void stopAllSearches() { for(int i=0;i<workers.length;i++) workers[i].stopSearch(); } //I can't remember why I synchronized this... //then maybe it doesn't need to be? //Oh, yeah...I was worried that the wrong index would end //up getting attached to the corresponding list item. //int i==the index of the DocSearcher that found a match public synchronized void foundMatch(int i) { if(nummatches<matches.length); matches[nummatches]=i; nummatches++; list.addItem(pageinfo[i]); progressbar.plus(1); } //A DocSearcher that is errored can only ever call this once //because its errored flag is set public void foundNoMatch(int i) { if(workers[i].isErrored()) { numWorkers--; progressbar.setMax(numWorkers); } else progressbar.plus(1); } //called after a DocSearcher opens a connection for the //first time and can get some extra info like last modified date //and file size public void addInfo(int i,String more) { pageinfo[i]+=" "+more; } public boolean action(Event evt, Object arg) { if(evt.target == list) { stopAllSearches(); int index=list.getSelectedIndex(); if(index!=-1) showPage(index); } else if(evt.target == searchbox) { search(searchbox.getText(),Exactbox.getState(),HTMLbox.getState()); } else if(evt.target == stop) stopAllSearches(); else if(evt.target == search) search(searchbox.getText(),Exactbox.getState(),HTMLbox.getState()); return true; } protected void search(String s,boolean bexact,boolean cutHTML) { reset(); StringTokenizer st=new StringTokenizer(s,searchTokenSeparators,true); Vector tempv=new Vector(); String currWord="";//this probably should be a StringBuffer String currToken=""; boolean insideQuote=false; while(st.hasMoreTokens()) { currToken=st.nextToken(); if(searchTokenSeparators.indexOf(currToken)!=-1) { if("\"".equals(currToken)) { insideQuote=!insideQuote; if(insideQuote) currWord=""; else { if(currWord!="") tempv.addElement((new String(currWord)).toLowerCase()); currWord=""; } } else if(insideQuote) currWord+=currToken; } else if(!insideQuote) { tempv.addElement((new String(currToken)).toLowerCase()); currWord=""; } else if(insideQuote) { currWord+=currToken; } } if(currWord!="") tempv.addElement((new String(currWord)).toLowerCase()); String[] ss=new String[tempv.size()]; tempv.copyInto(ss); for(int i2=0;i2<workers.length;i2++) workers[i2].searchFor(ss,bexact,cutHTML); } protected void showPage(int ii) { //result of klooge to stop Netscape from crashing if(ii==0) { stopAllSearches(); getAppletContext().showDocument(thegilbertpost,"_top"); } int i=ii-1; if(i<0) return; stopAllSearches(); if(target!=null) getAppletContext().showDocument(urls[matches[i]],target); else getAppletContext().showDocument(urls[matches[i]]); } public void stop() { stopAllSearches(); for(int i=0;i<workers.length;i++) workers[i].stopRunning(); //don't tie up CPU time when we're hidden or about to die //when stopped should probably add an additional timer thread to //null all the workers } public void destroy() { //Dump more for(int i=0;i<workers.length;i++) workers[i].trashCache(); } }//end AdvSiteSearcher.java