home *** CD-ROM | disk | FTP | other *** search
Java Source | 2000-08-18 | 20.1 KB | 515 lines |
- //AdvSiteSearcher c1999 The Gilbert Post by David Faden
- //The applet and code are distributed as linkware...
- //If you use this applet or a variant on its code,
- //include a link to The Gilbert Post,
- //http://www.geocities.com/Athens/Parthenon/1911
- //The Gilbert Post and David Faden take no responsibility
- //for anything bad that happens as a result of using this applet
- //or a derivative based on its code. USE AT YOUR OWN RISK. (big letters)
- //Please send reports of problems to gilbertnews@hotmail.com, anyway, though.
-
- //I have not commented this source especially well (for one thing, it will not play
- //well with JavaDoc), so go ahead and send your questions about the source to me...
- //I may know the answer.
- //
- //It should be evident that I'm not a professional programmer...any professionals out there,
- //I'd like to hear your advice (as long as it's polite). Thank you. --David
-
- import java.awt.*;
- import java.applet.*;
- import java.io.*;
- import java.net.*;
- import java.util.*;
- //These classes are designed to work with Netscape 3.x+ and so use JDK 1.0
-
- // 5/31/1999 fixed a bug, discovered by Dave Langers, in showPage(URL url) that
- // caused the page to be loaded in both
- // the target and top (default) windows
- // Also, fixed a bug that might cause the progress bar to show 100% (after an errored
- // SearchThread called foundNoMatch(int i)) while files were still being searched.
- //
- // 6/2/1999 fixed a very stupid bug on my part, also discovered by Dave Langers,
- // that made it impossible for queries
- // containing uppercase characters to ever be found -- the scanned lines were
- // converted to lowercase while the keywords/phrases were not
- //
- // 6/4/1999 renamed to AdvSiteSearcher to differentiate from older SiteSearcher
- // Plan to release one more version of SiteSearcher using SearchSieves
- //
- // 6/9/1999 made many major revisions: renamed SearchThread to DocSearcher because
- // it is no longer a subclass of Thread (it instead implements Runnable), polished
- // the use of SearchSieves, began tentative support for caching, added the ability
- // to demand exact matches, and to ignore text in between less-than and greater-than
- // signs (probably HTML)
- //
- // 7/27/1999 acting on user requests, updated parameters to add control over the display text...
- // mostly useful to non-English speakers who want to "localize" the applet...
- // There are a few other barriers to full internationalization: the date
- // format used in the search results should be changeable, and the incoming byte stream
- // should be converted according to the text encoding into a char stream. There is also a
- // problem, discovered by Guy Capra, with ALL java text components in Netscape and MSIE's VMs
- // on Macintoshes with AZERTY keyboards: a key press on the AZERTY keyboard seems to be being
- // incorrectly interpreted as a press on the corresponding QWERTY key. I have not been able
- // to figure out a solution...for now, you can find my present failed efforts at
- // http://www.geocities.com/gilbertnews/linkware/qwerty_azerty/.
- // Also, wrote out full name of List (java.awt.List) to hopefully make this compile with JDK1.2.
- //
- // 11/25/1999 squashed a bug (in the run method) which was messing up the "dirs" parameter...
- // I'm not sure how this one slipped through. Corrected the spelling of "exclude" in the default
- // text of the applet. Kudos to Matthew Hutton for noticing this.
- //
- // 4/12/2000 fixed a "Y2K" bug reported by several alert users... I am not sure what
- // I was thinking when I wrote the portion of code calling Date.getYear()... Perhaps that it
- // returns the decade? Anyway, in reality, getYear() returns the number of years
- // since 1900. Files with modification dates beyond 1999 were listed with dates greater than
- // 99 (100 for 2000).
- // Note: the whole Date class is deprecated in JDK 1.1
- // The code actually changed is found in DocSearcher.java.
- //
- // 4/12/2000 added code that causes the DocSearcher's runner Thread to wait
- // when it is not "doing anything." This should be more efficient than in the
- // previous incarnation, where runner would sleep, then periodically wake up to
- // see if there was anything to search.
-
- public class AdvSiteSearcher extends Applet implements Runnable {
- DocSearcher[] workers;
- URL[] urls;
- String[] pageinfo;//name, size, last modified
- int[] matches;
- int nummatches=0;
- int numreported=0;
- int numWorkers=0;
- java.awt.List list;//=new java.awt.List(5,false);
- Button search,stop;
- Checkbox HTMLbox,Exactbox;
- TextField searchbox;
- ProgressBar progressbar;
- String target;
- URL docbase;
- boolean displayMessage;
- String message;//Message to be displayed in applet
- //to let the user know what's happening before the GUI is finished being set up
- static final String searchTokenSeparators="\"&|+ \t\r\n,*?";
- URL thegilbertpost=null;
- String dirs=null;//dirs is one-time-use variable used to pass
- //parameter info to AdvSiteSearcher's run to parse out links from dir listings.
- Color bordercolor=Color.darkGray;
- Insets insets=new Insets(5,5,5,5);
-
- public void init() {
- //first initialize the variables
- target=getParameter("target");
- docbase=getDocumentBase();
- try {
- thegilbertpost=new URL("http://www.geocities.com/Athens/Parthenon/1911/");
- }
- catch(MalformedURLException eww) {/*should never occur*/}
- if((dirs=getParameter("dirs"))!=null) {
- Thread tempthread=new Thread(this);
- tempthread.start();
- }
- else finishInit();
- }
-
- //Finish initializing the applet after AdvSiteSearcher's run has possibly been run
- //to collect links from directory listings...
- private void finishInit() {
- int cachesize=2000;
- String cacheparam=getParameter("cachesize");
- if (cacheparam!=null) {
- try {
- cachesize=Integer.parseInt(cacheparam);
- }
- catch(NumberFormatException e) {cachesize=2000;}
- }
- String files=getParameter("files");
- if(dirs!=null) {
- if(files==null) files=dirs;
- else files=dirs+files;
- }
- if(files!=null) {
- StringTokenizer st=new StringTokenizer(files,"\n\r \t,",false);
- int num=st.countTokens();
- urls=new URL[num];
- workers=new DocSearcher[num];
- pageinfo=new String[num];
- matches=new int[num];
- numWorkers=num;
- String currToken;
- for(int i=0;i<num;i++) {
- currToken=st.nextToken();
- pageinfo[i]=currToken;
- matches[i]=-1;
- try {
- urls[i]=new URL(docbase,currToken);
- workers[i]=new DocSearcher(this,urls[i],i,cachesize);
- }
- catch(MalformedURLException mued) {
- urls[i]=null;
- //waste an Object
- workers[i]=new DocSearcher(this,urls[i],i,cachesize);
- workers[i].setErrored();
- System.out.println(mued);
- }
- }
- }
- else {
- displayMessage=true;
- System.out.println("AdvSiteSearcher Applet can't start");
- System.out.println("Missing required parameter(s): files/dirs");
- message="Can't continue: missing both \"files\" & \"dirs\" parameters.";
- repaint();
- return;
- }
-
- //Set up GUI
- resize(350,300);
- setLayout(new BorderLayout());
- //get color parameters
- Color color=null;
- if((color=getColor(getParameter("bgcolor")))!=null) setBackground(color);
- else setBackground(Color.gray);
- if((bordercolor=getColor(getParameter("bordercolor")))==null) bordercolor=Color.darkGray;
- if((color=getColor(getParameter("fgcolor")))!=null) setForeground(color);
- else setForeground(Color.black);
- //Lots of Panels
- Panel ptotal=new Panel();
- ptotal.setLayout(new BorderLayout());
- Panel pcontrols=new Panel();
- pcontrols.setLayout(new GridLayout(3,1));//searchbox,checkboxes,progressbar
- //Parameters to allow control over the text in the applet
- // Though this needed ability is very easy to implement, I'm still faced with
- // the dilemma of what to name the parameters. Perhaps this is a sign of my insanity,
- // but I worry about whether to name them for their functionality (like "search_btn_txt")
- // or their English versions (like "Search_en")...for now, functionality:
- // search_btn_txt
- // stop_btn_txt
- // xhtml_chkbx_txt
- // exact_chkbx_txt
- // pbar_msg_txt , text the progressbar displays as it progresses
- // searchbox_label_txt
- String search_btn_txt=getParameter("search_btn_txt","Search");
- String stop_btn_txt=getParameter("stop_btn_txt","Stop");
- String xhtml_chkbx_txt=getParameter("xhtml_chkbx_txt","Exclude HTML");
- String exact_chkbx_txt=getParameter("exact_chkbx_txt","Exact matches only");
- String pbar_msg_txt=getParameter("pbar_msg_txt","Searching...");
- String searchbox_label_txt=getParameter("searchbox_label_txt","Search for:");
- //
- Panel ptop=new Panel();
- ptop.setLayout(new BorderLayout());
- Panel p=new Panel();
- p.add(new Label(searchbox_label_txt,Label.RIGHT));
- p.add(searchbox=new TextField(20));
- if((color=getColor(getParameter("searchboxbgcolor")))!=null) {
- searchbox.setBackground(color);
- }
- else searchbox.setBackground(Color.white);
- if((color=getColor(getParameter("searchboxfgcolor")))!=null) {
- searchbox.setForeground(color);
- }
- else searchbox.setForeground(Color.black);
- String initsearchwrds=getParameter("startwords");
- if(initsearchwrds!=null) searchbox.setText(initsearchwrds);
- //Color buttonbgcolor,buttonfgcolor;
- p.add(search=new Button(search_btn_txt));//search button
- p.add(stop=new Button(stop_btn_txt));//stop button
- if((color=getColor(getParameter("buttonbgcolor")))!=null) {
- search.setBackground(color);
- stop.setBackground(color);
- }
- if((color=getColor(getParameter("buttonfgcolor")))!=null) {
- search.setForeground(color);
- stop.setForeground(color);
- }
- ptop.add("Center",p);
- pcontrols.add(ptop);
- Panel p2=new Panel();
- p2.setLayout(new FlowLayout(FlowLayout.CENTER));
- p2.add(HTMLbox=new Checkbox(xhtml_chkbx_txt));//Exclude HTML checkbox
- p2.add(Exactbox=new Checkbox(exact_chkbx_txt));//Exact matches only checkbox
- if((color=getColor(getParameter("checkboxbgcolor")))!=null) {
- HTMLbox.setBackground(color);
- Exactbox.setBackground(color);
- }
- //else let default thing happen
- if((color=getColor(getParameter("checkboxfgcolor")))!=null) {
- HTMLbox.setForeground(color);
- Exactbox.setForeground(color);
- }
- //else let default happen
- pcontrols.add(p2);
- Panel p3=new Panel();
- Color pbaroncolor,pbaroffcolor;
- if((pbaroncolor=getColor(getParameter("pbaroncolor")))==null) pbaroncolor=Color.cyan;
- if((pbaroffcolor=getColor(getParameter("pbaroffcolor")))==null) pbaroffcolor=Color.blue.brighter().brighter().brighter();
- p3.add(progressbar=new ProgressBar(pbaroncolor,pbaroffcolor,300,20,numWorkers,pbar_msg_txt));
- pcontrols.add(p3);
- ptotal.add("North",pcontrols);
- ptotal.add("Center",list=new java.awt.List(5,false));
- if((color=getColor(getParameter("listbgcolor")))==null) color=Color.lightGray;
- list.setBackground(color);
- if((color=getColor(getParameter("listfgcolor")))==null) color=Color.black;
- list.setForeground(color);
- //stop Netscape from crashing
- list.addItem("[AdvSiteSearcher c1999 The Gilbert Post]");
- add("Center",ptotal);
- validate();
- }
-
- //Maybe this should be changed to getParameter(String,Object)
- //so it can handle all of our needs?
- public String getParameter(String name, String alt) {
- String val=getParameter(name);
- if(val!=null) return val;
- return alt;
- }
-
- public Color getColor(String s) {
- if(s==null) return null;
- s=s.toLowerCase();
- if(s.startsWith("#")) {
- if(s.length()!=7) return null;
- else {
- try {
- int num=Integer.parseInt(s.substring(1,7),16);//parse a hex. string to dec.
- return new Color(num);
- }
- catch(NumberFormatException e) {
- return null;
- }
- }
- }
- else if("black".equals(s)) return Color.black;
- else if("blue".equals(s)) return Color.blue;
- else if("darkblue".equals(s)) return Color.blue.darker().darker().darker();
- else if("lightblue".equals(s)) return Color.blue.brighter().brighter().brighter();
- else if("cyan".equals(s)) return Color.cyan;
- else if("darkgray".equals(s)) return Color.darkGray;
- else if("lightgray".equals(s)) return Color.lightGray;
- else if("green".equals(s)) return Color.green;
- else if("gray".equals(s)) return Color.gray;
- else if("magenta".equals(s)) return Color.magenta;
- else if("orange".equals(s)) return Color.orange;
- else if("pink".equals(s)) return Color.pink;
- else if("red".equals(s)) return Color.red;
- else if("white".equals(s)) return Color.white;
- else if("yellow".equals(s)) return Color.yellow;
- else return Color.getColor(s);
- }
-
- //This probably belongs in a separate class...
- //but I'd like to keep the additional downloads to a minimum.
- //Parse links from HTML file(s) so that people can specify a directory
- //from which we will pull the server's listing of files.
- public void run() {
- System.out.println("AdvSiteSearcher: Loading links from directory listings...");
- displayMessage=true;
- message="Loading links from directory listing(s)...";
- repaint();
- StringBuffer sb=new StringBuffer();//collect the internal URLs then add them to
- //the files parameter.
- //This is not a robust method for picking up links...
- //however, it should work for the machine generated output from the server.
- SearchSieve linkfinder=new SearchSieve("<a href=".toCharArray(),false);
- StringTokenizer st=new StringTokenizer(dirs,"\n\r \t,",false);
- String currToken;
- BufferedInputStream bis;
- URL url;
- char c;
- char prevc=(char)-1;
- int i;
- boolean inAHREF=false;
- StringBuffer currlink=new StringBuffer();
- String templink;//temporarily hold newly discovered links in here
- //to discern whether they'll do us any good.
- for(;;) {
- if(!st.hasMoreTokens()) break;
- currToken=st.nextToken();
- try {
- url=new URL(docbase,currToken);
- System.out.println(url);
- bis=new BufferedInputStream(url.openStream());
- }
- catch(Exception ewhatever) {System.out.println(ewhatever); continue;}
- for(;;) {
- try {
- i=bis.read();
- }
- catch(IOException ieee) {
- System.out.println(ieee);
- try {
- bis.close();
- bis=null;
- break;
- }
- catch(IOException ewetried) { System.out.println(ewetried); break;}
- }
- if(i==-1) break;
- if(!inAHREF) c=Character.toLowerCase((char)i);
- else c=(char)i;//Preserve the case if getting link
- if(prevc==' ' && c==' ') continue;//cut out extra spaces
- if(!inAHREF) inAHREF=linkfinder.addChar(c);
- else {
- if((c=='\"' || c=='\'' || c==' ' || c=='>')) {
- if(currlink.length()>0) {
- inAHREF=false;
- templink=currlink.toString().toLowerCase();
- //Weed out links to higher/lower dirs
- if((!templink.startsWith("/") && !templink.startsWith("http://") && !templink.startsWith("../")) && (templink.endsWith(".html")||templink.endsWith(".htm")|| templink.endsWith(".shtml")||templink.endsWith(".txt")||templink.endsWith(".diz"))) {
- //The following code is a hack allowing webmasters to sneak an html file into "dirs"
- if(!currToken.endsWith(".html") && !currToken.endsWith(".htm")) {
- sb.append(currToken);
- if(!currToken.endsWith("/")) sb.append('/');
- }
- sb.append(currlink.toString());
- sb.append(",");
- System.out.println(" Added file "+currlink.toString()+" to the search.");
- }
- currlink.setLength(0);
- }
- }
- else currlink.append(c);
- }
- prevc=c;
- }
- }
- if(sb.length()>0) dirs=sb.toString();
- else dirs=null;
- displayMessage=false;
- repaint();
- finishInit();
- }
-
- public Insets insets() {
- return insets;
- }
-
- public void paint(Graphics g) {
- //Don't worry too much about making this pretty...
- //It's just to let the viewer (probably a disappointed webmaster)
- //know that there's something (probably bad) going on.
- if(displayMessage) g.drawString(message,7,size().height/2);
- //draw border
- g.setColor(bordercolor);
- g.drawRect(insets.left,insets.top,size().width-2*insets.left, size().height-2*insets.top);
- }
-
- public void reset() {
- stopAllSearches();
- progressbar.reset();
- list.clear();
- //stop Netscape from crashing
- list.addItem("[AdvSiteSearcher c1999 The Gilbert Post]");
- nummatches=0;
- }
-
- public void stopAllSearches() {
- for(int i=0;i<workers.length;i++) workers[i].stopSearch();
- }
-
- //I can't remember why I synchronized this...
- //then maybe it doesn't need to be?
- //Oh, yeah...I was worried that the wrong index would end
- //up getting attached to the corresponding list item.
- //int i==the index of the DocSearcher that found a match
- public synchronized void foundMatch(int i) {
- if(nummatches<matches.length);
- matches[nummatches]=i;
- nummatches++;
- list.addItem(pageinfo[i]);
- progressbar.plus(1);
- }
-
- //A DocSearcher that is errored can only ever call this once
- //because its errored flag is set
- public void foundNoMatch(int i) {
- if(workers[i].isErrored()) {
- numWorkers--;
- progressbar.setMax(numWorkers);
- }
- else progressbar.plus(1);
- }
-
- //called after a DocSearcher opens a connection for the
- //first time and can get some extra info like last modified date
- //and file size
- public void addInfo(int i,String more) {
- pageinfo[i]+=" "+more;
- }
-
- public boolean action(Event evt, Object arg) {
- if(evt.target == list) {
- stopAllSearches();
- int index=list.getSelectedIndex();
- if(index!=-1) showPage(index);
- }
- else if(evt.target == searchbox) {
- search(searchbox.getText(),Exactbox.getState(),HTMLbox.getState());
- }
- else if(evt.target == stop) stopAllSearches();
- else if(evt.target == search) search(searchbox.getText(),Exactbox.getState(),HTMLbox.getState());
- return true;
- }
-
- protected void search(String s,boolean bexact,boolean cutHTML) {
- reset();
- StringTokenizer st=new StringTokenizer(s,searchTokenSeparators,true);
- Vector tempv=new Vector();
- String currWord="";//this probably should be a StringBuffer
- String currToken="";
- boolean insideQuote=false;
- while(st.hasMoreTokens()) {
- currToken=st.nextToken();
- if(searchTokenSeparators.indexOf(currToken)!=-1) {
- if("\"".equals(currToken)) {
- insideQuote=!insideQuote;
- if(insideQuote) currWord="";
- else {
- if(currWord!="") tempv.addElement((new String(currWord)).toLowerCase());
- currWord="";
- }
- }
- else if(insideQuote) currWord+=currToken;
- }
- else if(!insideQuote) {
- tempv.addElement((new String(currToken)).toLowerCase());
- currWord="";
- }
- else if(insideQuote) {
- currWord+=currToken;
- }
- }
- if(currWord!="") tempv.addElement((new String(currWord)).toLowerCase());
- String[] ss=new String[tempv.size()];
- tempv.copyInto(ss);
- for(int i2=0;i2<workers.length;i2++) workers[i2].searchFor(ss,bexact,cutHTML);
- }
-
- protected void showPage(int ii) {
- //result of klooge to stop Netscape from crashing
- if(ii==0) {
- stopAllSearches();
- getAppletContext().showDocument(thegilbertpost,"_top");
- }
- int i=ii-1;
- if(i<0) return;
- stopAllSearches();
- if(target!=null) getAppletContext().showDocument(urls[matches[i]],target);
- else getAppletContext().showDocument(urls[matches[i]]);
- }
-
- public void stop() {
- stopAllSearches();
- for(int i=0;i<workers.length;i++) workers[i].stopRunning();
- //don't tie up CPU time when we're hidden or about to die
- //when stopped should probably add an additional timer thread to
- //null all the workers
- }
-
- public void destroy() {
- //Dump more
- for(int i=0;i<workers.length;i++) workers[i].trashCache();
- }
-
- }//end AdvSiteSearcher.java
-