PC World Komputer 1995 November

home *** CD-ROM | disk | FTP | other *** search

/ PC World Komputer 1995 November / PCWK1195.iso / inne / win95 / sieciowe / hotja32.lzh / hotjava / classsrc / browser / tools / javasearch / javaindex.java < prev next >

Wrap

Text File | 1995-08-11 | 6KB | 185 lines

/* * @(#)javaindex.java 1.13 95/03/14 David A. Brown * * Copyright (c) 1994 Sun Microsystems, Inc. All Rights Reserved. * * Permission to use, copy, modify, and distribute this software * and its documentation for NON-COMMERCIAL purposes and without * fee is hereby granted provided that this copyright notice * appears in all copies. Please refer to the file "copyright.html" * for further important copyright and licensing information. * * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. */ package browser.tools.JavaSearch; /** javaindex: the JavaSearch indexer main program */ class javaindex { // Bunch of static vars we use to keep track of performance stats static int numSorts = 0; // # times we sorted the index static int sortTime = 0; // how much time spent sorting static int parseTime = 0; // how much time spent parsing doc files // static int documentsSize = 0; // Total size of Documents in the index /** Show a usage message. * But see the file README in this directory for more info! */ static void showUsage() { System.out.println(" Usage: java javaindex -db database_name [-trimprefix path_to_trim]"); System.out.println("\t [-stoplist stopfile] [-fileprefix path_prefix] [-urlprefix url_prefix]"); System.out.println("\t [-description \"Description of this database\"]"); System.out.println("\t filename filename ..."); } public static void main(String args[]) { int startTime = System.nowMillis(); // // Process args // String arg; int argc = 0; String dbname = null; String trimprefix = null; String fileprefix = ""; String urlprefix = null; String stopfile = null; String description = "JavaSearch database created with 'javaindex'"; try { while ((argc < args.length) && args[argc].startsWith("-")) { if (args[argc].equals("-db")) { dbname = args[++argc]; } else if (args[argc].equals("-trimprefix")) { trimprefix = args[++argc]; } else if (args[argc].equals("-fileprefix")) { fileprefix = args[++argc]; } else if (args[argc].equals("-urlprefix")) { urlprefix = args[++argc]; } else if (args[argc].equals("-description")) { description = args[++argc]; } else if (args[argc].equals("-stoplist")) { stopfile = args[++argc]; } else { System.out.println(" Unknown argument '"+args[argc]+"'!"); showUsage(); return; } argc++; } } catch (ArrayIndexOutOfBoundsException e) { showUsage(); return; } // Make sure we got any required parameters, and that // we have at least one filename to index! if (dbname == null) { System.out.println(" No database_name specified!"); showUsage(); return; } if (argc >= args.length) { System.out.println(" No filenames to index specified!"); showUsage(); return; } Database db = Database.CreateNewDatabase(dbname); db.docPathPrefix = fileprefix; db.docURLPrefix = urlprefix; db.description = description; System.out.println(db); Index index = new Index(stopfile); DocList doclist = new DocList(); // Process each file // Future: feature to recursively index a // whole dir tree would be nice do { String filename = args[argc]; System.out.print("--> indexing file "+filename); // REMIND: We need to handle IO Exceptions better // if they occur here, especially stuff we ought to // handle cleanly (like "File not found") // versus bad stuff like "I/O error". // But to do this we need a better IOException model; // see Article 898 in fp.livejava (from csw, 18 Oct 1994) // Make a Doc out of the file Doc doc = new Doc(filename); doc.trimFilename(trimprefix); doclist.addDoc(doc); System.out.print(" [headline '"+doc.headline+"']"); // Make an IndexingInputStream for the file long start = System.nowMillis(); IndexingInputStream input = new IndexingInputStream(filename); input.setDocType(doc.type); documentsSize += input.available(); // Tell the index to associate this Doc with // all the words in this stream: index.addDocToIndex(doc,input); System.out.println(" ("+(System.nowMillis()-start)+"ms)"); // Close the file input.close(); } while (++argc < args.length); // Debugging: System.out.println("-----"); doclist.dump(); index.sort(); //index.dump(); index.saveAs(db); doclist.saveAs(db); db.saveInfoFile(); // Show some final statistics: int totalTime = System.nowMillis() - startTime; int indexSize = db.indexSize(); int totalDBSize = db.totalDBSize(); System.out.println("\njavaindex finished. "+ doclist.size() + " document" + ((doclist.size()==1?"":"s")) + ", "+index.entries.size()+" words in index."); System.out.println(" total time "+totalTime/1000+" sec."); System.out.println(" sorted the index " + numSorts + " time"+(numSorts==1?"":"s") + "; total time " + sortTime + " ms."); System.out.println(" time processing documents: " + parseTime + " ms."); System.out.println(" total size of documents: " + documentsSize + " bytes, size of index: " + indexSize + " bytes (" + (indexSize*100/documentsSize) + "% of docs)"); System.out.println(" total size of this JavaSearch database: " + totalDBSize + " bytes (" + (totalDBSize*100/documentsSize) + "% of docs)"); } }