home *** CD-ROM | disk | FTP | other *** search
- /* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */
- /* ./glimpse/index/glimpse.c */
- #include "glimpse.h"
- #include <stdlib.h>
-
- extern char **environ;
- extern int errno;
- #if BG_DEBUG
- extern FILE *LOGFILE; /* file descriptor for LOG output */
- #endif /*BG_DEBUG*/
- extern FILE *STATFILE; /* file descriptor for statistical data about indexed files */
- extern FILE *MESSAGEFILE; /* file descriptor for important messages meant for the user */
- extern char INDEX_DIR[MAX_LINE_LEN];
- extern struct stat istbuf;
-
- extern int indexable_char[256];
- extern int OneFilePerBlock;
- extern int IndexNumber;
- extern int CountWords;
- extern int StructuredIndex;
- extern int MAXWORDSPERFILE;
- extern int NUMERICWORDPERCENT;
- extern int AddToIndex;
- extern int FastIndex;
- extern int BuildDictionary;
- extern int BuildDictionaryExisting;
- extern int CompressAfterBuild;
- extern int IncludeHigherPriority;
- extern int FilenamesOnStdin;
- extern int UseFilters;
- extern int ByteLevelIndex;
- /* extern int IndexUnderscore; */
- extern int IndexableFile;
- extern int MAX_PER_MB, MAX_INDEX_PERCENT;
-
- extern int AddedMaxWordsMessage;
- extern int AddedMixedWordsMessage;
-
- extern int file_num;
- extern int file_id;
- extern int part_num;
- extern char *name_list[MAX_LIST];
- extern int p_table[MAX_PARTITION];
- extern int *size_list;
- extern int p_size_list[];
- extern int disable_list[FILEMASK_SIZE];
- extern int memory_usage;
- extern int mask_int[];
-
- extern set_usemalloc(); /* compress/misc.c */
-
- char IProgname[MAX_LINE_LEN];
-
- /*
- * Has newnum crossed the boundary of an encoding? This is so rare that we
- * needn't optimize it by changing the format of the old index and reusing it.
- */
- cross_boundary(oldnum, newnum)
- int oldnum, newnum;
- {
- int ret;
-
- if (oldnum <= 0) return 0;
- ret = ( ((oldnum <= MaxNum8bPartition) && (newnum > MaxNum8bPartition)) ||
- ((oldnum <= MaxNum12bPartition) && (newnum > MaxNum12bPartition)) ||
- ((oldnum <= MaxNum16bPartition) && (newnum > MaxNum16bPartition)) );
- if (ret) fprintf(MESSAGEFILE, "Must change index format. Commencing fresh indexing...\n");
- return ret;
- }
-
- main(argc, argv)
- int argc;
- char **argv;
- {
- int pid = getpid();
- int i;
- char *indexdir;
- char s[MAX_LINE_LEN];
- char working_dir[MAX_LINE_LEN];
- FILE *tmpfp;
- char hash_file[MAX_LINE_LEN], string_file[MAX_LINE_LEN], freq_file[MAX_LINE_LEN];
- char tmpbuf[1024];
- struct stat stbuf;
- char name[MAX_LINE_LEN];
- char outname[MAX_LINE_LEN];
- int specialwords, threshold;
- int backup;
-
- BuildDictionary = ON;
- set_usemalloc();
- srand(pid);
- umask(077);
-
- INDEX_DIR[0] = '\0';
- specialwords = threshold = -1; /* so that compute_dictionary can use defaults not visible here */
- strncpy(IProgname, argv[0], MAX_LINE_LEN);
- memset(disable_list, '\0', sizeof(int) * FILEMASK_SIZE); /* nothing is disabled initially */
- size_list = (int *)malloc(sizeof(int) * MAX_LIST);
- memset(size_list, '\0', sizeof(int) * MAX_LIST); /* free it once partition successfully calculates p_size_list */
- memset(p_size_list, '\0', sizeof(int) * MAX_PARTITION);
-
-
- /*
- * Process options.
- */
-
- while (argc > 1) {
- if (strcmp(argv[1], "-help") == 0) {
- return usage(1);
- }
- #if !BUILDCAST
- else if (strcmp(argv[1], "-V") == 0) {
- printf("\nThis is glimpseindex version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
- return(0);
- }
- else if (strcmp(argv[1], "-I") == 0) {
- IndexableFile = ON;
- argc --; argv ++;
- }
- else if(strcmp(argv[1], "-a") == 0) {
- AddToIndex = ON;
- argc--; argv++;
- }
- else if(strcmp(argv[1], "-b") == 0) {
- ByteLevelIndex = ON;
- argc--; argv++;
- }
- else if(strcmp(argv[1], "-c") == 0) {
- CountWords = ON;
- argc--; argv++;
- }
- else if(strcmp(argv[1], "-f") == 0) {
- FastIndex = ON;
- argc--; argv++;
- }
- else if (strcmp(argv[1], "-o") == 0) {
- OneFilePerBlock = ON;
- argc --; argv ++;
- }
- else if (strcmp(argv[1], "-s") == 0) {
- StructuredIndex = ON;
- argc --; argv ++;
- }
- else if(strcmp(argv[1], "-z") == 0) {
- UseFilters = ON;
- argc--; argv++;
- }
- #else /*!BUILDCAST*/
- else if (strcmp(argv[1], "-V") == 0) {
- printf("\nThis is buildcast version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
- return(0);
- }
- else if(strcmp(argv[1], "-C") == 0) {
- CompressAfterBuild = ON;
- argc --; argv ++;
- }
- else if(strcmp(argv[1], "-E") == 0) {
- BuildDictionaryExisting = ON;
- argc --; argv ++;
- }
- else if (strcmp(argv[1], "-t") == 0) {
- if ((argc <= 2) || !(isdigit(argv[2][0]))) {
- return usage(1);
- }
- else {
- threshold = atoi(argv[2]);
- argc -= 2; argv += 2;
- }
- }
- else if (strcmp(argv[1], "-l") == 0) {
- if ((argc <= 2) || !(isdigit(argv[2][0]))) {
- return usage(1);
- }
- else {
- specialwords = atoi(argv[2]);
- argc -= 2; argv += 2;
- }
- }
- #endif /*!BUILDCAST*/
- else if (strcmp(argv[1], "-w") == 0) {
- if (argc == 2) {
- fprintf(stderr, "-w should be followed by the number of words\n");
- return usage(1);
- }
- MAXWORDSPERFILE = atoi(argv[2]);
- argc -= 2; argv += 2;
- }
- else if (strcmp(argv[1], "-S") == 0) {
- if (argc == 2) {
- fprintf(stderr, "-S should be followed by the stop list limit\n");
- return usage(1);
- }
- MAX_PER_MB = MAX_INDEX_PERCENT = atoi(argv[2]);
- argc -= 2; argv += 2;
- }
- else if(strcmp(argv[1], "-n") == 0) {
- IndexNumber = ON;
- if ((argc <= 2) || !(isdigit(argv[2][0]))) { /* -n has no arg */
- argc --; argv ++;
- }
- else {
- NUMERICWORDPERCENT = atoi(argv[2]);
- if ((NUMERICWORDPERCENT > 100) || (NUMERICWORDPERCENT < 0)) {
- fprintf(stderr, "The percentage of numeric words must be in [0..100]\n");
- return usage(1);
- }
- argc-=2; argv+=2;
- }
- }
- else if(strcmp(argv[1], "-i") == 0) {
- IncludeHigherPriority = ON;
- argc --; argv ++;
- }
- else if(strcmp(argv[1], "-F") == 0) {
- FilenamesOnStdin = ON;
- argc--; argv++;
- }
- /*
- else if(strcmp(argv[1], "-u") == 0) {
- IndexUnderscore = ON;
- argc--; argv++;
- }
- */
- else if (strcmp(argv[1], "-H") == 0) {
- if (argc == 2) {
- fprintf(stderr, "-H should be followed by a directory name\n");
- return usage(1);
- }
- strncpy(INDEX_DIR, argv[2], MAX_LINE_LEN);
- argc -= 2; argv += 2;
- }
- else break; /* rest are directory names */
- }
-
- /*
- * Look for invalid option combos.
- */
-
- if ((argc<=1) && (!FilenamesOnStdin)) {
- return usage(1);
- }
-
- if (ByteLevelIndex) {
- if (MAX_PER_MB <= 0) {
- fprintf(stderr, "Stop list limit (#of occurrences per MB) '%d' must be > 0\n", MAX_PER_MB);
- exit(2);
- }
- }
- else if (OneFilePerBlock) {
- if ((MAX_INDEX_PERCENT <= 0) || (MAX_INDEX_PERCENT > 100)) {
- fprintf(stderr, "Stop list limit (%% of occurrences in files) '%d' must be in (0, 100]\n", MAX_INDEX_PERCENT);
- exit(2);
- }
- }
-
- /*
- * Find the index directory since it is used in all options.
- */
-
- if (INDEX_DIR[0] == '\0') {
- if ((indexdir = getenv("HOME")) == NULL) {
- getcwd(INDEX_DIR, MAX_LINE_LEN-1);
- fprintf(stderr, "Using working-directory '%s' to store index\n\n", INDEX_DIR);
- }
- else strncpy(INDEX_DIR, indexdir, MAX_LINE_LEN);
- }
- getcwd(working_dir, MAX_LINE_LEN - 1);
- if (-1 == chdir(INDEX_DIR)) {
- fprintf(stderr, "Cannot change directory to %s\n", INDEX_DIR);
- return usage(0);
- }
- getcwd(INDEX_DIR, MAX_LINE_LEN - 1); /* must be absolute path name */
- chdir(working_dir); /* get back to where you were */
-
- if (IndexableFile) { /* traverse the given directories and output names of files that are indexable on stdout */
- partition(argc, argv);
- return 0;
- }
- else {
- #if BUILDCAST
- printf("\nThis is buildcast version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
- #else /*BUILDCAST*/
- printf("\nThis is glimpseindex version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
- #endif /*BUILDCAST*/
- }
-
- if (ByteLevelIndex) {
- /* We'll worry about these things later */
- if (AddToIndex || FastIndex) {
- fprintf(stderr, "Fresh indexing recommended: -a and -f are not supported with -b as yet\n");
- exit(1);
- }
- CountWords = AddToIndex = FastIndex = OFF;
- OneFilePerBlock = ON;
- }
-
- /*
- * CONVENTION: all the relevant output is on stdout; warnings/errors are on stderr.
- * Initialize / open important files.
- */
-
- read_filters(INDEX_DIR, UseFilters);
-
- freq_file[0] = hash_file[0] = string_file[0] = '\0';
- strcpy(freq_file, INDEX_DIR);
- strcat(freq_file, "/");
- strcat(freq_file, DEF_FREQ_FILE);
- strcpy(hash_file, INDEX_DIR);
- strcat(hash_file, "/");
- strcat(hash_file, DEF_HASH_FILE);
- strcpy(string_file, INDEX_DIR);
- strcat(string_file, "/");
- strcat(string_file, DEF_STRING_FILE);
- initialize_tuncompress(string_file, freq_file, 0);
-
- #if BG_DEBUG
- sprintf(s, "%s/%s", INDEX_DIR, DEF_LOG_FILE);
- if((LOGFILE = fopen(s, "w")) == 0) {
- fprintf(stderr, "can't open %s for writing\n", s);
- LOGFILE = stderr;
- }
- #endif /*BG_DEBUG*/
-
- sprintf(s, "%s/%s", INDEX_DIR, DEF_MESSAGE_FILE);
- if((MESSAGEFILE = fopen(s, "w")) == 0) {
- fprintf(stderr, "can't open %s for writing\n", s);
- MESSAGEFILE = stderr;
- }
-
- sprintf(s, "%s/%s", INDEX_DIR, DEF_STAT_FILE);
- if((STATFILE = fopen(s, "w")) == 0) {
- fprintf(stderr, "can't open %s for writing\n", s);
- STATFILE = stderr;
- }
-
- #if BG_DEBUG
- fprintf(LOGFILE, "Index Directory = %s\n\n", INDEX_DIR);
- #endif /*BG_DEBUG*/
- if (MAXWORDSPERFILE != 0) fprintf(MESSAGEFILE, "Index: maximum number of indexed words per file = %d\n", MAXWORDSPERFILE);
- else fprintf(MESSAGEFILE, "Index: maximum number of indexed words per file = infinity\n");
- fprintf(MESSAGEFILE, "Index: maximum percentage of numeric words per file = %d\n", NUMERICWORDPERCENT);
-
- set_indexable_char(indexable_char);
-
- #if BUILDCAST
-
- CountWords = ON;
- AddToIndex = OFF;
- FastIndex = OFF;
-
- /* Save old search-dictionaries */
-
- sprintf(s, "%s/.glimpse_index", INDEX_DIR);
- if (!access(s, R_OK)) {
- sprintf(s, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid);
- if (-1 == mkdir(s, 0700)) {
- fprintf(stderr, "cannot create temporary directory %s\n", s);
- return -1;
- }
- sprintf(s, "mv -f %s/.glimpse_index %s/.glimpse_tempdir.%d\n", INDEX_DIR, INDEX_DIR, pid);
- system(s);
- sprintf(s, "mv -f %s/.glimpse_partitions %s/.glimpse_tempdir.%d\n", INDEX_DIR, INDEX_DIR, pid);
- system(s);
- sprintf(s, "mv -f %s/.glimpse_filenames %s/.glimpse_tempdir.%d\n", INDEX_DIR, INDEX_DIR, pid);
- system(s);
- sprintf(s, "mv -f %s/.glimpse_statistics %s/.glimpse_tempdir.%d\n", INDEX_DIR, INDEX_DIR, pid);
- system(s);
- /* Don't save messages, log, debug, etc. */
- sprintf(s, "%s/.glimpse_attributes", INDEX_DIR);
- if (!access(s, R_OK)) {
- sprintf(s, "mv -f %s/.glimpse_attributes %s/.glimpse_tempdir.%d\n", INDEX_DIR, INDEX_DIR, pid);
- system(s);
- }
- }
-
- /* Backup old cast-dictionaries: don't use move since indexing might want to use them */
- sprintf(s, "%s/.glimpse_quick", INDEX_DIR);
- if (!access(s, R_OK)) { /* there are previous cast dictionaries */
- backup = rand();
- sprintf(s, "%s/.glimpse_backup.%x", INDEX_DIR, backup);
- if (-1 == mkdir(s, 0700)) {
- fprintf(stderr, "cannot create backup directory %s\n", s);
- return -1;
- }
- sprintf(s, "cp %s/.glimpse_quick %s/.glimpse_backup.%x\n", INDEX_DIR, INDEX_DIR, backup);
- system(s);
- sprintf(s, "cp %s/.glimpse_compress %s/.glimpse_backup.%x\n", INDEX_DIR, INDEX_DIR, backup);
- system(s);
- sprintf(s, "cp %s/.glimpse_compress.index %s/.glimpse_backup.%x\n", INDEX_DIR, INDEX_DIR, backup);
- system(s);
- sprintf(s, "cp %s/.glimpse_uncompress %s/.glimpse_backup.%x\n", INDEX_DIR, INDEX_DIR, backup);
- system(s);
- sprintf(s, "cp %s/.glimpse_uncompress.index %s/.glimpse_backup.%x\n", INDEX_DIR, INDEX_DIR, backup);
- system(s);
- printf("Saved previous cast-dictionary in %s/.glimpse_backup.%x\n", INDEX_DIR, backup);
- }
-
- /* Now index these files, and build new dictionaries */
- partition(argc, argv);
- destroy_filename_hashtable();
- if (size_list != NULL) free(size_list);
- size_list = NULL;
- build_index();
-
- cleanup();
- save_data_structures();
- uninitialize_common();
- uninitialize_tcompress();
- uninitialize_tuncompress();
- compute_dictionary(threshold, DISKBLOCKSIZE, specialwords, INDEX_DIR);
-
- if (CompressAfterBuild) {
- /* For the new compression */
- if (!initialize_tcompress(hash_file, freq_file, TC_ERRORMSGS)) goto docleanup;
- printf("Compressing files with new dictionary...\n");
- /* Use the set of file-names collected during partition() / modified during build_hash */
- for(i=0; i<file_num; i++) {
- if (disable_list[block2index(i)] & mask_int[i%(8*sizeof(int))]) continue;
- strcpy(name, name_list[i]);
- tcompress_file(name, outname, TC_REMOVE | TC_EASYSEARCH | TC_OVERWRITE | TC_NOPROMPT);
- }
- }
-
- docleanup:
- /* Restore old search-dictionaries */
- sprintf(s, "%s/.glimpse_tempdir.%d/.glimpse_index", INDEX_DIR, pid);
- if (!access(s, R_OK)) {
- sprintf(s, "mv -f %s/.glimpse_tempdir.%d/.glimpse_* %s\n", INDEX_DIR, pid, INDEX_DIR);
- system(s);
- sprintf(s, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid);
- rmdir(s);
- }
- printf("\nBuilt new cast-dictionary in %s\n", INDEX_DIR);
-
- #else /*BUILDCAST*/
-
- if (AddToIndex || FastIndex) {
- /* Not handling byte level indices here for now */
- int indextype;
-
- sprintf(s, "%s/%s", INDEX_DIR, INDEX_FILE);
- if (-1 == stat(s, &istbuf)) {
- if (AddToIndex) {
- fprintf(stderr, "Cannot find previous index! Fresh indexing recommended\n", s);
- return usage(0);
- }
- file_num = 0;
- file_id = 0;
- part_num = 1;
- goto fresh_indexing;
- }
-
- /* Find out existing index of words and partitions/filenumbers */
- if ((indextype = get_index_type(s)) < 0) {
- fprintf(stderr, "Fresh indexing recommended: -a and -f are not supported with -b as yet\n");
- exit(1);
- }
- file_num = part_num = 0;
- sprintf(s, "%s/%s", INDEX_DIR, NAME_LIST);
- file_num = get_array_of_lines(s, name_list, MAX_LIST, 1);
- if (!indextype) {
- sprintf(s, "%s/%s", INDEX_DIR, P_TABLE);
- part_num = get_table(s, p_table, MAX_PARTITION, 1) - 1; /* part_num INCLUDES last partition */
- }
- else merge_splits(); /* Is never called at present for ByteLevelIndex since each build is from scratch. MUST HANDLE LATER */
-
- /* Check for errors, Set OneFilePerBlock */
- if ( (file_num <= 0) || (!indextype && (part_num <= 0)) ) {
- if (AddToIndex) {
- fprintf(stderr, "Cannot find previous glimpseindex files! Fresh indexing recommended\n");
- return usage(0);
- }
- file_num = 0;
- file_id = 0;
- part_num = 1;
- goto fresh_indexing;
- }
- if (OneFilePerBlock && !indextype) {
- fprintf(stderr, "Warning: ignoring option -o: using format of existing index\n");
- }
- OneFilePerBlock = indextype;
-
- /* Used in FastIndex for all existing files, used in AddToIndex if we are trying to add an existing file */
- build_filename_hashtable(name_list, file_num);
-
- #if 0
- /* Test if these are inverses of each other */
- save_data_structures();
- merge_splits();
- #endif /*0*/
-
- /*
- * FastIndex: set disable-flag for unchanged files: remove AND
- * disable non-existent files. Let hole remain in file-names/partitions.
- */
- if (FastIndex) {
- for (i=0; i<file_num; i++)
- if (-1 == stat(name_list[i], &stbuf)) {
- remove_filename(i, -1);
- }
- else if (((stbuf.st_mode & S_IFMT) == S_IFREG) && (stbuf.st_ctime <= istbuf.st_ctime)) {
- /* This is just used as a cache since exclude/include processing is not done here: see dir.c */
- disable_list[block2index(i)] |= mask_int[i % (8*sizeof(int))];
- }
- else {
- /* Can't do it for directories since files in it can be modified w/o date reflected in the directory. Same for symlinks. */
- size_list[i] = stbuf.st_size;
- disable_list[block2index(i)] &= ~(mask_int[i % (8*sizeof(int))]);
- }
- }
- /*
- * AddToIndex: disable all existing files, remove those that don't exist now.
- * Out of old ones, only ADDED FILES are re-enabled: dir.c
- */
- else {
- for (i=0; i<file_num; i++) {
- if (-1 == stat(name_list[i], &stbuf)) {
- remove_filename(i, -1);
- }
- else {
- size_list[i] = stbuf.st_size; /* ONLY for proper statistics in save_data_structures() */
- disable_list[block2index(i)] |= mask_int[i % (8*sizeof(int))];
- }
- }
- }
-
- /* Put old/new files into partitions/filenumbers */
- if (-1 == oldpartition(argc, argv)) {
- for(i=0;i<file_num;i++) {
- #if BG_DEBUG
- memory_usage -= (strlen(name_list[i]) + 2);
- #endif /*BG_DEBUG*/
- if (name_list[i] != NULL) my_free(name_list[i], 0);
- name_list[i] = NULL;
- }
- memset(disable_list, '\0', sizeof(int) * FILEMASK_SIZE);
- file_num = 0;
- file_id = 0;
- for (i=0;i<part_num; i++) {
- p_table[i] = 0;
- }
- part_num = 1;
- destroy_filename_hashtable();
- goto fresh_indexing;
- }
-
- /* Reindex all the files but use the file-names obtained with oldpartition() */
- if (cross_boundary(OneFilePerBlock, file_num)) {
- memset(disable_list, '\0', sizeof(int) * FILEMASK_SIZE);
- }
-
- if (size_list != NULL) free(size_list);
- size_list = NULL;
- build_index();
- destroy_filename_hashtable();
- #if BG_DEBUG
- fprintf(LOGFILE, "Built indices in %s/%s\n", INDEX_DIR, INDEX_FILE);
- #endif /*BG_DEBUG*/
- goto docleanup;
- }
-
- fresh_indexing:
- /* These should be zeroed since they can confuse fsize and fsize_directory() */
- AddToIndex = 0;
- FastIndex = 0;
- #if BG_DEBUG
- fprintf(LOGFILE, "Commencing fresh indexing\n");
- #endif /*BG_DEBUG*/
- partition(argc, argv);
- destroy_filename_hashtable();
- if (size_list != NULL) free(size_list);
- size_list != NULL;
- build_index();
- #if BG_DEBUG
- fprintf(LOGFILE, "\nBuilt indices in %s/%s\n", INDEX_DIR, INDEX_FILE);
- #endif /*BG_DEBUG*/
-
- docleanup:
- cleanup();
- save_data_structures();
- #if BG_DEBUG
- fflush(LOGFILE);
- fclose(LOGFILE);
- #endif /*BG_DEBUG*/
- fflush(MESSAGEFILE);
- fclose(MESSAGEFILE);
- fflush(STATFILE);
- fclose(STATFILE);
- if (AddedMaxWordsMessage) printf("\nSome files contributed > %d words to the index: check %s\n", MAXWORDSPERFILE, DEF_MESSAGE_FILE);
- if (AddedMixedWordsMessage) printf("Some files had numerals in > %d%% of the indexed words: check %s\n", NUMERICWORDPERCENT, DEF_MESSAGE_FILE);
-
- printf("\nIndex-directory: \"%s\"\nGlimpse-files created here:\n", INDEX_DIR);
- chdir(INDEX_DIR);
- sprintf(s, "ls -lg .glimpse_* > /tmp/%d\n", pid);
- system(s);
- sprintf(s, "/tmp/%d", pid);
- if ((tmpfp = fopen(s, "r")) != NULL) {
- memset(tmpbuf, '\0', 1024);
- while(fgets(tmpbuf, 1024, tmpfp) != NULL) fputs(tmpbuf, stdout);
- fflush(tmpfp);
- fclose(tmpfp);
- unlink(s);
- }
- else fprintf(stderr, "cannot open %s to `cat': check %s for .glimpse - files\n", s, INDEX_DIR);
- #endif /*BUILDCAST*/
-
- return 0;
- }
-
- cleanup()
- {
- char s[MAX_LINE_LEN];
-
- sprintf(s, "%s/%s", INDEX_DIR, I1);
- unlink(s);
- sprintf(s, "%s/%s", INDEX_DIR, I2);
- unlink(s);
- sprintf(s, "%s/%s", INDEX_DIR, I3);
- unlink(s);
- sprintf(s, "%s/%s", INDEX_DIR, O1);
- unlink(s);
- sprintf(s, "%s/%s", INDEX_DIR, O2);
- unlink(s);
- sprintf(s, "%s/%s", INDEX_DIR, O3);
- unlink(s);
- sprintf(s, "%s/.glimpse_apply.%d", INDEX_DIR, getpid());
- unlink(s);
- }
-
- #if !BUILDCAST
- usage(flag)
- int flag;
- {
- if (flag) fprintf(stderr, "\nThis is glimpseindex version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
- fprintf(stderr, "usage: %s [-help] [-a] [-f] [-i] [-n [#]] [-o] [-s] [-w #] [-F] [-H dir] [-I] [-S lim] [-V] dirs/files\n", IProgname);
- fprintf(stderr, "summary of frequently used options\n(for a more detailed listing see 'man glimpse'):\n");
- fprintf(stderr, "-help: outputs this menu\n");
- fprintf(stderr, "-a: add given files/dirs to an existing index\n");
- fprintf(stderr, "-b: build a (large) byte level index to speed up search\n");
- fprintf(stderr, "-f: use modification dates to do fast indexing\n");
- fprintf(stderr, "-n #: index numbers; warn if file adds > #%% numeric words: default is 50\n");
- fprintf(stderr, "-o: optimize for speed by building a larger index\n");
- /* fprintf(stderr, "-s: build the index for structured queries (a1=v1 &/| a2=v2...)\n"); this should not be advertised */
- fprintf(stderr, "-w #: warn if a file adds > # words to the index\n");
- fprintf(stderr, "-F: expect filenames on stdin (useful for pipelining)\n");
- fprintf(stderr, "-H 'dir': .glimpse-files should be in directory 'dir': default is '~'\n");
- fprintf(stderr, "\n");
- fprintf(stderr, "For questions about glimpse, please contact `%s'\n", GLIMPSE_EMAIL);
- exit(1);
- }
- #else /*!BUILDCAST*/
- usage(flag)
- int flag;
- {
- if (flag) fprintf(stderr, "\nThis is buildcast version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
- fprintf(stderr, "usage: %s [-help] [-t] [-i] [-l] [-n [#]] [-w #] [-C] [-E] [-F] [-H dir] [-V] dirs/files\n", IProgname);
- fprintf(stderr, "summary of frequently used options\n(for a more detailed listing see 'man cast'):\n");
- fprintf(stderr, "-help: output this menu\n");
- fprintf(stderr, "-n #: index numbers; warn if file adds > #%% numeric words: default is 50\n");
- fprintf(stderr, "-w #: warn if a file adds > # words to the index\n");
- fprintf(stderr, "-C: compress files with the new dictionary after building it\n");
- fprintf(stderr, "-E: build cast dictionary using existing compressed files only\n");
- fprintf(stderr, "-F: expect filenames on stdin (useful for pipelining)\n");
- fprintf(stderr, "-H 'dir': .glimpse-files should be in directory 'dir': default is '~'\n");
- fprintf(stderr, "\n");
- fprintf(stderr, "For questions about glimpse, please contact `%s'\n", GLIMPSE_EMAIL);
- exit(1);
- }
- #endif /*!BUILDCAST*/
-