home *** CD-ROM | disk | FTP | other *** search
- /* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */
- #include <sys/types.h>
- #include <sys/stat.h>
- #include "glimpse.h"
- #include <fcntl.h>
- #define CHAR unsigned char
-
- /* ----------------------------------------------------------------------
- get_filenames()
- input: an index vector (index_vect) whose i-th entry is ON if the i-th
- partition is to be searched (when OneFilePerBlock is set, each ON bit of
- the vector selects a single file instead), the partition table in p_table[],
- and the list of all files in "NAME_LIST".
- output: the list of filenames to be searched, stored in GTextfiles[]
- (GNumfiles entries, with the matching file numbers in GFileIndex[]).
- ------------------------------------------------------------------------- */
-
- #if BG_DEBUG
- extern FILE *debug; /* stream that receives the BG_DEBUG trace output */
- #endif /*BG_DEBUG*/
-
- extern int p_table[REAL_PARTITION]; /* get_filenames() reads p_table[i] and p_table[i+1] as the first and one-past-last file number of partition i */
- #if 0
- extern CHAR *GTextfiles[MAXNUM_FILE];
- #else /*0*/
- extern CHAR **GTextfiles; /* result list: get_filenames() allocates MAXNUM_FILE slots and fills them with strdup'ed names */
- #endif /*0*/
- extern int GFileIndex[MAXNUM_FILE]; /* GFileIndex[n] is the file number recorded for GTextfiles[n] */
- extern int GNumfiles; /* number of valid entries in GTextfiles[] / GFileIndex[] */
- extern CHAR GProgname[]; /* program name, printed as the prefix of the malloc-failure message */
- extern CHAR FileNamePat[]; /* not referenced by the code visible in this file section */
- extern int MATCHFILE; /* OFF: copy every selected name; otherwise keep only the names memagrep_search() reports */
- extern int agrep_outpointer; /* read after memagrep_search() as the amount of output in outputbuffer -- presumably set by that call, TODO confirm */
-
- extern int mask_int[32]; /* masks ANDed with index_vect words to test (and clear) the bit of one file */
- extern int OneFilePerBlock; /* non-zero: index_vect is scanned bit by bit, one bit per file, instead of one entry per partition */
-
- char *bigbuffer = NULL; /* constant buffer to read all filenames in NAME_LIST; loaded once, on the first get_filenames() call */
- char *outputbuffer = NULL; /* keeps changing: used for -F search via memagrep; receives the matched names, 64*MAX_NAME_SIZE bytes */
-
- get_filenames(index_vect, argc, argv, dummylen, dummypat, file_num)
- int *index_vect;
- int argc; /* the arguments to agrep for -F */
- char *argv[];
- int dummylen;
- CHAR dummypat[];
- int file_num;
- {
- int i=0,j;
- int start, end, k, prevk;
- struct stat st;
- int filesseen;
- char *beginptr, *endptr;
-
- /* one time processing: assumes during one run of glimpse, the index remains constant! */
- if (bigbuffer == NULL) {
- FILE *fp = fopen(NAME_LIST, "r");
-
- if (fp == NULL) return;
- if (-1 == stat(NAME_LIST, &st)) {
- fclose(fp);
- return;
- }
- bigbuffer = (char *)malloc(st.st_size + MAX_PAT + 2); /* The whole file + place to store -F's pattern */
- if (bigbuffer != NULL) outputbuffer = (char *)malloc(64*MAX_NAME_SIZE); /* Space for max# files per partition */
- if (outputbuffer != NULL) GTextfiles = (CHAR **) malloc(sizeof(CHAR *) * MAXNUM_FILE);
- if (bigbuffer == NULL || outputbuffer == NULL || GTextfiles == NULL) {
- fprintf(stderr, "%s: malloc failure in %s:%d!\n", GProgname, __FILE__, __LINE__);
- if (bigbuffer != NULL) free(bigbuffer);
- if (outputbuffer != NULL) free(outputbuffer);
- if (GTextfiles != NULL) free(GTextfiles);
- exit(2);
- }
- if (st.st_size != fread(bigbuffer, 1, st.st_size, fp)) {/* read in whole file in CONTIGUOUS memory */
- free(bigbuffer);
- free(outputbuffer);
- free(GTextfiles);
- bigbuffer = outputbuffer = NULL;
- fclose(fp);
- return;
- }
- memset(bigbuffer+st.st_size, '\n', MAX_PAT + 2);
- }
-
- #if BG_DEBUG
- fprintf(debug, "get_filenames(): the following partitions are ON\n");
- for(i=0; i<((OneFilePerBlock > 0) ? round(file_num, 8*sizeof(int)) : MAX_PARTITION); i++)
- if(index_vect[i]) fprintf(debug, "i=%d,%x\n", i, index_vect[i]);
- #endif /*BG_DEBUG*/
-
- GNumfiles = 0;
- filesseen = 0;
- endptr = beginptr = bigbuffer;
-
- if(MATCHFILE == OFF) { /* just copy the filenames */
- if (OneFilePerBlock) {
- for (i=0; i<round(file_num, 8*sizeof(int)); i++) {
- if (index_vect[i] == 0) continue;
- for (j=0; j<8*sizeof(int); j++) {
- if (!(index_vect[i] & mask_int[j])) continue;
- start = i*8*sizeof(int) + j;
- end = start + 1;
- #if BG_DEBUG
- fprintf(debug, "start=%d, end=%d\n", start, end);
- #endif /*BG_DEBUG*/
- /*
- * skip over so many filenames and get the filenames to copy.
- * NOTE: successive "start"s ALWAYS increase.
- */
-
- while(filesseen < start) {
- while(*beginptr != '\n') beginptr ++;
- beginptr ++; /* skip over '\n' */
- filesseen ++;
- }
-
- endptr = beginptr;
- while (filesseen < end) {
- while(*endptr != '\n') endptr ++;
- if (endptr == beginptr + 1) goto end_of_loop1; /* null name of non-existent file */
- *endptr = '\0';
- /* return with all the names you COULD get */
- if ((GTextfiles[GNumfiles] = (CHAR *)strdup(beginptr)) == NULL) {
- *endptr = '\n';
- return;
- }
- GFileIndex[GNumfiles] = i*8*sizeof(int) + j;
- *endptr = '\n';
- if (++GNumfiles >= MAXNUM_FILE) return;
- end_of_loop1:
- beginptr = endptr = endptr + 1; /* skip over '\n' */
- filesseen ++;
- }
- }
- }
- } /* one file per block */
- else {
- /* Just the outer for-loop and initial begin/end values are different: rest is same */
- for (i=0; i<MAX_PARTITION; i++) {
- if(index_vect[i] > 0) {
- start = p_table[i];
- end = p_table[i+1];
- if (start >= end) continue;
- #if BG_DEBUG
- fprintf(debug, "start=%d, end=%d\n", start, end);
- #endif /*BG_DEBUG*/
- /*
- * skip over so many filenames and get the filenames to copy.
- * NOTE: successive "start"s ALWAYS increase.
- */
-
- while(filesseen < start) {
- while(*beginptr != '\n') beginptr ++;
- beginptr ++; /* skip over '\n' */
- filesseen ++;
- }
-
- endptr = beginptr;
- while (filesseen < end) {
- while(*endptr != '\n') endptr ++;
- if (endptr == beginptr + 1) goto end_of_loop2; /* null name of non-existent file */
- *endptr = '\0';
- /* return with all the names you COULD get */
- if ((GTextfiles[GNumfiles] = (CHAR *)strdup(beginptr)) == NULL) {
- *endptr = '\n';
- return;
- }
- GFileIndex[GNumfiles] = filesseen;
- *endptr = '\n';
- if (++GNumfiles >= MAXNUM_FILE) return;
- end_of_loop2:
- beginptr = endptr = endptr + 1; /* skip over '\n' */
- filesseen ++;
- }
- }
- }
- }
- }
- else { /* search and copy matched filenames */
- if ((dummylen = memagrep_init(argc, argv, MAX_PAT, dummypat)) <= 0) return;
-
- if (OneFilePerBlock) {
- for (i=0; i<round(file_num, 8*sizeof(int)); i++) {
- if (index_vect[i] == 0) continue;
- for (j=0; j<8*sizeof(int); j++) {
- if (!(index_vect[i] & mask_int[j])) continue;
- start = i*8*sizeof(int) + j;
- end = start + 1;
- #if BG_DEBUG
- fprintf(debug, "start=%d, end=%d\n", start, end);
- #endif /*BG_DEBUG*/
- /*
- * skip over so many filenames and get the region to search =
- * beginptr to endptr: NOTE: successive "start"s ALWAYS increase.
- */
-
- while(filesseen < start) {
- while(*beginptr != '\n') beginptr ++;
- beginptr ++; /* skip over '\n' */
- filesseen ++;
- }
- beginptr --; /* I need '\n' for memory search */
-
- endptr = beginptr+1;
- while (filesseen < end) {
- while(*endptr != '\n') endptr ++;
- endptr ++; /* skip over '\n' */
- filesseen ++;
- }
- endptr --; /* I need '\n' for memory search */
- if (endptr == beginptr + 1) goto end_of_loop3; /* null name of non-existent file */
-
- #if BG_DEBUG
- *endptr = '\0';
- fprintf(debug, "From %d searching:\n%s\n", filesseen, beginptr+1);
- *endptr = '\n';
- #endif /*BG_DEBUG*/
-
- /* if file in the partition matches then copy it */
- if (memagrep_search(dummylen, dummypat, endptr-beginptr + 1, beginptr, 64*MAX_NAME_SIZE, outputbuffer) > 0) {
- #if BG_DEBUG
- {
- char c = outputbuffer[agrep_outpointer + 1];
- outputbuffer[agrep_outpointer + 1] = '\0';
- fprintf(debug, "OUTPUTBUFFER=%s\n", outputbuffer);
- outputbuffer[agrep_outpointer + 1] = c;
- }
- #endif /*BG_DEBUG*/
- k = prevk = 0;
- while(k+1<agrep_outpointer) { /* name of a file cannot have '\n' in it */
- k++;
- if (outputbuffer[k] == '\n') {
- outputbuffer[k] = '\0';
- /* return with all the names you COULD get */
- if ((GTextfiles[GNumfiles] = (CHAR *)strdup(outputbuffer+prevk)) == NULL) return;
- GFileIndex[GNumfiles] = i*8*sizeof(int)+j;
- if (++GNumfiles >= MAXNUM_FILE) return;
- k = prevk = k+1;
- }
- }
- }
- else {
- index_vect[i] &= ~mask_int[j]; /* remove it from the list: used if ByteLevelIndex */
- }
-
- end_of_loop3:
- beginptr = endptr = endptr + 1;
- }
- }
- } /* one file per block */
- else {
- /* Just the outer for-loop and initial begin/end values are different: rest is same */
- for (i=0; i<MAX_PARTITION; i++) {
- if(index_vect[i] > 0) {
- start = p_table[i];
- end = p_table[i+1];
- if (start >= end) continue;
- #if BG_DEBUG
- fprintf(debug, "start=%d, end=%d\n", start, end);
- #endif /*BG_DEBUG*/
- /*
- * skip over so many filenames and get the region to search =
- * beginptr to endptr: NOTE: successive "start"s ALWAYS increase.
- */
-
- while(filesseen < start) {
- while(*beginptr != '\n') beginptr ++;
- beginptr ++; /* skip over '\n' */
- filesseen ++;
- }
- beginptr --; /* I need '\n' for memory search */
-
- endptr = beginptr+1;
- while (filesseen < end) {
- while(*endptr != '\n') endptr ++;
- endptr ++; /* skip over '\n' */
- filesseen ++;
- }
- endptr --; /* I need '\n' for memory search */
- if (endptr == beginptr + 1) goto end_of_loop4; /* null name of non-existent file */
-
- #if BG_DEBUG
- *endptr = '\0';
- fprintf(debug, "From %d searching:\n%s\n", filesseen, beginptr+1);
- *endptr = '\n';
- #endif /*BG_DEBUG*/
-
- /* if file in the partition matches then copy it */
- if (memagrep_search(dummylen, dummypat, endptr-beginptr + 1, beginptr, 64*MAX_NAME_SIZE, outputbuffer) > 0) {
- k = prevk = 0;
- while(k+1<agrep_outpointer) { /* name of a file cannot have '\n' in it */
- k++;
- if (outputbuffer[k] == '\n') {
- outputbuffer[k] = '\0';
- /* return with all the names you COULD get */
- if ((GTextfiles[GNumfiles] = (CHAR *)strdup(outputbuffer+prevk)) == NULL) return;
- GFileIndex[GNumfiles] = filesseen; /* not sure here which one but this is never used so ok to fill junk */
- if (++GNumfiles >= MAXNUM_FILE) return;
- k = prevk = k+1;
- }
- }
- }
- else {
- index_vect[i] = 0; /* mask it off */
- }
-
- end_of_loop4:
- beginptr = endptr = endptr + 1;
- }
- }
- }
- }
-
- #if BG_DEBUG
- fprintf(debug, "The following %d filenames are ON\n", GNumfiles);
- for (i=0; i<GNumfiles; i++)
- fprintf(debug, "\t%s\n", GTextfiles[i]);
- #endif /*BG_DEBUG*/
- return;
- }
-
-