home *** CD-ROM | disk | FTP | other *** search
- /* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */
- /* ./glimpse/index/dir.c */
-
- /* The function of the program is to traverse the
- directory tree and print the size of the files in the tree.
- This program is derived from the C-programming language book.
- It opens a directory file using the opendir system call, and uses readdir()
- to read each entry of the directory.
- */
-
- #include <stdio.h>
- #include <sys/types.h>
- #if DIRENT
- #include <dirent.h>
- #else /*DIRENT*/
- #include <sys/dir.h>
- #endif /*DIRENT*/
- #include <sys/stat.h>
- #include <fcntl.h>
- #define BUFSIZE 256
- #define DIRSIZE 14
- #include "glimpse.h"
- #undef MAX_LIST
- #define MAX_LIST 100000
-
- #if BG_DEBUG
- extern FILE *LOGFILE;
- #endif /*BG_DEBUG*/
- extern FILE *MESSAGEFILE;
- int ndx = 0; /* file index */
- extern char *name_list[MAX_LIST]; /* store the file names */
- extern int *size_list; /* store the sizes of the files */
- extern int disable_list[FILEMASK_SIZE]; /* store whether to DISABLE indexing or not */
- extern int file_num;
- extern int file_id; /* borrowed from filetype.c */
- extern char INDEX_DIR[MAX_LINE_LEN];
- extern int AddToIndex;
- extern int FastIndex;
- extern int OneFilePerBlock;
- extern int IncludeHigherPriority;
- extern int BuildDictionaryExisting;
- extern int printed_warning;
- extern int p_table[];
- extern FILE *STATFILE;
-
- extern int IndexableFile;
- extern int files_per_partition;
- extern int new_partition;
- extern int files_in_partition;
- extern struct stat istbuf; /* imported from glimpse.c */
- extern int memory_usage;
- extern int mask_int[];
-
- extern char exin_argv[8];
- extern int exin_argc;
- extern char current_dir_buf[2*MAX_LINE_LEN + 4]; /* must have space to store pattern after directory name */
- extern unsigned char dummypat[MAX_PAT];
- extern int dummylen;
- extern FILE *dummyout;
-
- extern struct stat excstbuf;
- extern struct stat incstbuf;
-
- extern int num_filter;
- extern int filter_len[MAX_FILTER];
- extern CHAR *filter[MAX_FILTER];
- extern CHAR *filter_command[MAX_FILTER];
-
- /*
- * Exclude/Include priorities with exclude > include (IncludeHigherPriority = OFF = default):
- * 1. Command line arguments (inclusion --> exclude list is never applied)
- * 2. Exclude list (exclusion)
- * 3. Include list (inclusion)
- * 5. Symbolic links (exclusion --> applying exclude list is unnecessary)
- * 4. filetype (exclusion)
- *
- * Exclude/Include priorities with include > exclude (IncludeHigherPriority = ON = -i):
- * 1. Command line arguments (inclusion --> exclude list is never applied)
- * 2. Include list (inclusion)
- * 3. Symbolic links (exclusion --> applying exclude list is unnecessary)
- * 3. Exclude list (exclusion)
- * 4. filetype (exclusion)
- */
-
- char outname[MAX_LINE_LEN];
- char inname[MAX_LINE_LEN];
-
- fsize(name, pat, pat_len, num_pat, inc, inc_len, num_inc, toplevel)
- char *name;
- char **pat;
- int *pat_len;
- int num_pat;
- char **inc;
- int *inc_len;
- int num_inc;
- int toplevel;
- {
- struct stat stbuf;
- int i;
- int fileindex = -1;
- int force_include = 0;
- int len_current_dir_buf = strlen(current_dir_buf) + 1; /* includes the '\0' which is going to be replaced by '\n' below */
- int name_len;
-
- name_len = strlen(name); /* name[name_len] is '\0' */
-
- #ifdef SW_DEBUG
- printf("num_pat= %d num_inc= %d\n", num_pat, num_inc);
- printf("name= %s\n", name);
- #endif
-
- /*
- * Find out what to exclude, what to include and skip
- * over symbolic links that don't HAVE to be included.
- * Some Extra get_filename_index calls are done but
- * that won't cost you anything (just #ing twice).
- */
-
- /* Check if cache set in glimpse.c is correct */
- if (!IndexableFile && FastIndex && ((fileindex = get_filename_index(name, name_list, file_num)) != -1) && (disable_list[block2index(fileindex)] & mask_int[fileindex % (8*sizeof(int))])) {
- if (num_pat <= 0) {
- if (num_inc <= 0) return 0;
- else if (incstbuf.st_ctime <= istbuf.st_ctime) return 0;
- }
- else {
- if (num_inc <= 0) {
- if (excstbuf.st_ctime <= istbuf.st_ctime) return 0;
- }
- else if ((excstbuf.st_ctime <= istbuf.st_ctime) && (incstbuf.st_ctime <= istbuf.st_ctime)) return 0;
- }
- }
-
- #define PROCESS_EXIT \
- {\
- if (AddToIndex || FastIndex) {\
- if ((fileindex = get_filename_index(name, name_list, file_num)) != -1) \
- remove_filename(fileindex, new_partition);\
- }\
- }
-
- #define PROCESS_EXCLUDE \
- {\
- if (!toplevel) for(i=0; i<num_pat; i++) { /* bg: 15/mar/94 */\
- if (pat_len[i] > 0) {\
- name[name_len] = '\0';\
- if (strstr(name, pat[i]) != NULL) {\
- PROCESS_EXIT;\
- return 0;\
- }\
- }\
- else { /* must call memagrep */\
- int ret;\
- name[name_len] = '\n'; /* memagrep wants names to end with '\n': '\0' is not necessary */\
- /* printf("i=%d patlen=%d pat=%s inlen=%d input=%s\n", i, -pat_len[i], pat[i], len_current_dir_buf, current_dir_buf); */\
- if (((pat_len[i] == -2) && (pat[i][0] == '.') && (pat[i][1] == '*')) ||\
- ((ret = memagrep_search(-pat_len[i], pat[i], len_current_dir_buf, current_dir_buf, 0, dummyout)) > 0))\
- {\
- /* printf("excluding with %d %s\n", ret, name); */\
- name[name_len] = '\0'; /* restore */\
- PROCESS_EXIT;\
- return 0; \
- }\
- /* else printf("ret=%d\n");*/\
- }\
- }\
- name[name_len] = '\0';\
- }
-
- #define PROCESS_INCLUDE \
- {\
- /*\
- * When include has higher priority, we want to include directories\
- * by default and match the include patterns only against filenames.\
- * Based on bug reports for glimpse-2.1. bg: 2/mar/95.\
- */\
- if (IncludeHigherPriority && ((stbuf.st_mode & S_IFMT) == S_IFDIR)) force_include = 1;\
- else for (i=0; i<num_inc; i++) { /* bg: 15/mar/94 */\
- if (inc_len[i] > 0) {\
- name[name_len] = '\0';\
- if (strstr(name, inc[i]) != NULL) {\
- force_include = 1;\
- break;\
- }\
- }\
- else { /* must call memagrep */\
- name[name_len] = '\n'; /* memagrep wants names to end with '\n': '\0' is not necessary */\
- /* printf("pat=%s input=%s\n", pat[i], current_dir_buf); */\
- if (((inc_len[i] == -2) && (inc[i][0] == '.') && (inc[i][1] == '*')) ||\
- (memagrep_search(-inc_len[i], inc[i], len_current_dir_buf, current_dir_buf, 0, dummyout) > 0))\
- {\
- force_include = 1;\
- break;\
- }\
- }\
- }\
- if (!force_include) for (i=0; i<num_filter; i++) { /* bg: 16/sep/94 */\
- if (filter_len[i] > 0) {\
- name[name_len] = '\0';\
- if (strstr(name, filter[i]) != NULL) {\
- force_include = 1;\
- break;\
- }\
- }\
- else { /* must call memagrep */\
- name[name_len] = '\n'; /* memagrep wants names to end with '\n': '\0' is not necessary */\
- /* printf("pat=%s input=%s\n", pat[i], current_dir_buf); */\
- if (((filter_len[i] == -1) && (filter[i][0] == '.') && (filter[i][1] == '*')) ||\
- (memagrep_search(-filter_len[i], filter[i], len_current_dir_buf, current_dir_buf, 0, dummyout) > 0))\
- {\
- force_include = 1;\
- break;\
- }\
- }\
- }\
- name[name_len] = '\0'; /* restore */\
- if (toplevel) force_include = 1;\
- }
-
- if(lstat(name, &stbuf) == -1) {
- if (IndexableFile) return 0;
- /* Can happen for command line arguments, not stuff obtained from fsize_directory() */
- #if BG_DEBUG
- fprintf(LOGFILE, "cannot find %s -- not indexing\n", name);
- #endif /*BG_DEBUG*/
- PROCESS_EXIT;
- return 0;
- }
- /* Else lstat has all the requisite information */
-
- if ((stbuf.st_mode & S_IFMT) == S_IFLNK) {
- if (IndexableFile) return 0;
- PROCESS_INCLUDE;
- if (!force_include) {
- #if BG_DEBUG
- fprintf(LOGFILE, "%s is a symbolic link -- not indexing\n", name);
- #endif /*BG_DEBUG*/
- PROCESS_EXIT;
- return 0;
- }
- if (-1 == stat(name, &stbuf)) {
- #if BG_DEBUG
- fprintf(LOGFILE, "cannot find target of symbolic link %s -- not indexing\n", name);
- #endif /*BG_DEBUG*/
- PROCESS_EXIT;
- return 0;
- }
- }
- else /* if (!IndexableFile) */ {
- /* Put exclude include processing here... stat all the time: that is faster than former! */
- if (FastIndex && ((fileindex = get_filename_index(name, name_list, file_num)) != -1)) {
- /* Don't process exclude/include if the file `name' is older then the index AND the exclude/include file is older then the index */
- if (IncludeHigherPriority) {
- if (!((stbuf.st_ctime <= istbuf.st_ctime) && (incstbuf.st_ctime <= istbuf.st_ctime)))
- PROCESS_INCLUDE;
- if (!force_include && !((stbuf.st_ctime <= istbuf.st_ctime) && (excstbuf.st_ctime <= istbuf.st_ctime)))
- PROCESS_EXCLUDE;
- }
- else {
- if (!((stbuf.st_ctime <= istbuf.st_ctime) && (excstbuf.st_ctime <= istbuf.st_ctime)))
- PROCESS_EXCLUDE;
- if (!((stbuf.st_ctime <= istbuf.st_ctime) && (incstbuf.st_ctime <= istbuf.st_ctime)))
- PROCESS_INCLUDE;
- }
- }
- else { /* Either AddToIndex or fresh indexing or previously excluded file: process exclude and include */
- if (IncludeHigherPriority) {
- PROCESS_INCLUDE;
- if (!force_include)
- PROCESS_EXCLUDE;
- }
- else {
- PROCESS_EXCLUDE;
- PROCESS_INCLUDE;
- }
- }
- }
-
- /* Here, the file exists and has not been excluded -- possibly has been included */
-
- if ((stbuf.st_mode & S_IFMT) == S_IFDIR) {
- if (-1 == fsize_directory(name, pat, pat_len, num_pat, inc, inc_len, num_inc)) return -1;
- }
- else if ((stbuf.st_mode & S_IFMT) == S_IFREG) { /* regular file */
- if (IndexableFile) {
- if (!filetype(name, 0)) printf("%s\n", name);
- return 0;
- }
- file_id ++;
- if (BuildDictionaryExisting) {
- /* Don't even store the names of the files that are not uncompressible */
- if (file_num >= MAXNUM_FILE) {
- fprintf(stderr, "Too many files in index: indexing the first %d only.\n", MAXNUM_FILE);
- return -1;
- }
- if (tuncompress_file(name, outname, TC_EASYSEARCH | TC_OVERWRITE | TC_NOPROMPT) <= 0) return 0;
- file_num++;
- name_list[ndx] = (char *) my_malloc(strlen(outname) + 2);
- strcpy(name_list[ndx], outname);
- size_list[ndx++] = stbuf.st_size;
- return 0;
- }
-
- #ifdef SW_DEBUG
- printf("%s: ", name);
- #endif
-
- if (AddToIndex || FastIndex) {
- if ((fileindex = get_filename_index(name, name_list, file_num)) != -1) {
- size_list[fileindex] = stbuf.st_size;
- if (FastIndex && (stbuf.st_ctime <= istbuf.st_ctime))
- disable_list[block2index(fileindex)] |= mask_int[fileindex % (8*sizeof(int))];
- else { /* AddToIndex or file was modified (=> its type might have changed!) */
- if (filetype(name, 1)) {
- if (!force_include) {
- remove_filename(fileindex, new_partition);
- return 0;
- }
- else {
- #if BG_DEBUG
- fprintf(LOGFILE, "overriding and indexing: %s\n", name);
- #endif /*BG_DEBUG*/
- }
- }
- disable_list[block2index(fileindex)] &= ~(mask_int[fileindex % (8*sizeof(int))]);
- }
- }
- else { /* new file not in filenames so no point in checking */
- if(filetype(name, 1)) {
- if (!force_include) return 0;
- else {
- #if BG_DEBUG
- fprintf(LOGFILE, "overriding and indexing: %s\n", name);
- #endif /*BG_DEBUG*/
- }
- }
-
- if (file_num >= MAXNUM_FILE) {
- fprintf(stderr, "Too many files in index: indexing the first %d only.\n", MAXNUM_FILE);
- return -1;
- }
- name_list[file_num] = (char *)my_malloc(strlen(name) + 2);
- strcpy(name_list[file_num], name);
- size_list[file_num] = stbuf.st_size;
- insert_filename(name_list[file_num], file_num);
- file_num ++;
-
- if (!OneFilePerBlock) {
- if (files_in_partition + 1 > files_per_partition) {
- if (new_partition + 1 > MaxNumPartition) {
- if (!printed_warning) {
- printed_warning = 1;
- if (AddToIndex) {
- fprintf(MESSAGEFILE, "Warning: partition-table overflow! Fresh indexing recommended.n");
- }
- else {
- fprintf(MESSAGEFILE, "Warning: partition-table overflow! Commencing fresh indexing...\n");
- return -1;
- }
- }
- }
- else new_partition++;
- files_in_partition = 0;
- /* so that we don't get into this if-branch until another files_per_partition new files are seen */
- }
-
- p_table[new_partition] = file_num;
- files_in_partition ++;
- }
- }
- }
- else { /* Fresh indexing: very simple -- add everything */
- if(filetype(name, 1)) {
- if (!force_include) return 0;
- else {
- #if BG_DEBUG
- fprintf(LOGFILE, "overriding and indexing: %s\n", name);
- #endif /*BG_DEBUG*/
- }
- }
- if (file_num >= MAXNUM_FILE) {
- fprintf(stderr, "Too many files in index: indexing the first %d only.\n", MAXNUM_FILE);
- return -1;
- }
- file_num++;
- name_list[ndx] = (char *) my_malloc(strlen(name) + 2);
- strcpy(name_list[ndx], name);
- size_list[ndx++] = stbuf.st_size;
- }
- }
- return 0;
- }
-
- /* uses the space in the same "name" to get names of files in that directory and calls fsize */
- /* pat, pat_len, num_pat, inc, inc_len, num_inc are just used for recursive calls to fsize */
- fsize_directory(name, pat, pat_len, num_pat, inc, inc_len, num_inc)
- char *name;
- char **pat;
- int *pat_len;
- int num_pat;
- char **inc;
- int *inc_len;
- int num_inc;
- {
- #if DIRENT
- struct dirent *dp;
- #else /*DIRENT*/
- struct direct *dp;
- #endif /*DIRENT*/
- char *nbp, *nep;
- int i;
- DIR *dirp;
- /*
- printf("in fsize_directory, name= %s\n",name);
- */
- nbp = name + strlen(name);
- if( nbp+DIRSIZE+2 >= name+BUFSIZE ) /* name too long */
- { fprintf(stderr, "name too long\n");
- return 0;
- }
- if((dirp = opendir(name)) == NULL) {
- fprintf(stderr, "permission denied or non-existent directory: %s\n", name);
- return 0;
- }
- *nbp++ = '/';
- for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) {
- if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..")==0)
- goto CONT;
- for(i=0, nep=nbp; dp->d_name[i] != '\0'; i++)
- *nep++ = dp->d_name[i];
- *nep++ = '\0';
- /*
- printf("name= %s\n", name);
- */
- if (-1 == fsize(name, pat, pat_len, num_pat, inc, inc_len, num_inc, 0)) return -1;
- CONT:
- i=0;
- }
- closedir (dirp);
- *--nbp = '\0'; /* restore name */
- return 0;
- }
-
- typedef struct _name_hashelement {
- char name[MAX_LINE_LEN];
- int index;
- struct _name_hashelement *next;
- } name_hashelement;
-
- name_hashelement *name_hashtable[MAX_4K_HASH];
-
- /*
- * Returns the index of the name if the it is found amongst the set
- * of files in name_array; -1 otherwise.
- */
- int
- get_filename_index(name)
- char *name;
- {
- int index = hash4k(name, strlen(name));
- /* int skips=0; */
- name_hashelement *e;
-
- e = name_hashtable[index];
- while((e != NULL) && (strcmp(name, e->name))) {
- /* skips ++; */
- e = e->next;
- }
- /* fprintf(STATFILE, "skips = %d\n", skips); */
- if (e == NULL) return -1;
- return e->index;
- }
-
- insert_filename(name, name_index)
- char *name;
- int name_index;
- {
- int len = strlen(name);
- int index;
- name_hashelement **pe;
-
- index = hash4k(name, len);
- pe = &name_hashtable[index];
- while((*pe != NULL) && (strcmp((*pe)->name, name))) pe = &(*pe)->next;
- if ((*pe) != NULL) return;
- *pe = (name_hashelement *)my_malloc(sizeof(name_hashelement));
- (*pe)->next = NULL;
- strcpy((*pe)->name, name);
- (*pe)->index = name_index;
- }
-
- build_filename_hashtable(names, num)
- char *names[];
- int num;
- {
- int i;
-
- for (i=0; i<MAX_4K_HASH; i++) name_hashtable[i] = NULL;
- for (i=0; i<num; i++) insert_filename(names[i], i);
- }
-
- destroy_filename_hashtable()
- {
- int i;
- name_hashelement *e, *t;
-
- for (i=0; i<MAX_4K_HASH; i++) {
- e = name_hashtable[i];
- while(e!=NULL) {
- t = e;
- e = e->next;
- memory_usage -= sizeof(name_hashelement);
- my_free(t, 0);
- }
- name_hashtable[i] = NULL;
- }
- }
-