home *** CD-ROM | disk | FTP | other *** search
- From: Peter Bain <decvax!watmath!wateng!pdbain>
- Subject: bm version 1.1
- Newsgroups: mod.sources
- Approved: john@genrad.UUCP
-
- Mod.sources: Volume 2, Issue 14
- Submitted by: Peter Bain <decvax!watmath!wateng!pdbain>
-
-
- This is an fgrep-like Boyer-Moore program called "bm"
- which is much faster than the bgrep program posted recently (see below).
- I have found that for 99% of the cases bm is entirely sufficient.
-
- */bin/time bgrep Zurich /usr/dict/words
- Zurich
- 38.7 real 7.3 user 1.1 sys
- */bin/time bm Zurich /usr/dict/words
- Zurich
- 5.2 real 0.9 user 0.5 sys
- script done on Thu Jul 4 10:09:56 1985
-
- 1. Bug fixes from release 1.0:
- - incorrect operation of -x with -c, -s, -l. Fixed.
- - major bug, which was acute with piped input: lines containing
- matches were mangled or missed entirely. Fixed.
- - Some sites were having trouble with externs and #include files
- (the systems at Waterloo run 4.2, and we have not found this,
- but I have changed thing to make everyone happy.
- - Some compilers don't like line 36 of Search.c. I have
- added alternate code (line 39) against this possibility
-
- One site reported abnormally high system time. Installers may want to
- play around with MAXBUFF to optimize.
-
- 2. Added features:
- -e: take next argument as a pattern, even if it starts with a '-'
- -h: do not print file names.
-
- 3. Why bm is always case-sensitive.
- 95% of the time I use a pattern matcher, I know the pattern exactly.
- If not, I use grep. I didn't consider the speed penalty worth it.
-
- -------------- snip snip snip -----------------------------------------
- : This is a shar archive. Extract with sh, not csh.
- : The rest of this file will extract:
- : bm.h bm.c Execute.c Extern.h GetPatFile.c Global.c MakeDesc.c MakeSkip.c MatchFound.c MkDescVec.c MoveResidue.c PrintLine.c PutUsage.c Search.c Makefile README bm.p
- echo Extracting bm.h
- sed 's/^X//' > bm.h << 'e-o-f'
- X#define FIRSTCHAR ' '
- X#define MAXCHAR 0377
- X#define MAXBUFF 8192
- X#define MAXSIZE 100
- X#define MAXPATS 100 /* max number of patterns */
- X#define min(x,y) (x) < (y) ? (x) : (y)
- X#define max(x,y) (x) > (y) ? (x) : (y)
- Xstruct PattDesc {
- X int *Skip1, *Skip2; /* pointers to skip tables */
- X char *Pattern;
- X int PatLen; /* pattern length */
- X char *Start;
- X /* starting position of search (at beginning of pattern) */
- X int Success; /* true when pattern found */
- X};
- e-o-f
- echo Extracting bm.c
- sed 's/^X//' > bm.c << 'e-o-f'
- X#include <stdio.h>
- X#include <sys/file.h>
- X#include <strings.h>
- X#include "bm.h"
- X#include "Extern.h"
- Xmain(argc,argv)
- Xint argc;
- Xchar *argv[];
- X{
- X /* test driver for grep based on Boyer-Moore algorithm */
- X char BigBuff[MAXBUFF + 2];
- X /*
- X * We leave one extra character at the beginning and end of the buffer,
- X * but don't tell Execute about it. This is so when someone is
- X * scanning the buffer and scans past the end (or beginning)
- X * we are still technically in the buffer (picky, but there ARE
- X * machines which would complain)
- X */
- X int ret = 1, /* return code from Execute */
- X NFiles,
- X NPats; /* number of patterns to search for */
- X char i,
- X *FlagPtr,
- X **OptPtr; /* used to scan command line */
- X int TextFile /* file to search */;
- X struct PattDesc *DescVec[MAXPATS];
- X
- X --argc;
- X OptPtr = argv + 1;
- X while ( argc && **OptPtr == '-') {
- X FlagPtr = *OptPtr + 1;
- X while (*FlagPtr) {
- X switch (*FlagPtr) {
- X case 'c': cFlag = 1; break;
- X case 'e': eFlag = 1;
- X /* get the patterns from next arg */
- X NPats = MkDescVec(DescVec,*++OptPtr);
- X --argc;
- X break;
- X case 'f': fFlag = 1;
- X /* read the patterns from a file */
- X NPats = GetPatFile(*++OptPtr,DescVec);
- X --argc;
- X break;
- X case 'l': lFlag = 1; break;
- X case 'n': nFlag = 1; break;
- X case 's': sFlag = 1; break;
- X case 'x': xFlag = 1; break;
- X case 'h': hFlag = 1; break;
- X default:
- X fprintf(stderr,
- X "bm: invalid option: -%c \n",
- X *FlagPtr);
- X PutUsage();
- X exit(2);
- X } /* switch */
- X ++FlagPtr;
- X } /* while */
- X ++OptPtr; --argc;
- X } /* while */
- X /* OptPtr now points to patterns */
- X if (!fFlag && !eFlag) {
- X if (!argc) {
- X fprintf(stderr,"bm: no pattern specified\n");
- X PutUsage();
- X exit(2);
- X } else
- X NPats = MkDescVec(DescVec,*OptPtr);
- X ++OptPtr; --argc;
- X }
- X /* OptPtr now points to first file */
- X NFiles = argc;
- X if (!NFiles)
- X ret &= Execute(DescVec,NPats,0,BigBuff+1);
- X else while (argc--) {
- X if ((NFiles > 1) || lFlag) FileName = *OptPtr;
- X if ((TextFile = open(*OptPtr,O_RDONLY,0)) < 0) {
- X fprintf(stderr,"bm: can't open %s\n",*OptPtr);
- X exit(2);
- X } /* if */
- X ret &= Execute(DescVec,NPats,TextFile,BigBuff+1);
- X if (sFlag && !ret)
- X exit(0);
- X ++OptPtr;
- X close(TextFile);
- X } /* while */
- X if (cFlag) printf("%d\n",MatchCount);
- X exit(ret);
- X} /* main */
- e-o-f
- echo Extracting Execute.c
- sed 's/^X//' > Execute.c << 'e-o-f'
- X#include <stdio.h>
- X#include "bm.h"
- X#include "Extern.h"
- XExecute(DescVec, NPats, TextFile, Buffer)
- Xstruct PattDesc *DescVec[]; /* pointers to status vectors for the different
- X * patterns, including skip tables, position in buffer, etc. */
- Xint NPats; /* number of patterns */
- Xchar Buffer[]; /* holds text from file */
- Xint TextFile; /* file to search */
- X{
- X int NRead, /* number of chars read from file */
- X NWanted, /* number of chars wanted */
- X NAvail, /* number of chars actually read */
- X BuffSize, /* number of chars in buffer */
- X BuffPos, /* offset of first char in Buffer in TextFile */
- X BuffEx, /* flag to indicate that buffer has been searched */
- X ResSize,
- X /* number of characters in the last, incomplete line */
- X Found, /* flag indicates whether pattern found
- X * completely and all matches printed */
- X Valid; /* was the match "valid", i.e. if -x used,
- X * did the whole line match? */
- X char *BuffEnd; /* pointer to last char of last complete line */
- X
- X /* misc working variables */
- X int i;
- X
- X /* initialize */
- X ResSize = 0;
- X Found = 0;
- X BuffPos = 0;
- X for (i=0; i < NPats; i++) {
- X DescVec[i] -> Success = 0;
- X DescVec[i] -> Start = Buffer;
- X } /* for */
- X /* now do the searching */
- X do {
- X /* first, read a bufferfull and set up the variables */
- X NWanted = MAXBUFF - ResSize; NRead = 0;
- X do {
- X NAvail =
- X read(TextFile,Buffer + ResSize + NRead, NWanted);
- X if (NAvail == -1) {
- X fprintf(stderr,
- X "bm: error reading from input file\n");
- X exit(2);
- X } /* if */
- X NRead += NAvail; NWanted -= NAvail;
- X } while (NAvail && NWanted);
- X BuffEx = 0;
- X BuffSize = ResSize + NRead;
- X BuffEnd = Buffer + BuffSize - 1;
- X /* locate the end of the last complete line */
- X while (*BuffEnd != '\n' && BuffEnd >= Buffer)
- X --BuffEnd;
- X if (BuffEnd < Buffer)
- X BuffEnd = Buffer + BuffSize - 1;
- X while (!BuffEx) { /* work through one buffer full */
- X BuffEx = 1; /* set it provisionally, then clear
- X * it if we find the buffer non-empty */
- X for (i=0; i< NPats; i++) {
- X if (!DescVec[i]->Success)
- X /* if the pattern has not been found */
- X DescVec[i]-> Success =
- X Search(DescVec[i]->Pattern,
- X DescVec[i]->PatLen, Buffer, BuffEnd,
- X DescVec[i]->Skip1, DescVec[i]->Skip2,
- X DescVec[i]);
- X if (DescVec[i]->Success){
- X /* if a match occurred */
- X BuffEx = 0;
- X Valid = MatchFound(DescVec[i],BuffPos,
- X Buffer, BuffEnd);
- X Found |= Valid;
- X if ((sFlag || lFlag) && Found)
- X return(0);
- X } /* if */
- X } /* for */
- X } /* while */
- X if(NRead) {
- X ResSize = MoveResidue(DescVec,NPats,Buffer,BuffEnd);
- X BuffPos += BuffSize - ResSize;
- X } /* if */
- X } while (NRead);
- X return(!Found);
- X} /* Execute */
- e-o-f
- echo Extracting Extern.h
- sed 's/^X//' > Extern.h << 'e-o-f'
- X/* global flags for bm */
- Xextern int cFlag, /* true if we want only a count of matching lines */
- X eFlag, /* indicates that next argument is the pattern */
- X fFlag, /* true if the patterns arew to come from a file */
- X lFlag, /* true if we want a list of files containing the pattern */
- X nFlag, /* true if we want the character offset of the pattern */
- X sFlag, /* true if we want silent mode */
- X xFlag, /* true if we want only lines which match entirely */
- X hFlag, /* true if we want no filenames in output */
- X
- X MatchCount; /* count of number of times a search string was found
- X * in the text */
- Xextern char *FileName;
- e-o-f
- echo Extracting GetPatFile.c
- sed 's/^X//' > GetPatFile.c << 'e-o-f'
- X#include <stdio.h>
- X#include <sys/types.h>
- X#include <sys/stat.h>
- X#include "bm.h"
- Xint
- XGetPatFile(PatFile, DescVec)
- Xchar *PatFile;
- Xstruct PattDesc *DescVec[];
- X/* read patterns from a file and set up a pattern descriptor vector */
- X{
- X extern char *malloc();
- X FILE *PFile;
- X struct stat StatBuff;
- X int PatSize; /* the number of chars in all the patterns */
- X char *PatBuff; /* hold the patterns */
- X if (!(PFile = fopen(PatFile,"r"))) {
- X fprintf(stderr,"bm: can't open pattern file %s\n",PatFile);
- X exit(2);
- X } /* if */
- X /* find out how big the patterns are */
- X if (fstat(fileno(PFile),&StatBuff) == -1) {
- X fprintf(stderr,"bm: can't fstat %s\n",PatFile);
- X exit(2);
- X } /* if */
- X PatSize = StatBuff.st_size;
- X if (!PatSize) {
- X fprintf(stderr,"bm: pattern file is empty\n");
- X exit(2);
- X } /* if */
- X if (!(PatBuff = malloc(PatSize))) {
- X fprintf(stderr,"bm: insufficient memory to store patterns\n");
- X exit(2);
- X } /* if */
- X fread(PatBuff,1,PatSize,PFile); /* get the patterns */
- X /* make sure the patterns are null-terminated. We can't have
- X * nulls in the patterns */
- X if (PatBuff[PatSize-1] == '\n')
- X PatBuff[PatSize-1] = '\0';
- X else
- X PatBuff[PatSize] = '\0';
- X return(MkDescVec(DescVec,PatBuff));
- X} /* GetPatFile */
- e-o-f
- echo Extracting Global.c
- sed 's/^X//' > Global.c << 'e-o-f'
- X/* global flags for bm */
- Xint cFlag=0, /* true if we want only a count of matching lines */
- X eFlag=0, /* indicates that next argument is the pattern */
- X fFlag=0, /* true if the patterns are to come from a file */
- X lFlag=0, /* true if we want a list of files containing the pattern */
- X nFlag=0, /* true if we want the character offset of the pattern */
- X sFlag=0, /* true if we want silent mode */
- X xFlag=0, /* true if we want only lines which match entirely */
- X hFlag=0, /* true if we want no filenames in output */
- X
- X MatchCount=0; /* count of number of times a search string was found
- X * in the text */
- Xchar *FileName = 0;
- e-o-f
- echo Extracting MakeDesc.c
- sed 's/^X//' > MakeDesc.c << 'e-o-f'
- X#include <stdio.h>
- X#include "bm.h"
- X#include "Extern.h"
- Xextern char * malloc();
- X/* makes a pattern descriptor */
- Xstruct PattDesc *MakeDesc(Pattern)
- Xchar *Pattern;
- X{
- X struct PattDesc *Desc;
- X Desc = (struct PattDesc *) malloc(sizeof(struct PattDesc));
- X if (!(Desc->Skip1 = (int *) malloc( sizeof(int) * (MAXCHAR + 1)))){
- X fprintf(stderr,"bm: can't allocate space\n");
- X exit(2);
- X } /* if */
- X if (!(Desc->Skip2 = (int *) malloc(sizeof(int) * strlen(Pattern)))){
- X fprintf(stderr,"bm: can't allocate space\n");
- X exit(2);
- X } /* if */
- X Desc->Pattern=Pattern;
- X Desc->PatLen = strlen(Desc->Pattern);
- X MakeSkip(Desc->Pattern,Desc->Skip1,
- X Desc->Skip2,Desc->PatLen);
- X return(Desc);
- X} /* main */
- e-o-f
- echo Extracting MakeSkip.c
- sed 's/^X//' > MakeSkip.c << 'e-o-f'
- X#include "bm.h"
- Xextern char *malloc();
- X
- XMakeSkip(Pattern,Skip1,Skip2,PatLen)
- Xchar Pattern[];
- Xint Skip1[], Skip2[];
- Xint PatLen;
- X/* generate the skip tables for Boyer-Moore string search algorithm.
- X* Skip1 is the skip depending on the character which failed to match
- X* the pattern, and Skip2 is the skip depending on how far we got into
- X* the pattern. Pattern is the search pattern and PatLen is strlen(Pattern) */
- X{
- X int *BackTrack; /* backtracking table for t when building skip2 */
- X int c; /* general purpose constant */
- X int j,k,t,tp; /* indices into Skip's and BackTrack */
- X
- X if (!(BackTrack = (int *) malloc(PatLen * (sizeof (int))))){
- X fprintf("bm: can't allocate space\n");
- X exit(2);
- X } /* if */
- X for (c=0; c<=MAXCHAR; ++c)
- X Skip1[c] = PatLen;
- X for (k=0;k<PatLen;k++) {
- X Skip1[Pattern[k]] = PatLen - k - 1;
- X Skip2[k] = 2 * PatLen - k - 1;
- X } /* for */
- X for (j=PatLen - 1,t=PatLen;j >= 0; --j,--t) {
- X BackTrack[j] = t;
- X while (t<PatLen && Pattern[j] != Pattern[t]) {
- X Skip2[t] = min(Skip2[t], PatLen - j - 1);
- X t = BackTrack[t];
- X } /* while */
- X } /* for */
- X for (k=0;k<=t;++k)
- X Skip2[k] = min(Skip2[k],PatLen+t-k);
- X tp=BackTrack[t];
- X while(tp<PatLen) {
- X while(t<PatLen) {
- X Skip2[t] = min(Skip2[t],tp-t+PatLen);
- X ++t;
- X } /* while */
- X tp = BackTrack[tp];
- X } /* while */
- X cfree(BackTrack);
- X} /* MakeSkip */
- e-o-f
- echo Extracting MatchFound.c
- sed 's/^X//' > MatchFound.c << 'e-o-f'
- X#include <stdio.h>
- X#include "bm.h"
- X#include "Extern.h"
- XMatchFound(Desc, BuffPos, Buffer, BuffEnd)
- Xstruct PattDesc *Desc; /* state info about search for one string */
- Xint BuffPos; /* offset of first char of buffer into the file being searched */
- Xchar *Buffer, /* pointer to the first character in the buffer */
- X *BuffEnd; /* pointer to the last character in the buffer */
- X{
- X char *MLineBegin, *MLineEnd;
- X Desc->Success = 0;
- X /* Start points to first character after a successful match */
- X MLineBegin = MLineEnd = Desc->Start - 1;
- X while(MLineBegin >=Buffer && *MLineBegin != '\n') --MLineBegin;
- X ++MLineBegin;
- X while( MLineEnd <= BuffEnd && *MLineEnd != '\n') ++MLineEnd;
- X if (MLineEnd > BuffEnd) --MLineEnd;
- X /* fixed 25jun85 pdbain. suppress multiple matches of the same
- X * pattern on one line */
- X Desc->Start = MLineEnd + 1;
- X /* check if exact match */
- X if (xFlag && !( Desc->PatLen == (*MLineEnd != '\n' ? ((MLineEnd -
- X MLineBegin) + 1) : (MLineEnd - MLineBegin))))
- X return(0); /* failure */
- X if (sFlag) return(1);
- X if (cFlag) {
- X ++MatchCount;
- X return(1);
- X } /* if */
- X PrintLine(BuffPos+(Desc->Start-Buffer),MLineBegin,MLineEnd);
- X return(1);
- X} /* MatchFound */
- e-o-f
- echo Extracting MkDescVec.c
- sed 's/^X//' > MkDescVec.c << 'e-o-f'
- X#include "bm.h"
- X#include <strings.h>
- X/* scan a newline-separated string of patterns and set up the
- X* vector of descriptors, one pattern descriptor per pattern.
- X* Return the number of patterns */
- Xint
- XMkDescVec(DescVec, Pats)
- Xstruct PattDesc *DescVec[];
- Xchar *Pats;
- X{
- X int NPats = 0;
- X char *EndPat;
- X extern struct PattDesc *MakeDesc();
- X while (*Pats && (EndPat = index(Pats,'\n')) && NPats < MAXPATS) {
- X *EndPat = '\0';
- X DescVec[NPats] = MakeDesc(Pats);
- X Pats = EndPat + 1;
- X ++NPats;
- X } /* while */
- X if (*Pats && NPats < MAXPATS) {
- X DescVec[NPats] = MakeDesc(Pats);
- X ++NPats;
- X } /* if */
- X return(NPats);
- X} /* MkDescVec */
- e-o-f
- echo Extracting MoveResidue.c
- sed 's/^X//' > MoveResidue.c << 'e-o-f'
- X#include "bm.h"
- X/* Moves the text which has not been completely searched at the end of
- X* the buffer to the beginning of the buffer
- X* and returns number of bytes moved */
- Xint MoveResidue(DescVec, NPats,Buffer, BuffEnd)
- Xstruct PattDesc *DescVec[];
- Xint NPats;
- Xchar *Buffer, *BuffEnd;
- X{
- X char *FirstStart, *f, *t, *Residue;
- X int ResSize, i;
- X FirstStart = BuffEnd;
- X /* find the earliest point which has not been
- X * completely searched */
- X for (i=0; i < NPats; i++) {
- X FirstStart =
- X min(FirstStart, DescVec[i]-> Start );
- X } /* for */
- X /* now scan to the beginning of the line containing the
- X * unsearched text */
- X for (Residue = FirstStart; *Residue != '\n' &&
- X Residue >= Buffer; Residue--);
- X if (Residue < Buffer)
- X Residue = FirstStart;
- X else ++Residue;
- X ResSize = (BuffEnd - Residue + 1);
- X /* now move the residue to the beginning of
- X * the file */
- X t = Buffer; f = Residue;
- X for(i=ResSize;i;--i)
- X *t++ = *f++;
- X /* now fix the status vectors */
- X for (i=0; i < NPats; i++) {
- X DescVec[i]->Start -= (Residue - Buffer);
- X } /* for */
- X return(ResSize);
- X} /* MoveResidue */
- e-o-f
- echo Extracting PrintLine.c
- sed 's/^X//' > PrintLine.c << 'e-o-f'
- X#include <stdio.h>
- X#include <strings.h>
- X#include "Extern.h"
- XPrintLine(OffSet,LineStart,LineEnd)
- Xint OffSet; /* offset of LineStart from beginning of file */
- Xchar *LineStart,
- X *LineEnd;
- X{
- X char OffStr[80];
- X if (lFlag) {
- X if (strlen(FileName) > 76) {
- X fprintf(stderr,"bm: filename too long\n");
- X exit(2);
- X } /* if */
- X sprintf(OffStr,"%s\n",FileName);
- X write(1,OffStr,strlen(OffStr));
- X return;
- X } /* if */
- X if (FileName && !hFlag) {
- X if (strlen(FileName) > 76) {
- X fprintf(stderr,"bm: filename too long\n");
- X exit(2);
- X } /* if */
- X sprintf(OffStr,"%s:",FileName);
- X write(1,OffStr,strlen(OffStr));
- X } /* if */
- X if (nFlag) {
- X sprintf(OffStr,"%d: ",OffSet);
- X write(1,OffStr,strlen(OffStr));
- X } /* if */
- X write(1,LineStart,LineEnd-LineStart+1);
- X if (*LineEnd != '\n') write (1,"\n",1);
- X} /* PrintLine */
- e-o-f
- echo Extracting PutUsage.c
- sed 's/^X//' > PutUsage.c << 'e-o-f'
- X#include <stdio.h>
- XPutUsage()
- X{
- X fprintf(stderr,
- X "bm: search for a given string or strings in a file or files\n");
- X fprintf(stderr,
- X "synopsis: bm [option]* [string(s)] [file]*\n");
- X fprintf(stderr,
- X "options:\n");
- X fprintf(stderr,
- X "-c print only a count of matching lines \n");
- X fprintf(stderr,
- X "-e Take next argument as the pattern\n");
- X fprintf(stderr,
- X "-f PFile read patterns from a file PFile\n");
- X fprintf(stderr,
- X "-h Do not print file names\n");
- X fprintf(stderr,
- X "-l print a list of files containing the pattern(s) \n");
- X fprintf(stderr,
- X "-n print the character offset of the pattern(s) \n");
- X fprintf(stderr,
- X "-s silent mode. Return only success (0) or failure (1)\n");
- X fprintf(stderr,
- X "-x print only lines which match entirely \n");
- X} /*PutUsage */
- e-o-f
- echo Extracting Search.c
- sed 's/^X//' > Search.c << 'e-o-f'
- X#include "bm.h"
- X#include "Extern.h"
- Xint Search(Pattern,PatLen,Buffer, EndBuff, Skip1, Skip2, Desc)
- Xchar Pattern[];
- Xint PatLen;
- Xchar Buffer[];
- Xchar *EndBuff;
- Xint Skip1[], Skip2[];
- Xstruct PattDesc *Desc;
- X{
- X register char *k, /* indexes text */
- X *j, /* indexes Pattern */
- X *PatBegin; /* register pointing to char
- X * before beginning of Pattern */
- X register int Skip; /* skip distance */
- X char *PatEnd,
- X *BuffEnd; /* pointers to last char in Pattern and Buffer */
- X BuffEnd = EndBuff;
- X PatBegin = Pattern - 1;
- X PatEnd = Pattern + PatLen - 1;
- X
- X k = Desc->Start;
- X Skip = PatLen-1;
- X while ( Skip <= (BuffEnd - k) ) {
- X j = PatEnd;
- X k = k + Skip;
- X while (j > PatBegin && *j == *k) {
- X --j; --k;
- X } /* while */
- X if (j< Pattern) {
- X /* found it. Start next search
- X * just after the pattern */
- X Desc -> Start = k + 1 + Desc->PatLen;
- X return(1);
- X } /* if */
- X Skip = max(Skip1[*(unsigned char *)k],Skip2[j-Pattern]);
- X } /* while */
- X Desc->Start = k;
- X return(0);
- X} /* Search */
- e-o-f
- echo Extracting Makefile
- sed 's/^X//' > Makefile << 'e-o-f'
- XCCFLAGS = -O
- XSOURCES = bm.h bm.c Execute.c Extern.h\
- X GetPatFile.c Global.c MakeDesc.c MakeSkip.c \
- X MatchFound.c \
- X MkDescVec.c MoveResidue.c PrintLine.c PutUsage.c Search.c
- XOBJECTS = bm.o Execute.o \
- X GetPatFile.o Global.o MakeDesc.o MakeSkip.o \
- X MatchFound.o \
- X MkDescVec.o MoveResidue.o Search.o PrintLine.o PutUsage.o
- XBASEFILES = $(SOURCES) Makefile README bm.p
- Xbm: $(OBJECTS)
- X cc -o bm $(CCFLAGS) $(OBJECTS)
- Xinstall: bm
- X rm /usr/public/bm
- X cp bm /usr/public/bm
- X rm /usr/src/public/bm/*
- X cp $(BASEFILES) /usr/src/public/bm
- Xshar:
- X /usr/public/shar $(BASEFILES) >bm.shar
- Xman: /usr/man/manp/bm.p
- X/usr/man/manp/bm.p: bm.p
- X rm -f /usr/man/manp/bm.p
- X cp bm.p /usr/man/manp/bm.p
- X man bm > /dev/null
- Xbm.o: bm.c bm.h Extern.h
- X cc -c $(CCFLAGS) bm.c
- XPutUsage.o: PutUsage.c bm.h
- X cc -c $(CCFLAGS) PutUsage.c
- XMakeSkip.o: MakeSkip.c bm.h
- X cc -c $(CCFLAGS) MakeSkip.c
- XSearch.o: Search.c bm.h Extern.h
- X cc -c $(CCFLAGS) Search.c
- XExecute.o: Execute.c bm.h
- X cc -c $(CCFLAGS) Execute.c
- XMoveResidue.o: MoveResidue.c bm.h Extern.h
- X cc -c $(CCFLAGS) MoveResidue.c
- XMatchFound.o: MatchFound.c bm.h Extern.h
- X cc -c $(CCFLAGS) MatchFound.c
- XPrintLine.o: PrintLine.c Extern.h
- X cc -c $(CCFLAGS) PrintLine.c
- XMkDescVec.o: MkDescVec.c bm.h
- X cc -c $(CCFLAGS) MkDescVec.c
- XGetPatFile.o: GetPatFile.c bm.h
- X cc -c $(CCFLAGS) GetPatFile.c
- XMakeDesc.o: MakeDesc.c bm.h
- X cc -c $(CCFLAGS) MakeDesc.c
- XGlobal.o: Global.c
- X cc -c $(CCFLAGS) Global.c
- Xlisting:
- X print -i3 $(BASEFILES)
- e-o-f
- echo Extracting README
- sed 's/^X//' > README << 'e-o-f'
- XBm is a fast pattern matching utility, intended to be almost
- Xidentical in functionality to fgrep (ugh!) but much faster. It uses
- Xthe Boyer-Moore algorithm, as described in the papers listed below:
- X
- XD.E. Knuth, J.H. Morris, V.R. Pratt,"Fast Pattern Matching in Strings",
- XSIAM J. Comput., 6(2), June 1977, 323-350,
- X
- XZ. Galil,
- X"On Improving the Worst Case Running Time of the Boyer-Moore String
- XMatching Algorithm",
- XCACM, 22(9), Sept. 1979, ACM,
- X
- XR.S. Boyer, J.S. Moore,"A Fast String Searching Algorithm", CACM, 20(10),
- XOct. 1977, 762-772,
- X
- XG. de V. Smit,"A Comparison of Three String Matching Algorithms",
- XSoftware - Practice and Experience, vol. 12, 1982, 57-66,
- X
- XThe files are:
- XExecute.c: search a file for the patterns
- XExtern.h: declarations of externs
- XGetPatFile.c: read in patterns from a file and set up a vector of
- X pattern descriptors
- XGlobal.c: global variables (complement to Extern.h)
- XMakeDesc.c: create a pattern descriptor for one pattern, including
- X skip tables, etc.
- XMakeSkip.c: make the skip tables for one pattern
- XMakefile: you can figure this one out for yourself
- XMatchFound.c: what to do when you actually FIND a pattern - print it,
- X update flags, etc.
- XMkDescVec.c: make a vector of pattern descriptors, given a string
- X of newline-separated patterns
- XMoveResidue.c: when you come to the end of the buffer, move the
- X unsearched "residue" to the beginning and start again
- XPrintLine.c: print the appropriate stuff after finding a match
- XPutUsage.c: mini-man page.
- XREADME: this file
- XSearch.c: the guts. Implements B-M algorithm given a pattern, skip
- X tables for the pattern, and a buffer to search
- Xbm.c: mainline. mostly interpreting the command line and tidying
- X up at the end. Calls Execute for each file.
- Xbm.h: constants
- Xbm.p: man page
- e-o-f
- echo Extracting bm.p
- sed 's/^X//' > bm.p << 'e-o-f'
- X.TH BM PUBLIC "8 July 1985"
- X.UC 4
- X.SH NAME
- Xbm \- search a file for a string
- X.SH SYNOPSIS
- X.B /usr/public/bm
- X[ option ] ...
- X[ strings ]
- X[ file ]
- X.SH DESCRIPTION
- X.I Bm
- Xsearches the input
- X.I files
- X(standard input default) for lines matching a string.
- XNormally, each line found is copied to the standard output.
- XIt is blindingly fast.
- X.I Bm
- Xstrings are fixed sequences of characters:
- Xthere are no wildcards, repetitions, or other features
- Xof regular expressions.
- XBm is also case sensitive.
- XThe following options are recognized.
- X.TP
- X.B \-x
- X(Exact) only lines matched in their entirety are printed
- X.TP
- X.B \-l
- XThe names of files with matching lines are listed (once) separated by newlines.
- X.TP
- X.B \-c
- XOnly a count of the number of matches
- Xis printed
- X.TP
- X.B \-e "string"
- XThe string is the next argument after the
- X.B \-e
- Xflag. This allows strings beginning with '-'.
- X.TP
- X.B \-h
- XNo filenames are printed, even if multiple files are searched.
- X.TP
- X.B \-n
- XEach line is preceded by the number
- Xof characters from the beginning of the file
- Xto the match.
- X.TP
- X.B \-s
- XSilent mode. Nothing is printed (except error messages).
- XThis is useful for checking the error status.
- X.TP
- X.BI \-f " file"
- XThe string list
- Xis taken from the
- X.I file.
- X.LP
- XUnless the
- X.B \-h
- Xoption is specified
- Xthe file name is shown if there is more than one input file.
- XCare should be taken when using the characters $ * [ ^ | ( ) and \\ in the
- X.I strings
- X(listed on the command line)
- Xas they are also meaningful to the Shell. It is safest to enclose the entire
- X.I expression
- Xargument in single quotes \' \'.
- X.LP
- X.I Bm
- Xsearches for lines that contain one of the (newline-separated)
- X.I strings,
- Xusing
- Xthe Boyer-Moore algorithm.
- XIt is far superior in terms of speed to the grep (egrep, fgrep)
- Xfamily of pattern matchers for fixed-pattern searching,
- Xand its speed increases with pattern length.
- X.SH "SEE ALSO"
- Xgrep(1)
- X.SH DIAGNOSTICS
- XExit status is 0 if any matches are found,
- X1 if none, 2 for syntax errors or inaccessible files.
- X.SH AUTHOR
- XPeter Bain (pdbain@wateng), with modifications suggested by John Gilmore
- X.SH BUGS
- XOnly 100 patterns are allowed.
- X.LP
- XPatterns may not contain newlines.
- X.LP
- XIf a line (delimited by newlines, and the beginning and end of the file)
- Xis longer than 8000 charcters (e.g. in a core dump),
- Xit will not be completely printed.
- X.LP
- XIf multiple patterns are specified, the order of the ouput lines is not
- Xnecessarily the same as the order of the input lines.
- X.LP
- XA line will be printed once for each different string on that line.
- X.LP
- XThe algorithm cannot count lines.
- X.LP
- XThe
- X.B -n
- Xand
- X.B -c
- Xwork differently from fgrep.
- X.LP
- XThe
- X.B -v,
- X.B -i,
- Xand
- X.B -b
- Xare not available.
- e-o-f
- exit 0
-
-