home *** CD-ROM | disk | FTP | other *** search
- Subject: v06i047: 'Globbing' library routine (glob)
- Newsgroups: mod.sources
- Approved: rs@mirror.UUCP
-
- Submitted by: seismo!mcvax!guido (Guido van Rossum)
- Mod.sources: Volume 6, Issue 47
- Archive-name: glob
-
- [ A few items: 1) If you use csh, you have to invoke the test program
- at "./glob"; 2) A comment in the code says [...] is not implemented;
- it is; 3) As the README says, some sites will have to add -Dindex=strchr
- to the CFLAGS variable in the Makefile; 4) This uses the opendir()
- family of routines. --r$ ]
-
- : This is a shell archive.
- : Extract with 'sh this_file'.
- echo 'Start of glob -- expand meta-characters, part 01 out of 01:'
- echo 'x - Makefile'
- sed 's/^X//' > 'Makefile' << 'EOF'
- XCFLAGS=
- XLINTFLAGS=-abh
- XSRCS= glob.c main.c
- XOBJS= glob.o main.o
- XFILES= README $(SRCS) Makefile glob.3
- X
- Xglob: glob.o main.o
- X $(CC) -o glob $(CFLAGS) glob.o main.o
- X
- Xtags: $(SRCS)
- X ctags $(SRCS)
- X
- Xlint: $(SRCS)
- X lint $(LINTFLAGS) $(SRCS)
- X
- Xshar: $(FILES)
- X packmail -o shar -t 'glob -- expand meta-characters' $(FILES)
- EOF
- echo 'x - README'
- sed 's/^X//' > 'README' << 'EOF'
- XGlob -- expand meta-characters in file names -- by Guido van Rossum
- X
- XThis is a filename 'globbing' routine that I wrote. Besides expanding
- X*, ? and [...] exactly like sh(1), it also replaces initial ~ or ~user
- Xby user's home directory, replaces initial $var by the contents of the
- Xenvironment variable, and has a quoting mechanism whereby any of the
- Xmagic characters can be quoted by '\' (including '\' itself).
- X
- XIt is really intended for inclusion in some larger program, but there is
- Xa small test program that expands patterns given as arguments.
- X
- XYou should have received the following files:
- X
- X README this file
- X glob.c the code
- X main.c test program
- X glob.3 manual page
- X Makefile make file
- X
- XPortability issues: I have only been able to test it on a VAX running
- X4.3-beta, but I believe it can be ported to anything that supports
- Xopendir, readdir and closedir. If you don't have index, substitute
- Xstrchr using the -D flag to the C compiler. The program passes lint
- Xexcept for the usual complaints about malloc and strcpy.
- X
- XThis software is copyright (c) 1986 by Stichting Mathematisch Centrum.
- XYou may use, modify and copy it, provided this notice is present in all
- Xmodified or unmodified copies, and provided you don't make money for it.
- X
- X*** You may only distribute the set of files mentioned above as a whole. ***
- X
- XIf you find bugs, or have any other questions, please mail them to the
- Xauthor, whose address is:
- X
- X Guido van Rossum
- X CWI, dept. AA
- X Kruislaan 413
- X 1089 SJ Amsterdam
- X
- X Electronic mail: guido@mcvax.UUCP
- EOF
- echo 'x - glob.3'
- sed 's/^X//' > 'glob.3' << 'EOF'
- X.TH GLOB 3 "June 28, 1986"
- X.AT 3
- X.SH NAME
- Xglob \- expand meta-characters in file names
- X.SH SYNOPSIS
- X.nf
- X.B "int glob(pattern, buffer, size)
- X.B "char *pattern;
- X.B "char *buffer;
- X.B "unsigned int size;
- X.fi
- X.SH DESCRIPTION
- X.I Glob
- Xexpands the meta-characters *, ? and [...] in
- X.I pattern
- Xand copies the file name(s) that match the pattern to
- X.I buffer ,
- Xwhose length is given by
- X.I size .
- X.PP
- XBefore expansion is attempted, the pattern is processed as follows:
- Xan initial
- X.I ~user
- Xis replaced by
- X.I user's
- Xhome directory, an initial ~ is replaced by the contents of $HOME,
- Xand an initial
- X.I $variable
- Xis replaced by the contents of the environment variable by that name.
- XThe universal quoting character \e can be prefixed to *, ? and [, to
- Xinitial ~ or $, and to itself to prevent the special meaning.
- X.PP
- XThe expansion algorithm used is believed to give exactly the same
- Xresults as the expansion of meta-characters by the Bourne shell
- X.I sh (1),
- Xexcept for the different quoting mechanism and the expansion of ~ and $.
- XThe files are sorted.
- X.SH RETURN VALUE
- X.I Glob
- Xreturns the number of files matching the pattern that could be
- Xtransferred to the buffer.
- XIf no files match, the return value is 0 but the pattern is copied to
- Xthe buffer after expansion of ~, $ and \e-quotes.
- X.SH SEE ALSO
- Xsh(1)
- X.SH AUTHOR
- XGuido van Rossum, CWI, Amsterdam <guido@mcvax.uucp>
- X.SH DIAGNOSTICS
- XThere are no explicit diagnostics.
- XIf there is not enough memory to hold the internal lists of
- Xpartially-matching files, not all matching files may be found.
- XIf the buffer has not enough space to hold all matching files, as many
- Xas will fit are transferred to it.
- XIf
- X.I $variable
- Xhas no value, the empty string is substituted.
- XIf no expansion can be found for ~ or
- X.I ~user ,
- Xit is left in the pattern unchanged.
- XIf
- X.SH BUGS
- XThere should be an interface to the ``raw'' globbing algorithm, which
- Xcan return an arbitrary number of files in a ``malloced'' structure and
- Xdoes not do ~ or $ expansion or \e-quoting.
- X.PP
- XThe code uses the Berkeley directory-reading package, so in order to
- Xport it to v7 or sys5 you'll need to get hold of a public domain
- Xre-implementation of that package.
- X.PP
- XThe name is a relict from ancient times.
- EOF
- echo 'x - glob.c'
- sed 's/^X//' > 'glob.c' << 'EOF'
- X/* This software is copyright (c) 1986 by Stichting Mathematisch Centrum.
- X * You may use, modify and copy it, provided this notice is present in all
- X * copies, modified or unmodified, and provided you don't make money for it.
- X *
- X * Written 86-jun-28 by Guido van Rossum, CWI, Amsterdam <guido@mcvax.uucp>
- X */
- X
- X/*
- X * 'Glob' routine -- match * and ? in filename.
- X * Extra services: initial ~username is replaced by username's home dir,
- X * initial ~ is replaced by $HOME, initial $var is replaced by
- X * return value of getenv("var").
- X * Quoting convention: \ followed by a magic character inhibits its
- X * special meaning.
- X * BUGS: [...] is not implemented.
- X * Nonexisting $var is treated as empty string; nonexisting ~user
- X * is copied literally.
- X */
- X
- X#include <stdio.h>
- X#include <strings.h>
- X#include <ctype.h>
- X#include <pwd.h>
- X#include <sys/types.h>
- X#include <sys/dir.h>
- X
- Xstruct passwd *getpwnam();
- Xchar *getenv();
- Xchar *malloc();
- Xchar *realloc();
- X
- X#define EOS '\0'
- X#define SEP '/'
- X#define DOT '.'
- X#define QUOTE '\\'
- X
- X#define BOOL int
- X#define NO 0
- X#define YES 1
- X
- X#define MAXPATH 1024
- X#define MAXBASE 256
- X
- X#define META(c) ((char)((c)|0200))
- X#define M_ALL META('*')
- X#define M_ONE META('?')
- X#define M_SET META('[')
- X#define M_RNG META('-')
- X#define M_END META(']')
- X
- Xstruct flist {
- X int len;
- X char **item;
- X};
- X
- X/* Make a null-terminated string of the chars between two pointers */
- X/* (Limited length, static buffer returned) */
- X
- Xstatic char *
- Xmakestr(start, end)
- X char *start;
- X char *end;
- X{
- X static char buf[100];
- X char *p= buf;
- X
- X while (start < end && p < &buf[100])
- X *p++ = *start++;
- X *p= EOS;
- X return buf;
- X}
- X
- X/* Append string to buffer, return new end of buffer. Guarded. */
- X
- Xstatic char *
- Xaddstr(dest, src, end)
- X char *dest;
- X char *src;
- X char *end;
- X{
- X while (*dest++ = *src++) {
- X if (dest >= end)
- X break;
- X }
- X return --dest;
- X}
- X
- X/* Form a pathname by concatenating head, maybe a / and tail. */
- X/* Truncates to space available. */
- X
- Xstatic void
- Xformpath(dest, head, tail, size)
- X char *dest;
- X char *head;
- X char *tail;
- X unsigned int size; /* sizeof dest */
- X{
- X char *start= dest;
- X
- X for (;;) {
- X if (--size == 0)
- X goto out;
- X if ((*dest++ = *head++) == EOS)
- X break;
- X }
- X if (*tail != EOS && size != 0) {
- X --dest;
- X ++size;
- X if (dest > start && dest[-1] != SEP) {
- X *dest++ = SEP;
- X --size;
- X }
- X for (;;) {
- X if (--size == 0)
- X goto out;
- X if ((*dest++ = *tail++) == EOS)
- X break;
- X }
- X }
- X return;
- X
- X out: *dest= EOS;
- X}
- X
- X/* Find a user's home directory, NULL if not found */
- X
- Xstatic char *
- Xgethome(username)
- X char *username;
- X{
- X struct passwd *p;
- X
- X p= getpwnam(username);
- X if (p == NULL)
- X return NULL;
- X return p->pw_dir;
- X}
- X
- X/* String compare for qsort */
- X
- Xstatic int
- Xcompare(p, q)
- X char **p;
- X char **q;
- X{
- X return strcmp(*p, *q);
- X}
- X
- X/*
- X * Maintain lists of strings.
- X * When memory is full, data is lost but
- X */
- X
- Xstatic void
- Xaddfile(list, head, tail)
- X struct flist *list;
- X char *head;
- X char *tail;
- X{
- X char *str;
- X
- X str= malloc((unsigned) strlen(head) + strlen(tail) + 2);
- X
- X if (str == NULL)
- X return;
- X if (list->item == 0) {
- X list->len= 0;
- X list->item= (char**) malloc(sizeof(char*));
- X }
- X else {
- X list->item= (char**) realloc((char*)list->item,
- X (unsigned) (list->len+1)*sizeof(char*));
- X if (list->item == 0)
- X list->len= 0;
- X }
- X if (list->item != NULL) {
- X formpath(str, head, tail, MAXPATH);
- X list->item[list->len++]= str;
- X }
- X else
- X free(str);
- X}
- X
- X/* Clear the fields of a struct flist before first use */
- X
- Xstatic void
- Xclear(list)
- X struct flist *list;
- X{
- X list->len= 0;
- X list->item= NULL;
- X}
- X
- X/* Free memory held by struct flist after use */
- X
- Xstatic void
- Xdiscard(list)
- X struct flist *list;
- X{
- X int i= list->len;
- X
- X if (list->item != NULL) {
- X while (--i >= 0) {
- X if (list->item[i] != NULL)
- X free(list->item[i]);
- X }
- X free((char*)list->item);
- X list->item= NULL;
- X }
- X list->len= 0;
- X}
- X
- X/* Pattern matching function for filenames */
- X/* Each occurrence of the * pattern causes a recursion level */
- X
- Xstatic BOOL
- Xmatch(name, pat)
- X char *name;
- X char *pat;
- X{
- X char c, k;
- X BOOL ok;
- X
- X while ((c= *pat++) != EOS) {
- X switch (c) {
- X
- X case M_ONE:
- X if (*name++ == EOS)
- X return NO;
- X break;
- X
- X case M_ALL:
- X if (*pat == EOS)
- X return YES;
- X for (; *name != EOS; ++name) {
- X if (match(name, pat))
- X return YES;
- X }
- X return NO;
- X
- X case M_SET:
- X ok= NO;
- X k= *name++;
- X while ((c= *pat++) != M_END) {
- X if (*pat == M_RNG) {
- X if (c <= k && k <= pat[1])
- X ok= YES;
- X pat += 2;
- X }
- X else if (c == k)
- X ok= YES;
- X }
- X if (!ok)
- X return NO;
- X break;
- X
- X default:
- X if (*name++ != c)
- X return NO;
- X break;
- X
- X }
- X }
- X return *name == EOS;
- X}
- X
- X/* Perform pattern matching for one segment of the pathname */
- X
- Xstatic void
- Xsegment(files, mid, pat)
- X struct flist *files;
- X char *mid;
- X char *pat;
- X{
- X char path[MAXPATH];
- X int i;
- X DIR *dirp;
- X struct direct *dp;
- X struct flist new;
- X
- X clear(&new);
- X for (i= 0; i < files->len; ++i) {
- X strcpy(path, files->item[i]);
- X strcat(path, mid);
- X free(files->item[i]);
- X files->item[i]= NULL;
- X dirp= opendir(path);
- X if (dirp == NULL)
- X continue;
- X while ((dp= readdir(dirp)) != NULL) {
- X if (*dp->d_name == DOT && *pat != DOT)
- X ; /* No meta-match on initial '.' */
- X else if (match(dp->d_name, pat))
- X addfile(&new, path, dp->d_name);
- X }
- X closedir(dirp);
- X }
- X discard(files);
- X *files= new;
- X}
- X
- X/* Finish by matching the rest of the pattern (which has no metas) */
- X
- Xstatic void
- Xfindfiles(files, tail)
- X struct flist *files;
- X char *tail;
- X{
- X int i;
- X struct flist new;
- X char path[MAXPATH];
- X
- X if (*tail == EOS || files->len == 0)
- X return;
- X clear(&new);
- X for (i= 0; i < files->len; ++i) {
- X strcpy(path, files->item[i]);
- X strcat(path, tail);
- X free(files->item[i]);
- X files->item[i]= NULL;
- X if (access(path, 0) == 0)
- X addfile(&new, path, "");
- X }
- X discard(files);
- X *files= new;
- X}
- X
- Xstatic void
- Xglob1(pat, files)
- X char *pat;
- X struct flist *files;
- X{
- X char mid[MAXPATH];
- X char *end= mid;
- X char *basestart= mid;
- X BOOL meta= NO;
- X char c;
- X
- X clear(files);
- X addfile(files, "", "");
- X for (;;) {
- X switch (c= *pat++) {
- X
- X case EOS:
- X case SEP:
- X *end= EOS;
- X if (meta) {
- X if (basestart == mid)
- X segment(files, "", basestart);
- X else if (basestart == mid+1) {
- X static char sepstr[]= {SEP, EOS};
- X segment(files, sepstr, basestart);
- X }
- X else {
- X basestart[-1]= EOS;
- X segment(files, mid, basestart);
- X }
- X if (files->len == 0)
- X return;
- X end= basestart= mid;
- X meta= NO;
- X }
- X else if (c == EOS)
- X findfiles(files, mid);
- X if (c == EOS)
- X return;
- X *end++= c;
- X basestart= end;
- X break;
- X
- X case M_ALL:
- X case M_ONE:
- X case M_SET:
- X meta= YES;
- X /* Fall through */
- X default:
- X *end++ = c;
- X
- X }
- X }
- X}
- X
- X/*
- X * The main 'glob' routine: does $ and ~ substitution and \ quoting,
- X * and then calls 'glob1' to do the pattern matching.
- X * Returns 0 if file not found, number of matches otherwise.
- X * The matches found are appended to the buffer, separated by
- X * EOS characters. If no matches were found, the pattern is copied
- X * to the buffer after $ and ~ substitution and \ quoting.
- X */
- X
- Xint
- Xglob(pat, buf, size)
- X char *pat;
- X char *buf;
- X unsigned int size; /* sizeof buf */
- X{
- X char *p, *q;
- X char c;
- X struct flist files;
- X char *start= buf;
- X char *end= buf+size;
- X
- X c= *pat;
- X if (c == '~') {
- X p= ++pat;
- X while (*p != EOS && *p != SEP)
- X ++p;
- X if (p == pat) {
- X q= getenv("HOME");
- X if (q == NULL)
- X --pat;
- X else
- X buf= addstr(buf, q, end);
- X }
- X else {
- X q= gethome(makestr(pat, p));
- X if (q == NULL)
- X --pat;
- X else {
- X buf= addstr(buf, q, end);
- X pat= p;
- X }
- X }
- X }
- X else if (c == '$') {
- X p= ++pat;
- X while (isalnum(*p) || *p == '_')
- X ++p;
- X q= getenv(makestr(pat, p));
- X if (q != NULL)
- X buf= addstr(buf, q, end);
- X pat= p;
- X }
- X else if (c == QUOTE && (pat[1] == '$' || pat[1] == '~'))
- X ++pat;
- X
- X while (buf < end && (c= *pat++) != EOS) {
- X switch (c) {
- X
- X case QUOTE:
- X if ((c= *pat++) != EOS && index("\\*?[", c) != NULL)
- X *buf++ = c;
- X else {
- X *buf++ = QUOTE;
- X --pat;
- X }
- X break;
- X
- X case '*':
- X *buf++ = M_ALL;
- X break;
- X
- X case '?':
- X *buf++ = M_ONE;
- X break;
- X
- X case '[':
- X if (*pat == EOS || index(pat+1, ']') == NULL) {
- X *buf++ = c;
- X break;
- X }
- X *buf++ = M_SET;
- X c= *pat++;
- X do {
- X *buf++ = c;
- X if (*pat == '-' && (c= pat[1]) != ']') {
- X *buf++ = M_RNG;
- X *buf++= c;
- X pat += 2;
- X }
- X } while ((c= *pat++) != ']');
- X *buf++ = M_END;
- X break;
- X
- X default:
- X *buf++ = c;
- X break;
- X
- X }
- X }
- X *buf= EOS;
- X
- X glob1(start, &files);
- X
- X if (files.len == 0) {
- X /* Change meta characters back to printing characters */
- X for (buf= start; *buf != EOS; ++buf) {
- X if (*buf & 0200)
- X *buf &= ~0200;
- X }
- X return 0; /* No match */
- X }
- X else {
- X int i, len;
- X
- X qsort((char*)files.item, files.len, sizeof(char*), compare);
- X buf= start;
- X *buf= EOS;
- X for (i= 0; i < files.len; ++i) {
- X len= strlen(files.item[i]);
- X if (len+1 > size)
- X break;
- X strcpy(buf, files.item[i]);
- X buf += len+1;
- X size -= len+1;
- X }
- X discard(&files);
- X return i;
- X }
- X}
- EOF
- echo 'x - main.c'
- sed 's/^X//' > 'main.c' << 'EOF'
- X/*
- X * Main program to test 'glob' routine.
- X */
- X
- X#include <stdio.h>
- X
- Xextern int glob();
- X
- Xmain(argc, argv)
- X int argc;
- X char **argv;
- X{
- X int i, j, n;
- X char *p;
- X char buffer[10000];
- X
- X if (argc < 2) {
- X fprintf(stderr, "usage: %s pattern ...\n", argv[0]);
- X exit(2);
- X }
- X for (i= 1; i < argc; ++i) {
- X printf("%s: ", argv[i]);
- X n= glob(argv[i], buffer, sizeof buffer);
- X printf("%d\n", n);
- X p= buffer;
- X if (n == 0)
- X ++n;
- X for (j= 0; j < n; ++j) {
- X printf("\t%s\n", p);
- X p += strlen(p) + 1;
- X }
- X }
- X exit(0);
- X}
- EOF
- echo 'Part 01 out of 01 of glob -- expand meta-characters complete.'
- exit 0
-