home *** CD-ROM | disk | FTP | other *** search
- Subject: v13i068: Perform "set" operations on command line arguments
- Newsgroups: comp.sources.unix
- Sender: sources
- Approved: rsalz@uunet.UU.NET
-
- Submitted-by: Chris Tweed <mcvax!caad.ed.ac.uk!chris@UUNET.UU.NET>
- Posting-number: Volume 13, Issue 68
- Archive-name: sets
-
- I am sending you a program called 'sets' which you may wish to consider
- for comp.sources.unix. Sets performs union, intersection, and difference
- operations on elements of two sets given on the command line. I have
- found it most useful for limiting the set of filenames I want another
- command to work on.
-
- For example, if I want to edit every file except those ending in '.c' I
- could type:
- vi `sets * -d *.c`
- which prints out the difference between the set of all files in the
- current directory and the set of all '.c' files in the same directory.
-
- Another example is to list the files in one directory which have the same
- names as those in another directory, i.e. the intersection of the two
- sets:
- sets * -i ../*
-
- Hope it's useful.
- Chris Tweed
- chris@caad.ed.ac.uk
- ..!mcvax!ukc!edcaad!chris
-
- # This is a shell archive. Remove anything before this line,
- # then unpack it by saving it in a file and typing "sh file".
- # Contents: sets.L Makefile sets.c
-
- echo x - sets.L
- sed 's/^@//' > "sets.L" <<'@//E*O*F sets.L//'
- @.TH SETS L "December 3, 1987" "" "Local UNIX Programmer's Manual"
- @.UC 4
- @.SH NAME
- sets \- performs set operations on its arguments
- @.SH SYNOPSIS
- @.B sets
- [-p] e1 e2 ... en [\-u] [\-d] [\-i] e1 e2 ... en
- @.br
- @.SH DESCRIPTION
- @.I Sets
- prints on the standard output stream the result of a
- @.B single
- set operation on
- two sets of elements provided on the command line.
- The sets are separated by the operator flag.
- The program collects the elements for each set, removes
- duplicated elements, and then performs the set operation.
- @.PP
- @.I Sets
- performs three set operations:
- @.TP
- @.B "e1 e2 ... en \-u[nion] e1 e2 ... en"
- prints the union of the two sets;
- @.TP
- @.B "e1 e2 ... en \-d[ifference] e1 e2 ... en"
- prints the set difference
- @.I "e1 e2 ... en"
- \-
- @.I "e1 e2 ... en;"
- @.TP
- @.B "e1 e2 ... en \-i[ntersection] e1 e2 ... en"
- prints the intersection of the two sets.
- @.PP
- As
- @.I sets
- is intended to be used on filenames it ignores leading pathnames
- in the set operations.
- The
- @.B \-p
- flag makes pathnames significant in membership tests.
- @.SH "USAGE"
- @.PP
- @.I Sets
- is most useful for restricting the files to be processed by some
- other command.
- For example, to
- @.I grep
- all files in a directory except the object files you might use:
- @.TP
- grep string `sets * -d *.o`
- @.PP
- Since by default leading pathnames are ignored,
- @.I sets
- can be used across directories \- for example, to list files
- with the same names in two directories:
- @.TP
- sets ../* -i *
- @.PP
- Note that full pathnames are included in the output.
- As a result the relative position
- of the sets on the command line is significant.
- The above command will print all matching names with a leading "../".
- If the position of the sets is reversed only the filenames will be printed.
- @.SH "FILES"
- @.PP
- /usr/local/sets
- @.br
- /usr/src/local/sets.c
- @.\" .SH "SEE ALSO"
- @.\" .SH "DIAGNOSTICS"
- @.\" .SH "BUGS"
- @.SH "AUTHOR"
- @.PP
- Chris Tweed
- @//E*O*F sets.L//
- chmod u=rw,g=r,o=r sets.L
-
- echo x - Makefile
- sed 's/^@//' > "Makefile" <<'@//E*O*F Makefile//'
- CFLAGS=-O
- PROG=sets
- FINAL=/usr/local/bin/$(PROG)
- MAN=/usr/man/manl/$(PROG).l
-
- $(PROG): $(PROG).c
- cc $(CFLAGS) -o $(PROG) $(PROG).c
-
- install: $(PROG)
- install -s ./$(PROG) $(FINAL)
- cp $(PROG).L $(MAN)
- @//E*O*F Makefile//
- chmod u=rw,g=r,o=r Makefile
-
- echo x - sets.c
- sed 's/^@//' > "sets.c" <<'@//E*O*F sets.c//'
- /*
- * sets - performs set operations on two sets of arguments and
- * prints the result on the standard output stream
- *
- * usage: sets [-p[aths]] e1 e2 ... en \-u[nion] e1 e2 ... en
- * OR
- * e1 e2 ... en \-d[ifference] e1 e2 ... en
- * OR
- * e1 e2 ... en \-i[ntersection] e1 e2 ... en
- *
- * This code may be freely distributed provided this comment
- * is not removed or substantially altered. Please mail me any
- * fixes, changes, or enhancements.
- *
- * Christopher Tweed, EdCAAD, University of Edinburgh, Scotland.
- * chris@caad.ed.ac.uk
- * ..mcvax!ukc!edcaad!chris
- *
- * 3 December 1987.
- *
- */
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-
#define MAXSET 256              /* maximum size of a set */

/* true when the two strings compare equal */
#define STREQ(s1, s2)   (strcmp((s1), (s2)) == 0)
/* logical negation of a BOOLEAN-valued expression */
#define NOT(p)          ((p) == FALSE)
/*
 * the string used for membership tests: the element with its leading
 * path stripped while ignorep is TRUE, otherwise the element itself.
 * Both branches are parenthesized so any expression may be passed.
 */
#define NAME(s)         ((ignorep == TRUE) ? nopath(s) : (s))

typedef enum { FALSE=0, TRUE } BOOLEAN;
typedef enum { NULL_OP=0, UNION, DIFF, INTERSECT } OPERATOR;

extern int strcmp();
int nodups();                   /* called by main() before its definition */
static void too_many();
static void usage();
static char *nopath();
static BOOLEAN member();
static BOOLEAN ignorep = TRUE;  /* cleared by the -p flag */
-
- main(argc, argv)
- int argc;
- char *argv[];
- {
- int i, j; /* general purpose */
- BOOLEAN second = FALSE; /* flag set after operator */
- char *set1[MAXSET]; /* the first set */
- int n1 = 0; /* number of elements in first set */
- char *set2[MAXSET]; /* the second set */
- int n2 = 0; /* number of elements in second set */
- int n; /* number in each set */
- register OPERATOR op = NULL_OP; /* set operation to perform */
-
- if (argc < 2) {
- fprintf(stderr, "not enough arguments\n");
- (void) usage(argv[0]); /* EXITS */
- }
-
- n2 = n1 = 0;
- /* collect sets */
- while(--argc) {
- if (argv[1][0] == '-') {
- second = TRUE; /* found an operator */
- switch (argv[1][1]) {
- case 'u': /* set union */
- op = UNION;
- break;
- case 'd': /* set difference */
- op = DIFF;
- break;
- case 'i': /* set intersection */
- op = INTERSECT;
- break;
- case 'p': /* don't ignore paths */
- ignorep = FALSE;
- break;
- default:
- fprintf(stderr, "illegal set operator %c\n",
- argv[1][1]);
- (void) usage(argv[0]); /* EXITS */
- }
- } else {
- if (second == TRUE) {
- if (n2 == MAXSET)
- (void) too_many(); /* EXITS */
- set2[n2++] = argv[1];
- } else {
- if (n1 == MAXSET)
- (void) too_many(); /* EXITS */
- set1[n1++] = argv[1];
- }
- }
- argv++;
- }
-
- if (op == NULL_OP) {
- fprintf(stderr, "missing operator\n");
- (void) usage(argv[0]);
- }
-
- /* remove duplicates */
- n1 = nodups(set1, n1);
- n2 = nodups(set2, n2);
- /*
- * do set operation and print result
- *
- */
- n = (op == UNION) ? (n1 + n2) : n1;
- for (i = 0; i < n; i++) {
- switch(op) {
- case UNION:
- j = i - n1;
- if (i < n1)
- printf("%s ", set1[i]);
- else if (NOT(member(set2[j], set1, n1)))
- printf("%s ", set2[j]);
- break;
- case DIFF:
- if (member(set1[i], set2, n2) == FALSE) {
- printf("%s ", set1[i]);
- }
- break;
- case INTERSECT:
- if (member(set1[i], set2, n2) == TRUE) {
- printf("%s ", set1[i]);
- }
- break;
- }
- }
-
- printf("\n");
- exit(0);
- }
-
/*
 * nodups(set, n)
 *
 * Squeezes repeated strings out of the first n slots of set,
 * keeping the first occurrence of each string in its original
 * order.  Slots left over at the end are set to NULL.  Returns
 * the number of elements that remain.
 *
 */

int
nodups(set, n)
char *set[];
int n;
{
    register int src;           /* element being examined */
    register int kept;          /* number of distinct elements so far */
    register int probe;         /* walks the elements already kept */

    if (n <= 0)
        return n;               /* nothing to examine */

    /*
     * the first element is always kept; each later one is
     * copied down only if no kept element matches it
     *
     */
    kept = 1;
    for (src = 1; src < n; src++) {
        probe = 0;
        while (probe < kept && !STREQ(set[src], set[probe]))
            probe++;
        if (probe == kept)      /* no match: a new element */
            set[kept++] = set[src];
    }

    /* clear the slots vacated by the duplicates */
    for (probe = kept; probe < n; probe++)
        set[probe] = NULL;

    return kept;
}
-
- /*
- * member(s, set, n)
- *
- * returns TRUE if string s is a member of set which has n members
- * otherwise return FALSE
- *
- */
-
- static BOOLEAN
- member(s, set, n)
- register char *s, *set[];
- register int n;
- {
- register int i;
-
- for (i = 0; i < n; i++)
- if (STREQ(NAME(s), NAME(set[i])))
- return TRUE;
-
- return FALSE;
- }
-
/*
 * nopath(s)
 *
 * Returns a pointer to the part of s that follows its last '/',
 * or s itself when it contains no '/'.  The result points into s;
 * nothing is copied.  A string ending in '/' yields the empty
 * string.
 *
 * Uses the standard strrchr() rather than the BSD-only rindex().
 *
 */

static char *
nopath(s)
char *s;
{
    char *slash;                /* last '/' in s, if any */

    slash = strrchr(s, '/');
    if (slash == NULL)
        return s;
    return slash + 1;
}
-
/*
 * too_many()
 *
 * Reports on the standard error stream that a set has more
 * members than can be stored, then exits with status 1.
 *
 */

static void
too_many()
{
    fputs("too many members\n", stderr);
    exit(1);
}
-
/*
 * usage(prog)
 *
 * Prints a summary of the three command forms and of the -p flag
 * on the standard error stream, with prog at the start of the
 * first line, then exits with status 1.
 *
 */

static void
usage(prog)
char *prog;
{
    FILE *out = stderr;
    char *elems = "e1 e2 ... en";   /* placeholder for one set */

    fprintf(out, "%s\t%s -u[nion] %s\n", prog, elems, elems);
    fprintf(out, "\t%s -d[ifference] %s\n", elems, elems);
    fprintf(out, "\t%s -i[ntersection] %s\n", elems, elems);
    fputs("\t-p[aths]\t/* don't ignore leading paths */\n", out);
    exit(1);
}
- @//E*O*F sets.c//
- chmod u=rw,g=r,o=r sets.c
-
- echo Inspecting for damage in transit...
- temp=/tmp/shar$$; dtemp=/tmp/.shar$$
- trap "rm -f $temp $dtemp; exit" 0 1 2 3 15
- cat > $temp <<\!!!
- 74 339 1851 sets.L
- 11 20 202 Makefile
- 238 763 4758 sets.c
- 323 1122 6811 total
- !!!
- wc sets.L Makefile sets.c | sed 's=[^ ]*/==' | diff -b $temp - >$dtemp
- if [ -s $dtemp ]
- then echo "Ouch [diff of wc output]:" ; cat $dtemp
- else echo "No problems found."
- fi
- exit 0
-
-
-