home *** CD-ROM | disk | FTP | other *** search
- From: lee@sqarc.sq.com (Liam R. E. Quin)
- Newsgroups: alt.sources
- Subject: cmpall -- find identical (duplicated) files
- Message-ID: <1990Aug2.212411.3078@sq.sq.com>
- Date: 2 Aug 90 21:24:11 GMT
-
- I wrote cmpall some time ago, when I found that I had lots of copies and
- duplicated directory hierarchies.
-
- You say (for example)
- find $HOME | cmpall
- and it says
- same /home/lee/src/lqtext/doc/README /home/lee/doc/README
- and so forth.
-
- This version only compares files that have the same name and size.
- You could change this to compare files that have the same size if you
- wanted -- I wasn't too bothered by that. It won't (of course) detect
- duplicates if one is compressed.
-
- It works unchanged on SysV and SunOS, should be OK on other Unix systems.
-
- No manpage. Just do
- make cmpall
- (you don't even need a makefile).
-
- Let me know if it helps, or if you change it...
-
- Lee
-
- : To unbundle, sh this file
- echo x - cmpall.c 1>&2
- sed 's/^X//' >cmpall.c <<'@@@End of cmpall.c'
- X/* $Header: /home/lee/src/cmpall/cmpall.c,v 1.2 90/08/02 17:22:12 lee Exp Locker: lee $
- X *
- X */
- X
- X#include <stdio.h>
- X#include <malloc.h>
- X#include <errno.h>
- X#include <fcntl.h>
- X#include <sys/types.h>
- X#include <sys/stat.h>
- X
- Xextern int errno;
- X
- Xtypedef struct s_listel {
- X char *Name;
- X struct stat *statbuf;
- X int n_seen;
- X struct s_listel *Next;
- X} t_listel;
- X
- X#define STREQ(henry,utzoo) ((*(henry)== *(utzoo))&&!strcmp(henry,utzoo))
- X#define new(type) ((type *) malloc(sizeof(type)))
- X
- Xint UseBytes = 0;
- Xchar *progname;
- X
- X/* read filenames a line at a time.
- X * whenever we get a file we've seen before, compare it
- X * with the other of the sames name.
- X * If they are the same, print "choose -s first second"
- X * otherwise, print "choose -d first second"
- X */
- Xmain(argc, argv)
- X int argc;
- X char *argv[];
- X{
- X t_listel *NewEntry();
- X void AddEntry();
- X char *GetLine();
- X char *Path;
- X
- X progname = argv[0];
- X
- X if (argc != 1) {
- X if (argc == 2 && STREQ(argv[1], "-b")) {
- X UseBytes = 1;
- X } else {
- X (void) fprintf(stderr, "usage: find <dir> ... -print | %s [-b]\n",
- X progname);
- X exit(1);
- X }
- X }
- X
- X while ((Path = GetLine(stdin, "/dev/stdin")) != (char *) 0) {
- X AddEntry(stdin, "/dev/stdin", Path);
- X }
- X
- X exit(0);
- X}
- X
- Xt_listel *SeenSoFar = (t_listel *) 0;
- X
- Xvoid
- XAddEntry(fd, Name, Path)
- X FILE *fd;
- X char*Name;
- X char *Path;
- X{
- X extern char *strrchr();
- X
- X t_listel *e;
- X t_listel **lpp;
- X
- X if ((e = new(t_listel)) == (t_listel *) 0) {
- X (void) fprintf(stderr, "no RAM reading \"%s\" ", Name);
- X /* possibly we dumped core just then... */
- X (void) fprintf(stderr, "at \"%s\"\n", Path);
- X exit(1);
- X }
- X
- X e->Name = Path;
- X e->n_seen = 1;
- X e->statbuf = (struct stat *) 0;
- X
- X for (lpp = &SeenSoFar; *lpp; lpp = &(*lpp)->Next) {
- X register char *Old = (*lpp)->Name;
- X int i;
- X char *PathLastSlash;
- X char *OldLastSlash;
- X
- X if ((PathLastSlash = strrchr(Path, '/')) == (char *) 0) {
- X PathLastSlash = Path;
- X } else {
- X ++PathLastSlash;
- X }
- X
- X if ((OldLastSlash = strrchr(Old, '/')) == (char *) 0) {
- X OldLastSlash = Old;
- X } else {
- X ++OldLastSlash;
- X }
- X
- X if (*PathLastSlash == *OldLastSlash) {
- X i = strcmp(PathLastSlash, OldLastSlash);
- X } else {
- X i = *PathLastSlash - *OldLastSlash;
- X }
- X
- X if (i < 0) continue; /* not there yet */
- X if (i == 0) {
- X /* found it... */
- X
- X /* If the files are the same, we should add them to a single
- X * linked list and traverse it at the end, I suppose!
- X */
- X if (Compare(fd, Name, *lpp, e) == 0) {
- X /* They were the same, so no point storing this one... */
- X (void) printf("same '%s' '%s'\n", (*lpp)->Name, e->Name);
- X if (e->statbuf) {
- X (void) free((char *) e->statbuf);
- X }
- X (void) free((char *) e);
- X (void) free(Path);
- X return;
- X }
- X /* the files were different, so add the new one to the list */
- X /* FALLTHROUGH */
- X }
- X break;
- X }
- X
- X /* lpp points either at the last "Next", or at the Next
- X * of the element before which we must insert a new entry...
- X */
- X
- X e->Next = (*lpp);
- X *lpp = e;
- X return;
- X}
- X
- X/*ARGSUSED*/
- Xint
- XCompare(fd, Name, Old, New)
- X FILE *fd;
- X char *Name;
- X t_listel *Old;
- X t_listel *New;
- X{
- X /* We return
- X * 0 if the files are identical
- X * -1 if we can't stat Path
- X * 1 if the files are (or might be) different
- X * 2 if they are definitely different
- X *
- X * If we can stat Path but not e, we return 3, and delete e
- X * from the list.
- X */
- X if (Old->statbuf == (struct stat *) 0) {
- X if ((Old->statbuf = new(struct stat)) == (struct stat *) 0) {
- X (void) fprintf(stderr, "reading \"%s\", no RAM to stat \"%s\"\n",
- X Name, Old->Name);
- X exit(1);
- X }
- X if (stat(Old->Name, Old->statbuf) < 0) {
- X Delete(Old);
- X return 3;
- X }
- X }
- X
- X if (New->statbuf == (struct stat *) 0) {
- X if ((New->statbuf = new(struct stat)) == (struct stat *) 0) {
- X (void) fprintf(stderr, "reading \"%s\", no RAM to stat \"%s\"\n",
- X Name, New->Name);
- X exit(1);
- X }
- X if (stat(New->Name, New->statbuf) < 0) {
- X return -1;
- X }
- X }
- X
- X /* if they are not normal files, they are the same if the major/minor
- X * numbers are the same:
- X */
- X /* NOTDONE */
- X
- X /** Now compare the files: */
- X
- X /* first, are they the same size? */
- X if (Old->statbuf->st_size != New->statbuf->st_size) {
- X return 2; /* definitely different */
- X }
- X
- X if (Old->statbuf->st_ino == New->statbuf->st_ino &&
- X Old->statbuf->st_dev == New->statbuf->st_dev &&
- X Old->statbuf->st_nlink == New->statbuf->st_nlink) {
- X
- X /* they are linked to each other, so they are the same! */
- X return 0; /* this is only really a heuristic */
- X }
- X
- X switch (CmpFile(Old->Name, New->Name)) {
- X case 0: return 0;
- X case 1: return 2; /* different! */
- X case -1: /* no a */
- X case -2: /* no b */
- X return -1;
- X default:
- X return -1;
- X }
- X}
- X
- Xint
- XCmpFile(a, b)
- X char *a, *b;
- X{
- X /* return 0 if same, -ve on error, 1 otherwise */
- X int afd = open(a, O_RDONLY, 0);
- X int bfd = open(b, O_RDONLY, 0);
- X char ablk[1024], bblk[1024];
- X int ar, br;
- X
- X if (afd < 0) return -1;
- X if (bfd < 0) return -2;
- X
- X while ((ar = read(afd, ablk, 1024)) == (br = read(bfd, bblk, 1024))) {
- X if ((ar == 0) || memcmp(ablk, bblk, ar) != 0) {
- X (void) close(afd);
- X (void) close(bfd);
- X /* ar == br == 0: got to EOF, they are the same */
- X return (ar == 0) ? 0 : 1;
- X } else if (ar < 0) {
- X return -3; /* read error! */
- X }
- X }
- X /* Since we use stat to check the size before calling this function,
- X * this should never happen... except in the case of a read error.
- X */
- X return 1; /* different lengths */
- X}
- X
- Xchar *
- XGetLine(fd, Name)
- X FILE *fd;
- X char *Name;
- X{
- X char *Result;
- X register char *p;
- X unsigned int len = 20;
- X int ch;
- X
- X if ((Result = malloc(len)) == (char *) 0) {
- X (void) fprintf(stderr, "out of mem reading \"%s\"\n", Name);
- X exit(1);
- X }
- X
- X p = Result;
- X
- X while ((ch = getc(fd)) != EOF) {
- X if (ch == '\n') break;
- X if (p - Result >= len - 1) {
- X int WhereWeWere = p - Result;
- X
- X if ((Result = realloc(Result, len += 20)) == (char *) 0) {
- X (void) fprintf(stderr, "no more mem in \"%s\"\n", Name);
- X exit(1);
- X }
- X
- X /* Realloc() might have returned a different address,
- X * so we must ensure that p points to the new array...
- X */
- X p = &Result[WhereWeWere];
- X }
- X *p++ = ch;
- X }
- X *p = '\0';
- X
- X if (ch == EOF) {
- X if (Result[0] != '\0') {
- X return Result;
- X }
- X return (char *) 0;
- X }
- X if (p - Result > 1) {
- X Result = realloc(Result, (unsigned) (p - Result + 1));
- X if (Result == (char *) 0) {
- X (void) fprintf(stderr, "can't save line in \"%s\"\n", Name);
- X exit(1);
- X }
- X }
- X return Result;
- X}
- X
- XDelete(e)
- X t_listel *e;
- X{
- X register t_listel **lpp;
- X
- X for (lpp = &SeenSoFar; *lpp; lpp = &(*lpp)->Next) {
- X if ((*lpp)->Name == e->Name) { /* same address, NOT strcmp! */
- X t_listel *next = e->Next;
- X
- X (void) free(e->Name);
- X if (e->statbuf) {
- X (void) free((char *) e->statbuf);
- X }
- X (void) free((char *) e);
- X *lpp = next;
- X
- X break;
- X }
- X }
- X}
- X
- X/*
- X * $Log: cmpall.c,v $
- X * Revision 1.2 90/08/02 17:22:12 lee
- X * delinted it a little...
- X *
- X * Revision 1.1 90/08/02 17:04:26 lee
- X * Initial revision
- X *
- X *
- X */
- @@@End of cmpall.c
- ls -l cmpall.c
- exit 0
-
- Lee
- --
- Liam R. E. Quin, lee@sq.com, {utai,utzoo}!sq!lee, SoftQuad Inc., Toronto
- ``He left her a copy of his calculations [...] Since she was a cystologist,
- she might have analysed the equations, but at the moment she was occupied
- with knitting a bootee.'' [John Boyd, Pollinators of Eden, 217]
-