home *** CD-ROM | disk | FTP | other *** search
-
/*
 *
 * Probe's multiple link eliminator for Matt's free for all links page
 * Version 1.0
 * Usama Wazeer (usamaw@cs.utexas.edu)
 * URL: http://www.cs.utexas.edu/users/usamaw
 *
 * # Define the output and input files below.
 *   Do NOT define the same file for both: doing so will
 *   delete your file and you will lose all data.
 * # Compile this using your favorite C compiler,
 *   e.g.: gcc -o urlchk urlchk.c
 * # and then just run: urlchk
 *
 * The program will display the line number and entry for each
 * link that is repeated and create the output file.
 *
 * Feel free to copy or change this program in any way,
 * as long as you give me credit. :)
 *
 */
-
/******** DEFINE THESE TWO VARIABLES ********/

/*
 * Both paths may also be supplied at build time, e.g.
 *   cc -DINPUT_FILE='"in.html"' -DOUTPUT_FILE='"out.html"' ...
 * in which case the defaults below are not used.
 * The two paths must name different files.
 */

/* Links page that is read. */
#ifndef INPUT_FILE
#define INPUT_FILE "/u/usamaw/www/links.html"
#endif

/* File that receives the page with repeated links removed. */
#ifndef OUTPUT_FILE
#define OUTPUT_FILE "/u/usamaw/www/links.html.out"
#endif

/********************************************/
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
-
/* Size in bytes of the url buffer in each list node, terminating NUL included. */
enum { URL_MAX_LEN = 150 };

/* One node of the singly linked list of URLs. */
typedef struct URLType
{
    char url[URL_MAX_LEN];   /* NUL-terminated URL text */
    struct URLType *next;    /* following node, or NULL at the end of the list */
} URLStruct;

/*
 * Head of the list of URLs recorded so far; NULL while the list is empty.
 * New entries are pushed at the head.
 */
URLStruct *URLList;
-
/*
 * Fallback for environments whose headers did not already define NULL.
 * It is an object-like macro with no trailing semicolon so that it can be
 * used as a plain expression (return NULL; p != NULL; ...).
 */
#ifndef NULL
#define NULL ((void *)0)
#endif
-
- URLStruct *init_urllist( void )
- {
- URLStruct *l_list;
- l_list = (URLStruct *) malloc (sizeof(l_list));
- l_list = NULL;
- return (l_list);
- }
-
- URLStruct *find_url(char *userhost)
- {
- URLStruct *User;
-
- if (!userhost)
- return NULL;
-
- for( User = URLList; User; User = User->next )
- if( !strcasecmp( User->url, userhost ) )
- return(User);
- return(NULL);
- }
-
/*
 * readln_from_a_file - fetch the next line that does not start with '#'.
 *
 * stream: open input stream.
 * lin:    destination buffer; must have room for at least 1000 bytes,
 *         since each fgets call may store up to 999 characters plus the NUL.
 *
 * Lines whose first character is '#' are skipped. The stored line is cut at
 * the first '\n' or '\r', so no line terminator is left in lin.
 *
 * Returns 1 when a line was stored in lin, 0 on end of file or read error.
 */
int readln_from_a_file( FILE *stream, char *lin)
{
    for (;;) {
        if (fgets(lin, 1000, stream) == NULL)
            return 0;
        if (lin[0] != '#')
            break;
    }

    /* Terminate at whichever of CR / LF comes first. */
    lin[strcspn(lin, "\r\n")] = '\0';
    return 1;
}
-
- char *furl( char *userhost )
- {
- URLStruct *dummy;
-
- if( (dummy = find_url(userhost)) != NULL )
- return (dummy->url);
- return(NULL);
- }
-
- int add_to_urllist( char *url)
- {
- URLStruct *New_user;
- char buffer[200];
-
- if( (New_user = find_url(url)) != NULL )
- return 0;
- if( (New_user = (URLStruct *) malloc (sizeof(*New_user))) == NULL)
- return 0;
-
- strcpy(New_user->url, url);
- New_user->next = URLList;
- URLList = New_user;
- return 1;
- }
-
/*
 * checkurl - decide whether a link's URL is new, recording it if so.
 *
 * url2: writable NUL-terminated text that starts with the URL and may be
 *       followed by '>' and more text. The buffer is modified: the text is
 *       terminated at the first '>' that follows the URL. Leading '>'
 *       characters are skipped.
 *
 * Returns 0 when the URL is already in the list (a repeated link).
 * Returns 1 otherwise; the URL is then handed to add_to_urllist. When url2
 * is NULL or holds no URL text at all, 1 is returned and nothing is recorded.
 */
int checkurl(char *url2)
{
    char *url;

    if (url2 == NULL)
        return 1;

    /* Extract the first '>'-delimited token in place, with no static state. */
    url = url2 + strspn(url2, ">");
    url[strcspn(url, ">")] = '\0';

    /* No token: there is nothing to look up or to store. */
    if (*url == '\0')
        return 1;

    if (furl(url))
        return 0;

    add_to_urllist(url);
    return 1;
}
- int read_urllist( char *filename, char *filename2 )
- {
- FILE *fp;
- int i = 0;
- char lin[2000];
- char url2[200];
- char rest[2000];
- URLStruct *dummy;
- FILE *list_file;
-
- if( ( fp = fopen( filename, "r" ) ) == NULL )
- return 0;
-
- if( ( list_file = fopen( filename2, "w" ) ) == NULL )
- return 0;
-
- for( dummy = URLList; dummy; dummy = dummy->next )
- free(dummy);
-
- URLList = init_urllist();
-
- while( readln_from_a_file( fp, lin) )
- {
- i++;
- strcpy(url2, "");
- sscanf(lin, "<li><a href=%s %s</a>\n", url2, rest);
- if(!*url2)
- fprintf( list_file, "%s\n", lin);
- else
- {
- if(checkurl(url2))
- fprintf( list_file, "%s\n", lin);
- else
- printf("%-4i Entry: %s \n", i, lin);
- }
- }
- fclose( fp );
- fclose( list_file );
- return( 1 );
- }
-
- int main()
- {
- printf("Here we go.... \n\n");
- read_urllist(INPUT_FILE, OUTPUT_FILE);
- printf("\n\nAll done!!!!!\n");
- }
-
-
-
-