home *** CD-ROM | disk | FTP | other *** search
- /* WIDE AREA INFORMATION SERVER SOFTWARE:
- No guarantees or restrictions. See the readme file for the full standard
- disclaimer.
-
- Brewster@think.com
- */
-
- /* Copyright (c) CNIDR (see ../COPYRIGHT) */
-
-
- /* this file defines a set of helper functions
- * for indexing common types of files.
- * -brewster 7/90
- */
-
- /* I encourage adding customizations.
- * (too bad they all have to be hard coded, but
- * C did not have convenient dynamic linking facilities)
- *
- * Add four functions to this file:
- * boolean foo_separator_function(char *line){}
- * void foo_header_function(char *line){}
- * long foo_date_function(char *line){}
- * void foo_finish_header_function(char *header){}
- *
- * then add the prototypes to ircfiles.h
- * then add the functions to the big case statement in waisindex.c
- *
- *
- * to do:
- * filter for digests
- *
- * Tracy pointed out 2 things which we should consider when redesigning the
- * parser:
- *
- * - there should be a way for the parser to decide to skip a section of
- * input text (ie. not index it). she does this by having global variable
- * which is set by her custom separator function when it wants to tell
- * map_over_words() to not add the words on the current line
- *
- * - there should be a way to switch lexers depending what section of a
- * document you are in (since word separators will change). This is
- * needed by the european patent office too.
- *
- */
-
-
- /* Change log:
- * 8/90 brewster added the library customizations
- * 6/91 and before - added a bunch of other filters - JG
- * $Log: ircfiles.c,v $
- * Revision 1.3 93/07/21 18:45:34 warnock
- * Added new functions to handle listserv logs and STELAR-specific types
- *
- * Revision 1.2 93/07/19 16:31:50 warnock
- * Added document type URL from Nathan.Torkington@vuw.ac.nz
- *
- * Revision 1.1 1993/02/16 15:05:35 freewais
- * Initial revision
- *
- * Revision 1.34 92/05/06 17:28:23 jonathan
- * Added filename_finish_header_function. Puts leaf name into header.
- *
- * Revision 1.33 92/05/05 11:10:50 jonathan
- * Added fix to bibtex indexer to ignore subsequent "booktitles" after title
- * has been set. Thanks to Lutz Prechelt (prechelt@ira.uka.de).
- *
- * Revision 1.32 92/04/30 12:31:08 jonathan
- * Fixed syntax errors in OBJ C functions.
- *
- * Revision 1.31 92/04/29 14:08:57 shen
- * chnage catalaog header string to "Title:"
- *
- * Revision 1.30 92/04/26 14:45:08 brewster
- * debug ziff
- *
- * Revision 1.29 92/04/26 14:39:24 brewster
- * tweeked ziff filter
- *
- * Revision 1.28 92/04/25 21:14:05 brewster
- * added ziff
- *
- * Revision 1.27 92/04/20 15:21:06 morris
- * added todo's for tracy
- *
- * Revision 1.26 92/03/22 18:38:29 brewster
- * added objective C filter
- *
- * Revision 1.25 92/03/13 08:21:37 jonathan
- * Added length limits to scanf's in my_getdate, thanks to
- * sendall@dxpt01.cern.ch (Mike Sendall).
- *
- * Revision 1.24 92/02/29 20:13:54 jonathan
- * separated =- for some compilers that get confused (ULTRIX).
- *
- * Revision 1.23 92/02/20 09:50:14 jonathan
- * Added bibtex and nhyp filters from S.P.vandeBurgt@research.ptt.nl.
- *
- * Revision 1.22 92/02/12 13:11:25 jonathan
- * Changed library catalog functions for new format (from fad).
- *
- *
- *
- */
-
- #include <string.h>
- #include <ctype.h>
- #include "cutil.h"
- #include "ircfiles.h"
-
/* Defined outside this file; presumably the name and ordinal of the file
 * currently being indexed -- confirm against the indexer. */
extern char *current_filename;
extern int current_filecount;

/* Bound used when copying headline text; the header scratch buffers in
 * this file are declared MAX_HEADER_LEN + 1 bytes long. */
#define MAX_HEADER_LEN 100

#define MAX_AUTHOR_LEN 25
#define MAX_DATE_LEN 4

/* Abbreviated English month names, NULL-terminated so a scan can stop at
 * the sentinel (index 0 == "Jan"). */
static char *months[] = {
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
  NULL
};
-
-
- static char* trim_trailing_newline _AP((char* string));
- static char* trim_leading_blanks _AP((char* string));
-
/* Remove a single trailing '\n' from string, in place, when there is one.
 * A NULL pointer or an empty string is left alone.
 * Returns its argument. */
static char*
trim_trailing_newline(string)
     char* string;
{
  size_t length;

  if(string == NULL)
    return(string);

  length = strlen(string);
  if(length != 0 && string[length - 1] == '\n')
    string[length - 1] = '\0';

  return(string);
}
-
/* Skip leading blanks.
 * Returns a pointer to the first character of string that compares
 * greater than ' ', or to the terminating NUL when there is none.
 * The string is not modified; a NULL argument is returned as is. */
static char*
trim_leading_blanks(string)
     char* string;
{
  if(string == NULL)
    return(string);

  /* Test for the terminator explicitly: '\0' also compares <= ' ', so
   * without it an all-blank string would be scanned past its end. */
  while(*string != '\0' && *string <= ' ')
    string++;

  return(string);
}
-
-
- #ifdef BIO
-
- char bio_header1[MAX_HEADER_LEN + 1];
- char bio_header2[MAX_HEADER_LEN + 1];
-
-
- /* ==========================================
- *
- * === Genbank Flat-file Customizations ===
- *
- * d.g.gilbert, 15feb92,
- * gilbertd@bio.indiana.edu
- *
- * ==========================================
- */
-
/* Zero-based offsets into a GenBank flat-file line. */
#define genbank_data_tab 12 /* keyword area that the filter blanks out */
#define genbank_date_tab 63 /* offset passed to genbank_getdate on LOCUS lines */
-
- /* Genbank Flat-file format:
- LOCUS ACAAC01 1571 bp ds-DNA INV 05-NOV-1991 << start entry
- 12345678901234567890123456789012345678901234567890123456789012345678901234567890
- .........1.........2.........3.........4.........5.........6.........7.........8
- all data starts at tab=13
- on locus line, date starts at tab=63
- ...
- LOCUS blah << Start entry, index LOCUS_NAME, includes DATE
- DEFINITION blah << Index def line == HEADER line
- ACCESSION blah << Index acc line
- KEYWORDS blah << index keywords
- SOURCE blah << index source
- ORGANISM blah << index organism
- blah << index taxonomy
- blah << "
- AUTHORS blah << Index
- TITLE blah << Index
- blah << Index
- ANYOTHERS jazz << skipit
- // << end of entry == entry separator
- LOCUS ACAAC01 1571 bp ds-DNA INV 05-NOV-1991 << start entry
- DEFINITION Acanthamoeba castelani gene encoding actin I.
- ACCESSION V00002 J01016
- KEYWORDS actin.
- SOURCE Acanthamoeba castellanii DNA.
- ORGANISM Acanthamoeba castellanii
- Eukaryota; Animalia; Protozoa; Sarcomastigophora; Sarcodina;
- Rhizopoda; Lobosa; Gymnamoeba; Amoebida; Acanthopodina;
- Acanthamoebidae.
- REFERENCE 1 (bases 1 to 1571)
- AUTHORS Nellen,W. and Gallwitz,D.
- TITLE Actin genes and actin messenger RNA in Acanthamoeba castellani.
- Nucleotide sequence of the split actin gene I
- JOURNAL J. Mol. Biol. 159, 1-18 (1982)
- COMMENT SWISS-PROT; P02578; ACT1$ACACA.
- From EMBL 26 entry ACAC01; dated 22-JAN-1991.
-
- FEATURES Location/Qualifiers
- >>> ignore all features
-
- BASE COUNT 313 a 535 c 389 g 334 t
- ORIGIN
- 1 ggagaagcgt gcacgcaata accaagcgac agagcaacct ctctggcacc acgccccaca
- >>> ignore all seq data in indexing
- // <<< end of entry
- LOCUS ACAMHCA 5894 bp ds-DNA INV 30-SEP-1988
- ...
- *****/
-
-
- static boolean keepindexing = false;
-
- void genbank_filter_for_index(line)
- char* line;
- {
- /* check whether to index anything in line,
- * call this from genbank_header_function which is called for
- * each line.
- * Blank out parts of line not for indexing...
- */
- char *c;
- long i;
-
- if (strlen(line) <= genbank_data_tab) {
- for (c=line ; *c>=' '; c++) *c=' ';
- keepindexing= false;
- }
-
- else if (substrcmp(line, " ")) {
- /* most lines are like this, including nucleotides */
- if (!keepindexing) for (c=line ; *c>=' '; c++) *c=' ';
- }
-
- /*******
- else if (substrcmp(line, "LOCUS ")){
- // I think this is bad, locus not in index ...
- for (c=line, i=0; *c>=' ' && i<genbank_data_tab; i++, c++) *c=' ';
- for ( ; *c>' '; c++) ; // leave LOCUS ID intact
- for ( ; *c>=' '; c++) *c=' ';
- keepindexing= false;
- }
- ******/
-
- else if (
- substrcmp(line, "DEFINITION ")
- || substrcmp(line, "LOCUS ")
- || substrcmp(line, "ACCESSION ")
- || substrcmp(line, "KEYWORDS ")
- || substrcmp(line, "SOURCE ")
- || substrcmp(line, " ORGANISM ")
- || substrcmp(line, " AUTHORS ")
- || substrcmp(line, " TITLE ")
- ){
- for (c=line, i=0; *c>=' ' && i<genbank_data_tab; i++, c++) *c=' ';
- keepindexing= true;
- }
-
- else {
- for (c=line ; *c>=' '; c++) *c=' ';
- keepindexing= false;
- }
- }
-
-
-
- boolean genbank_separator_function(line)
- char *line;
- {
- /* !! with // as separator, we get // at top of entry which will
- screw up seqanal software... */
- /* if ((strlen(line) > 1) && (0==strncmp(line, "//", 2))){
- return(true);
- }
- */
- if ((strlen(line) > genbank_data_tab) && substrcmp(line, "LOCUS ")){
- return(true);
- }
- else{
- return(false);
- }
- }
-
- long genbank_getdate(line)
- char *line;
- {
- /* genbank date == 30-SEP-1988*/
- char date[255], *temp;
- int day, month, year;
- char cmonth[25];
-
- strcpy(date, line);
-
- temp = date;
-
- while(!isdigit(*temp)) temp++;
-
- /* sscanf(temp, "%d %s %d", &day, cmonth, &year); */
- sscanf(temp, "%d-%s-%d", &day, cmonth, &year);
-
- for(month = 0; months[month] != NULL; month++)
- /* if(!strcmp(cmonth, months[month])) break; */
- if(!strcasecmp(cmonth, months[month])) break; /* was stricmp !! */
-
- if (year > 99) year = year % 100;
-
- if(day > 0 &&
- month < 12 &&
- year > 0) {
- return (10000 * year + 100 * (month+1) + day);
- }
- return 0;
- }
-
- long genbank_date_function(line)
- char *line;
- {
- if ((strlen(line) > genbank_data_tab) && substrcmp(line, "LOCUS ")){
- return(genbank_getdate(line+genbank_date_tab));
- }
- else
- return -1;
- }
-
-
-
- char *genbank_def = bio_header1;
- char *genbank_accession= bio_header2;
-
- void genbank_header_function(line)
- char *line;
- {
- if ((strlen(line) > genbank_data_tab) && substrcmp(line, "DEFINITION ") &&
- (strlen(genbank_def) == 0)){
- strncpy(genbank_def, line + genbank_data_tab, MAX_HEADER_LEN);
- trim_trailing_newline(genbank_def);
- }
-
- else if ((strlen(line) > genbank_data_tab) &&
- substrcmp(line, "ACCESSION ") &&
- (strlen(genbank_accession) == 0)){
- /* cut extra acc. numbers from this -- we want only 1st */
- char *cp;
- for (cp=line+genbank_data_tab; *cp==' '; cp++) ;
- strncpy(genbank_accession, cp, MAX_HEADER_LEN);
- cp= strchr(genbank_accession, ' ');
- if (cp!=NULL) *cp=0; /* drop after 1st */
- trim_trailing_newline(genbank_accession);
- }
-
- genbank_filter_for_index( line);
-
- }
-
- void genbank_finish_header_function(header)
- char *header;
- {
- if(strlen(genbank_def) != 0 && strlen(genbank_accession) != 0){
- strncpy(header, genbank_accession, MAX_HEADER_LEN);
- s_strncat(header, " ", MAX_HEADER_LEN, MAX_HEADER_LEN);
- s_strncat(header, genbank_def, MAX_HEADER_LEN, MAX_HEADER_LEN);
- }
- else if(strlen(genbank_def) != 0){
- strncpy(header, genbank_def, MAX_HEADER_LEN);
- }
- else if(strlen(genbank_accession) != 0){
- strncpy(header, genbank_accession, MAX_HEADER_LEN);
- }
- else{
- strcpy(header, "Unknown Entry");
- }
- genbank_def[0] = '\0';
- genbank_accession[0] = '\0';
- }
-
-
- /* ==========================================
- *
- * === PIR Protein Customizations ===
- *
- * d.g.gilbert, 11Mar92,
- * gilbertd@bio.indiana.edu
- *
- * ==========================================
- */
-
/* Zero-based offset at which field data starts on a PIR line; the
 * keyword area in front of it is blanked by pir_filter_for_index. */
#define pir_data_tab 16
-
- /* pir Flat-file format: (lines are prepended with '|' to keep cpp happy)
- |ENTRY CCHU #Type Protein
- 12345678901234567890123456789012345678901234567890123456789012345678901234567890
- .........1.........2.........3.........4.........5.........6.........7.........8
- all data starts at tab=17 or further
-
- |ENTRY blah << Start entry, index it
- |TITLE blah << Index def line == HEADER line
- |ACCESSION blah << Index acc line
- |KEYWORDS blah << index keywords
- |SOURCE blah << index source
- |REFERENCE blah << Index
- |SUPERFAMILY blah << Index
- | blah << Index
- |ANYOTHERS jazz << skipit
- |any word starting w/ "#", skipit
- |/// << end of entry == entry separator
- |ENTRY CCHU #Type Protein
- |TITLE Cytochrome c - Human
- |DATE #Sequence 30-Sep-1991 #Text 30-Sep-1991
- |PLACEMENT 1.0 1.0 1.0 1.0 1.0
- |SOURCE Homo sapiens #Common-name man
- |ACCESSION A31764\ A05676\ A00001
- |REFERENCE
- | #Authors Evans M.J., Scarpulla R.C.
- | #Journal Proc. Natl. Acad. Sci. U.S.A. (1988) 85:9625-9629
- | #Title The human somatic cytochrome c gene: two classes of
- | processed pseudogenes demarcate a period of rapid
- | molecular evolution.
- | #Reference-number A31764
- | #Accession A31764
- | #Molecule-type DNA
- | #Residues 1-105 <EVA>
- | #Cross-reference GB:M22877
- |REFERENCE
- | #Authors Matsubara H., Smith E.L.
- | #Journal J. Biol. Chem. (1963) 238:2732-2753
- | #Reference-number A05676
- | #Accession A05676
- | #Molecule-type protein
- | #Residues 2-28;29-46;47-100;101-105 <MATS>
- |REFERENCE
- | #Authors Matsubara H., Smith E.L.
- | #Journal J. Biol. Chem. (1962) 237:3575-3576
- | #Reference-number A00001
- | #Comment 66-Leu is found in 10% of the molecules in pooled
- | protein.
- |GENETIC
- | #Introns 57/1
- |SUPERFAMILY #Name cytochrome c
- |KEYWORDS acetylation\ electron transport\ heme\
- | mitochondrion\ oxidative phosphorylation\
- | polymorphism\ respiratory chain
- |FEATURE
- | 2-105 #Protein cytochrome c (experimental)
- | <MAT>\
- | 2 #Modified-site acetylated amino end
- | (experimental)\
- | 15,18 #Binding-site heme (covalent)\
- | 19,81 #Binding-site heme iron (axial ligands)
- |SUMMARY #Molecular-weight 11749 #Length 105 #Checksum 3247
- |SEQUENCE
- | 5 10 15 20 25 30
- | 1 M G D V E K G K K I F I M K C S Q C H T V E K G G K H K T G
- | 31 P N L H G L F G R K T G Q A P G Y S Y T A A N K N K G I I W
- | 61 G E D T L M E Y L E N P K K Y I P G T K M I F V G I K K K E
- | 91 E R A D L I A Y L K K A T N E
- |///
- |
-
- *****/
-
-
- void pir_filter_for_index(line)
- char* line;
- {
- /* check whether to index anything in line,
- * call this from pir_header_function which is called for
- * each line.
- * Blank out parts of line not for indexing...
- */
- char *c;
- long i;
-
- if (strlen(line) <= pir_data_tab) {
- for (c=line ; *c>=' '; c++) *c=' ';
- keepindexing= false;
- }
-
- /* drop some ref junk that is not of much indexing interest... */
- else if (substrcmp(line, " #Reference-number ")
- || substrcmp(line, " #Residues ")
- || substrcmp(line, " #Accession ")
- || substrcmp(line, " #Residues ")
- || substrcmp(line, " #Cross-reference ")
- || substrcmp(line, " #Molecule-type ")
- || substrcmp(line, " #Journal ") ) {
- for (c=line ; *c>=' '; c++) *c=' ';
- /* keepindexing is based on last main keyword (ENTRY, REF...) */
- }
-
- else if (substrcmp(line, " ")) {
- /* some good & bad continuation lines start like this */
- if (!keepindexing) for (c=line ; *c>=' '; c++) *c=' ';
- }
-
- else if (
- substrcmp(line, "ENTRY ")
- || substrcmp(line, "TITLE ")
- || substrcmp(line, "SOURCE ")
- || substrcmp(line, "ACCESSION ")
- || substrcmp(line, "REFERENCE")
- /* REFERENCE line seems to have no data on line, but it follows (keepindexing) */
- || substrcmp(line, "SUPERFAMILY ")
- || substrcmp(line, "KEYWORDS ")
- ){
- for (c=line, i=0; *c>=' ' && i<pir_data_tab; i++, c++) *c=' ';
- keepindexing= true;
- }
-
- else {
- for (c=line ; *c>=' '; c++) *c=' ';
- keepindexing= false;
- }
-
- /* pir -- blank out #words */
- for (c=line; *c != 0; ) {
- if (*c=='#') do { *c++=' '; } while (*c > ' ');
- else c++;
- }
-
- }
-
-
-
- boolean pir_separator_function(line)
- char *line;
- {
- /* !! with /// as separator, we get /// at top of entry which will
- screw up seqanal software... */
- /* if ((strlen(line) > 1) && (0==strncmp(line, "///", 2))){
- return(true);
- }
- */
- if ((strlen(line) > pir_data_tab) && substrcmp(line, "ENTRY ")){
- return(true);
- }
- else{
- return(false);
- }
- }
-
-
/* Date hook for PIR files.  Date extraction is not implemented ("later
 * maybe"), so every line yields -1; line is not examined. */
long pir_date_function(line)
     char *line;
{
  long no_date = -1;

  return no_date;
}
-
-
-
- char *pir_def = bio_header1;
- char *pir_accession= bio_header2;
-
- void pir_header_function(line)
- char *line;
- {
- if ((strlen(line) > pir_data_tab) &&
- substrcmp(line, "TITLE ") &&
- (strlen(pir_def) == 0)){
- strncpy(pir_def, line + pir_data_tab, MAX_HEADER_LEN);
- trim_trailing_newline(pir_def);
- }
-
- else if ((strlen(line) > pir_data_tab) &&
- substrcmp(line, "ACCESSION ") &&
- (strlen(pir_accession) == 0)){
- /* cut extra acc. numbers from this -- we want only 1st */
- char *cp;
- for (cp=line+pir_data_tab; *cp==' '; cp++) ;
- strncpy(pir_accession, cp, MAX_HEADER_LEN);
- cp= strchr(pir_accession, ' ');
- if (cp!=NULL) *cp=0; /* drop after 1st */
- trim_trailing_newline(pir_accession);
- }
-
- pir_filter_for_index( line);
-
- }
-
- void pir_finish_header_function(header)
- char *header;
- {
- if(strlen(pir_def) != 0 && strlen(pir_accession) != 0){
- strncpy(header, pir_accession, MAX_HEADER_LEN);
- s_strncat(header, " ", MAX_HEADER_LEN, MAX_HEADER_LEN);
- s_strncat(header, pir_def, MAX_HEADER_LEN, MAX_HEADER_LEN);
- }
- else if(strlen(pir_def) != 0){
- strncpy(header, pir_def, MAX_HEADER_LEN);
- }
- else if(strlen(pir_accession) != 0){
- strncpy(header, pir_accession, MAX_HEADER_LEN);
- }
- else{
- strcpy(header, "Unknown Entry");
- }
- pir_def[0] = '\0';
- pir_accession[0] = '\0';
- }
-
-
-
- /* ==========================================
- * === EMBL Flat-file Customizations ===
- * d.g.gilbert, 23Feb92,
- * ==========================================
- */
-
/* Zero-based offset at which field data starts on an EMBL line; the
 * line-type code in front of it is blanked by embl_filter_for_index. */
#define embl_data_tab 5
-
- /* EMBL Flat-file format:
-
- ID BAAMYLA standard; DNA; PRO; 7872 BP.
- 1234567890
- XX
- AC X62835;
- XX
- DT 12-NOV-1991 (Rel. 29, Last updated, Version 1)
- DT 12-NOV-1991 (Rel. 29, Created)
- XX
- DE B.acidocaldarius amy gene for amylase
- XX
- KW amy gene; amylase.
- XX
- OS Bacillus acidocaldarius
- OC Prokaryota; Bacteria; Firmicutes; Endospore-forming rods and cocci;
- OC Bacillaceae; Bacillus.
- XX
- RN [1]
- RP 1-7872
- RA Hemila H.O.;
- RT ;
- RL Submitted (22-OCT-1991) on tape to the EMBL Data Library by:
- RL H.O. Hemila, Institute of Biotechnology, Valimotie 7, 00380
- RL Helsinki, FINLAND
- XX
- RN [2]
- RP 1-7872
- RA Koivula T., Hemilae H.;
- RT ;
- RL Unpublished.
- XX
- CC *source: strain=ATCC 27009;
- CC *source: clone_library=lambda gt-10;
- XX
- FH Key Location/Qualifiers
- FH
- FT -35_signal 3224..3229
- FT -10_signal 3246..3251
- FT RBS 3288..3294
- FT /note="amy gene"
- FT CDS 3297..7202
- FT /gene="amy" /product="amylase"
- FT CDS 7332..>7872
- FT /product="malE protein-homologue"
- XX
- SQ Sequence 7872 BP; 1615 A; 2240 C; 2473 G; 1544 T; 0 other;
- cgttcctcgt gccgtccgaa gcgttcccga cgaatctgcg cggcaccgcc gcgggatctc
- //
- *****/
-
-
-
- void embl_filter_for_index(line)
- char* line;
- {
- /* check whether to index anything in line,
- * call this from embl_header_function which is called for
- * each line.
- * Blank out parts of line not for indexing...
- */
- char *c;
- long i;
-
- if (strlen(line) <= embl_data_tab) {
- for (c=line; *c>=' '; c++) *c=' ';
- }
-
- else if (
- substrcmp(line, "DE ")
- || substrcmp(line, "ID ")
- || substrcmp(line, "AC ")
- || substrcmp(line, "KW ")
- || substrcmp(line, "OS ")
- || substrcmp(line, "OC ")
- || substrcmp(line, "RA ")
- || substrcmp(line, "RT ")
- ){
- for (c=line, i=0; *c>=' ' && i<embl_data_tab; i++, c++) *c=' ';
- }
-
- else {
- for (c=line ; *c>=' '; c++) *c=' ';
- }
- }
-
-
-
- boolean embl_separator_function(line)
- char *line;
- {
- /* !! with // as separator, we get // at top of entry which will
- screw up seqanal software... */
- /* if ((strlen(line) > 1) && (0==strncmp(line, "//", 2))){
- return(true);
- }
- */
- if ((strlen(line) > embl_data_tab) && substrcmp(line, "ID ")){
- return(true);
- }
- else{
- return(false);
- }
- }
-
- /* embl date == 30-SEP-1988 == genbank_date*/
-
- long embl_date_function(line)
- char *line;
- {
- if ((strlen(line) > embl_data_tab) && substrcmp(line, "DT ")){
- return(genbank_getdate(line+embl_data_tab));
- }
- else
- return -1;
- }
-
-
-
- char *embl_def = bio_header1;
- char *embl_accession= bio_header2;
-
- void embl_header_function(line)
- char *line;
- {
- if ((strlen(line) > embl_data_tab) &&
- substrcmp(line, "DE ") &&
- (strlen(embl_def) == 0)){
- strncpy(embl_def, line + embl_data_tab, MAX_HEADER_LEN);
- trim_trailing_newline(embl_def);
- }
-
- else if ((strlen(line) > embl_data_tab) &&
- substrcmp(line, "AC ") &&
- (strlen(embl_accession) == 0)){
- /* cut extra acc. numbers from this -- we want only 1st */
- char *cp;
- for (cp=line+embl_data_tab; *cp==' '; cp++) ;
- strncpy(embl_accession, cp, MAX_HEADER_LEN);
- cp= strchr(embl_accession, ' ');
- if (cp!=NULL) *cp=0; /* drop after 1st */
- trim_trailing_newline(embl_accession);
- }
-
- embl_filter_for_index( line);
-
- }
-
- void embl_finish_header_function(header)
- char *header;
- {
- if(strlen(embl_def) != 0 && strlen(embl_accession) != 0){
- strncpy(header, embl_accession, MAX_HEADER_LEN);
- s_strncat(header, " ", MAX_HEADER_LEN, MAX_HEADER_LEN);
- s_strncat(header, embl_def, MAX_HEADER_LEN, MAX_HEADER_LEN);
- }
- else if(strlen(embl_def) != 0){
- strncpy(header, embl_def, MAX_HEADER_LEN);
- }
- else if(strlen(embl_accession) != 0){
- strncpy(header, embl_accession, MAX_HEADER_LEN);
- }
- else{
- strcpy(header, "Unknown Entry");
- }
- embl_def[0] = '\0';
- embl_accession[0] = '\0';
- }
-
-
-
- /* ==========================================
- *
- * === Prosite Dat & Doc Customizations ===
- *
- * d.g.gilbert, 18feb92,
- * gilbertd@bio.indiana.edu
- *
- * ==========================================
- */
-
/* Zero-based offset at which field data starts on a Prosite DAT line;
 * prositedat_header_function blanks the line-type code in front of it. */
#define prositedat_data_tab 5
-
- /* Prosite DOC format:
-
- {END}
- {PDOC00002}
- {PS00002; GLYCOSAMINOGLYCAN}
- {BEGIN}
- *************************************
- * Glycosaminoglycan attachment site *
- *************************************
-
- Proteoglycans [1] are complex glycoconjugates consisting of a core protein to
- which a variable number of glycosaminoglycan chains (such as heparin sulfate,
- chondroitin sulfate, etc.) are covalently attached. The glycosaminoglycans are
- attached to the core proteins through a xyloside residue which is in turn is
- linked to a serine residue of the protein. A consensus sequence for the
- attachment site seems to exist [2]. However, it must be noted that this
- consensus is only based on the sequence of three proteoglycans core proteins.
-
- -Consensus pattern: S-G-x-G
- [S is the attachment site]
- Additional rule: There must be at least two acidic amino acids from -2 to -4
- relative to the serine.
- -Last update: June 1988 / First entry.
-
- [ 1] Hassel J.R., Kimura J.H., Hascall V.C.
- Annu. Rev. Biochem. 55:539-567(1986).
- [ 2] Bourdon M.A., Krusius T., Campbell S., Schwarz N.B.
- Proc. Natl. Acad. Sci. U.S.A. 84:3194-3198(1987).
- {END}
- {PDOC00003}
- {PS00003; SULFATATION}
- {BEGIN}
-
- *****/
-
- /* Prosite DAT format:
- //
- ID ASN_GLYCOSYLATION; PATTERN.
- 1234567890
- AC PS00001;
- DT APR-1990 (CREATED); APR-1990 (DATA UPDATE); APR-1990 (INFO UPDATE).
- DE N-glycosylation site.
- PA N-{P}-[ST]-{P}.
- CC /TAXO-RANGE=??E?V;
- CC /SITE=1,carbohydrate;
- CC /SKIP-FLAG=TRUE;
- DO PDOC00001;
- //
-
- *****/
-
- boolean prositedoc_separator_function(line)
- char *line;
- {
- if ((strlen(line) > strlen("{END}")) && substrcmp(line, "{END}")){
- return(true);
- }
- else{
- return(false);
- }
- }
-
-
- char *prositedoc_def = bio_header1;
- char *prositedoc_accession= bio_header2;
-
- void prositedoc_header_function(line)
- char *line;
- {
- if ((strlen(line)>2) && (line[0]=='*') && (line[1]==' ') &&
- (strlen(prositedoc_def) == 0)){
- strncpy(prositedoc_def, line + 2, MAX_HEADER_LEN);
- trim_trailing_newline(prositedoc_def);
- }
- else if ((strlen(line)>2) && (line[0]=='{') &&
- (!substrcmp(line, "{END}")) &&
- (strlen(prositedoc_accession) == 0)){
- char *cp;
- strncpy(prositedoc_accession, line+1, MAX_HEADER_LEN);
- cp= strchr(prositedoc_accession, '}');
- if (cp!=NULL) *cp=0;
- trim_trailing_newline(prositedoc_accession);
- }
-
- }
-
- void prositedoc_finish_header_function(header)
- char *header;
- {
- if(strlen(prositedoc_def) != 0 && strlen(prositedoc_accession) != 0){
- strncpy(header, prositedoc_accession, MAX_HEADER_LEN);
- s_strncat(header, " ", MAX_HEADER_LEN, MAX_HEADER_LEN);
- s_strncat(header, prositedoc_def, MAX_HEADER_LEN, MAX_HEADER_LEN);
- }
- else if(strlen(prositedoc_def) != 0){
- strncpy(header, prositedoc_def, MAX_HEADER_LEN);
- }
- else if(strlen(prositedoc_accession) != 0){
- strncpy(header, prositedoc_accession, MAX_HEADER_LEN);
- }
- else{
- strcpy(header, "Unknown Entry");
- }
- prositedoc_def[0] = '\0';
- prositedoc_accession[0] = '\0';
- }
-
-
- boolean prositedat_separator_function(line)
- char *line;
- {
- /* !! with // as separator, we get // at top of entry which will
- screw up seqanal software... */
- /* if ((strlen(line) > 1) && (0==strncmp(line, "//", 2))){
- return(true);
- }
- */
- if ((strlen(line) > prositedat_data_tab) && substrcmp(line, "ID ")){
- return(true);
- }
- else{
- return(false);
- }
- }
-
-
- char *prositedat_def = bio_header1;
- char *prositedat_accession= bio_header2;
-
- void prositedat_header_function(line)
- char *line;
- {
- int i;
-
- if ((strlen(line) > prositedat_data_tab) &&
- substrcmp(line, "DE ") &&
- (strlen(prositedat_def) == 0)){
- strncpy(prositedat_def, line + prositedat_data_tab, MAX_HEADER_LEN);
- trim_trailing_newline(prositedat_def);
- }
-
- else if ((strlen(line) > prositedat_data_tab) &&
- substrcmp(line, "AC ") &&
- (strlen(prositedat_accession) == 0)){
- /* cut extra acc. numbers from this -- we want only 1st */
- char *cp;
- for (cp=line+prositedat_data_tab; *cp==' '; cp++) ;
- strncpy(prositedat_accession, cp, MAX_HEADER_LEN);
- cp= strchr(prositedat_accession, ' ');
- if (cp!=NULL) *cp=0; /* drop after 1st */
- trim_trailing_newline(prositedat_accession);
- }
-
- if (strlen(line) > prositedat_data_tab)
- for (i=0; i<prositedat_data_tab; i++) line[i]= ' ';
-
- }
-
- void prositedat_finish_header_function(header)
- char *header;
- {
- if(strlen(prositedat_def) != 0 && strlen(prositedat_accession) != 0){
- strncpy(header, prositedat_accession, MAX_HEADER_LEN);
- s_strncat(header, " ", MAX_HEADER_LEN, MAX_HEADER_LEN);
- s_strncat(header, prositedat_def, MAX_HEADER_LEN, MAX_HEADER_LEN);
- }
- else if(strlen(prositedat_def) != 0){
- strncpy(header, prositedat_def, MAX_HEADER_LEN);
- }
- else if(strlen(prositedat_accession) != 0){
- strncpy(header, prositedat_accession, MAX_HEADER_LEN);
- }
- else{
- strcpy(header, "Unknown Entry");
- }
- prositedat_def[0] = '\0';
- prositedat_accession[0] = '\0';
- }
-
- /* ====================
- * Bio Journals
- * (modified EMBL format)
- * dgg
- * ==================
- */
-
- /******
- //
- RA Casida L.E. Jr.;
- 123456
- RT "Protozoan Response to the Addition of Bacterial Predators and Other
- RT Bacteria to Soil.";
- RL Appl. Environ. Microbiol. 55:1857-1859(1989).
- //
- RA Caldwell B.A., Ye C., Griffiths R.P., Moyer C.L., Morita R.Y.;
- RT "Plasmid Expression and Maintenance during Long-Term Starvation-Survival
- RT of Bacteria in Well Water.";
- RL Appl. Environ. Microbiol. 55:1860-1864(1989).
- //
- *******/
-
- #define biojournal_tab 5
-
- boolean biojournal_separator_function(line)
- char *line;
- {
- if ((strlen(line) > 1) && (0==strncmp(line, "//", 2))){
- return(true);
- }
- /* if ((strlen(line) > biojournal_tab) && substrcmp(line, "RA ")){
- return(true);
- }
- */
- else{
- return(false);
- }
- }
-
-
- char *biojournal_title = bio_header1;
- char *biojournal_author= bio_header2;
-
- void biojournal_header_function(line)
- char *line;
- {
- int i;
-
- if ((strlen(line) > biojournal_tab) && substrcmp(line, "RT ") &&
- (strlen(biojournal_title) == 0)){
- strncpy(biojournal_title, line + biojournal_tab, MAX_HEADER_LEN);
- trim_trailing_newline(biojournal_title);
- }
-
- else if ((strlen(line) > biojournal_tab) && substrcmp(line, "RA ") &&
- (strlen(biojournal_author) == 0)){
- char *cp;
- strncpy(biojournal_author, line+biojournal_tab, MAX_HEADER_LEN);
- cp= strchr(biojournal_author, ' ');
- if (cp!=NULL) *cp=0; /* drop after 1st */
- trim_trailing_newline(biojournal_author);
- }
-
- if (strlen(line) > biojournal_tab)
- for (i=0; i<biojournal_tab; i++) line[i]= ' ';
- }
-
- void biojournal_finish_header_function(header)
- char *header;
- {
- if(strlen(biojournal_title) != 0 && strlen(biojournal_author) != 0){
- strncpy(header, biojournal_author, MAX_HEADER_LEN);
- s_strncat(header, " ", MAX_HEADER_LEN, MAX_HEADER_LEN);
- s_strncat(header, biojournal_title, MAX_HEADER_LEN, MAX_HEADER_LEN);
- }
- else if(strlen(biojournal_title) != 0){
- strncpy(header, biojournal_title, MAX_HEADER_LEN);
- }
- else if(strlen(biojournal_author) != 0){
- strncpy(header, biojournal_author, MAX_HEADER_LEN);
- }
- else{
- strcpy(header, "Unknown Entry");
- }
- biojournal_title[0] = '\0';
- biojournal_author[0] = '\0';
- }
-
-
- /* ==========================================
- *
- * === Drosophila Redbook Customizations ===
- *
- * d.g.gilbert, 18feb92,
- * gilbertd@bio.indiana.edu
- * ==========================================
- */
-
- /*------ example
- |#Abnormal: see A
- |#abnormal abdomen: see a( )
- |#Abnormal abdomen: see A
- |# abnormal eye: see mit15
- |#abnormal oocytes: see abo
- |#abnormal tergites: see abt
- |#abnormal wings: see abw
- |#abo: abnormal oocyte
- | location: 2-44.0 (mapped with respect to J, 2-41).
- | origin: Naturally occurring allele recovered near Rome,
- | Italy.
- | references: Sandler, Lindsley, Nicoletti, and Trippa,
- | ...
- ----*/
-
-
- boolean redbook_separator_function(line)
- char *line;
- {
- if(*line=='#'){
- return(true);
- }
- else{
- return(false);
- }
- }
-
- char *redbook_header = bio_header1;
-
- void redbook_header_function(line)
- char *line;
- {
- if(redbook_separator_function(line)){
- strncpy(redbook_header, line + 1, MAX_HEADER_LEN);
- }
- }
-
- void redbook_finish_header_function(header)
- char *header;
- {
- if(strlen(redbook_header) == 0){
- strncpy(header, "Unknown", MAX_HEADER_LEN);
- }
- else{
- strncpy(header, redbook_header, MAX_HEADER_LEN);
- }
- redbook_header[0] = '\0';
- }
-
-
- /* ==========================================
- *
- * === Drosophila flybase Customizations ===
- *
- * d.g.gilbert, 18feb92,
- * gilbertd@bio.indiana.edu
- * ==========================================
- */
-
- /*----------------------------------------
- ::::::::::::::
- ABAUTOSY.TEXT
- ::::::::::::::
- LS(2)P6
- 24E-24F 28A-28D A
- LS(2)P11
- 25E-25F 35D A
- ::::::::::::::
- ABDELETE.TEXT
- ::::::::::::::
- Df(1)FM7
- 1A 1B2-1B3 Df |l(1)1Aa--ac|
- In(1)y3P$+L$-sc8$+R$-
- 1A 1B2-1B3 Df |y--ac|
- ::::::::::::::
- ABDUPLIC.TEXT
- ::::::::::::::
- In(1)sc8$+L$-EN$+R$-
- 1A 1B2-1B3 Dp |l(1)1Ac--ac|
- In(1)sc8$+L$-y3P$+R$-
- 1A 1B2-1B3 Dp |y--ac|
- ::::::::::::::
- ABINSERT.TEXT
- ::::::::::::::
- TE298
- 1E [] I
- TE276
- 3A1-3A2 [] I
- ::::::::::::::
- ABINVERT.TEXT
- ::::::::::::::
- In(1)l-v227
- 1-2 19-20 In
- In(1)y-G
- 1A 1C3-1C4 In |y--y|;;
- ::::::::::::::
- ABREFS.TEXT
- ::::::::::::::
- 3R3L.3R Novitski, Genetics 98:257
- B$+S$- v$++$- y$++Y$- Voelker, Genetics 107:279
- B$+S$-Ybb$+l$- Polembo, Molec.Gen.Genet. 195:35
- ::::::::::::::
- ABTRANSL.TEXT
- ::::::::::::::
- T(1;2)gl$++$-
- 1A 21C1 T
- T(1;2)y-v1
- 1A 39 T |y--y|;;
- T(1;2)SP55
- 1A 41 T
- ::::::::::::::
- ABTRANSP.TEXT
- ::::::::::::::
- Tp(3;1)pn36
- 1A 61A Tp
- Tp(1;1)Si2
- 1A1-1A8 14D2-14E1 Tp |r--r|;;
- Tp(1;1)Si2
- 1A1-1A8 18F Tp
- ::::::::::::::
- COSMID.TEXT
- ::::::::::::::
- 1A\S\T\U 0 0 0 23E12
- 1A\VS 0 ~50 BH\W 125H10
- 1B\M\U 0 0 0 88B3
- ::::::::::::::
- FUNCTION.TEXT
- ::::::::::::::
- 3-hydroxy-3-methylglutaryl-coenzyme-A-reductase 1.1.1.34 HmG-CoAR
- 6-pyruvoyl-tetrahydropterin-synthase pr
- 14-3-3-protein D14-3-3
- ::::::::::::::
- LOCI.TEXT
- ::::::::::::::
- 3S18; 3S18-element
- repetitive-element-3S18
- 4.5SRNA; 4.5SRNA
- 3-[21] 65A
- RNA-4.5S
- 5HT-R1; serotonin-receptor-1
- 3-[102] 100A
- serotonin-receptor
- transmembrane-protein
- G-protein-coupled-receptor
- 5HT-R2A; serotonin-receptor
- 2-[87] 56A-56B
- ::::::::::::::
- LZSYN.TEXT
- ::::::::::::::
- Acp-g1 AcpG
- Acr96A Acr96Aa
- Aldox-1 Aldox1
- ::::::::::::::
- MAP.TEXT
- ::::::::::::::
- 1-[0] 1A6 ? l(1)Ac
- 1-0.0 cc
- 1-0.0 clv-1
- ::::::::::::::
- REFS.TEXT
- ::::::::::::::
- 4.5SRNA Steffenson Genetics 110:s84
- 5HT-R1 Boschert 12th Europ.Dros.Conference
- 5HT-R2A Boschert 12th Europ.Dros.Conference
- ::::::::::::::
- SYNONYMS.TEXT
- ::::::::::::::
- 1C Pk36A
- 1J Pk91C
- 2sm$+lab$- sm
- 3-2 Pk45C
- ::::::::::::::
- UID.TEXT
- ::::::::::::::
- 00001;4.5SRNA
- 00002;5SRNA
- 00003;7SLRNA
- 00004;17.6
- -----------------------------*/
-
- /* need something like this for some doc formats.... */
-
-
-
- boolean flybase_separator_function(line)
- char *line;
- {
- if (isgraph(*line)) {
- return(true);
- }
- else{
- return(false);
- }
- }
-
- char *flybase_header = bio_header1;
-
- void flybase_header_function(line)
- char *line;
- {
- char *c;
- int i;
-
- if (flybase_separator_function(line)) {
- for (c=line, i=0; isgraph(*c) && (i<MAX_HEADER_LEN); )
- flybase_header[i++]= *c++;
- flybase_header[i]= '\0';
- }
- }
-
- void flybase_finish_header_function(header)
- char *header;
- {
- if(strlen(flybase_header) == 0){
- strncpy(header, "Unknown", MAX_HEADER_LEN);
- }
- else{
- strncpy(header, flybase_header, MAX_HEADER_LEN);
- }
- flybase_header[0] = '\0';
- }
-
- /* ==========================================
- * DIN news -- like BIO but "***" separator
- * dgg
- */
-
- boolean din_hit_head = false;
- char din_header[MAX_HEADER_LEN + 1];
-
- boolean din_separator_function(line)
- char *line;
- {
- if ((strlen(line) >= 3) && substrcmp(line, "***")) {
- return(true);
- }
- else{
- return(false);
- }
- }
-
-
- void din_header_function(line)
- char *line;
- {
- if(din_hit_head /* we just hit a seperator previous to this */
- && strlen(line) > 3 /* line is valid */
- && isalnum(*line) /* and is word */
- && (!din_separator_function(line)) /* we are not on the separator now */
- && strlen(din_header) == 0){ /* and we have not saved the headline yet */
- strcpy(din_header, line);
- waislog(WLOG_MEDIUM, WLOG_INDEX, "storing line: %s", din_header);
- din_hit_head = false;
- }
- }
-
- void din_finish_header_function(header)
- char *header;
- {
- din_hit_head = true; /* turn on the flag */
- if(strlen(din_header) == 0){
- strcpy(header, "Unknown Title");
- }
- else{
- strcpy(header, din_header);
- }
- din_header[0] = '\0';
- }
-
-
-
- #endif /* BIO */
-
-
- /* =================================
- * === Groliers Customizations ===
- * =================================
- */
-
- boolean groliers_separator_function(line)
- char *line;
- {
- if((strlen(line) > strlen("ARTICLE")) &&
- substrcmp(line, "ARTICLE")){
- /* printf("hit %s\n", line); */
- return(true);
- }
- else{
- return(false);
- }
- }
-
- char groliers_header[MAX_HEADER_LEN + 1];
-
- void groliers_header_function(line)
- char *line;
- {
- if(groliers_separator_function(line)){
- s_strncpy(groliers_header, line + strlen("ARTICLE") + 2, MAX_HEADER_LEN);
- }
- }
-
- void groliers_finish_header_function(header)
- char *header;
- {
- if(strlen(groliers_header) == 0){
- s_strncpy(header, "Unknown Title", MAX_HEADER_LEN);
- }
- else{
- s_strncpy(header, groliers_header, MAX_HEADER_LEN);
- }
- groliers_header[0] = '\0';
- }
-
-
- /* ==============================
- * === RMail Customizations ===
- * ==============================
- */
-
- /* this is just a preliminary version. A good version would
- * produce a headline like gnu emacs RMAIL
- */
-
-
- boolean mail_separator_function(line)
- char *line;
- {
- /* this should really look for a "<cr><cr>From " rather than "<cr>From " */
- if((strlen(line) > strlen("From ")) &&
- substrcmp(line, "From ")){
- return(true);
- }
- else{
- return(false);
- }
- }
-
- boolean rmail_separator_function(line)
- char *line;
- {
- if(0 == strcmp(line, "\n")){
- return(true);
- }
- else{
- return(false);
- }
- }
-
- /* This one is portable, but might get the wrong answer.
- I'm open to better code. - Jonny G
- */
-
-
- long my_getdate(line)
- char *line;
- {
- char date[255], *temp;
- int day, month, year;
- char cmonth[25], dow[5], tod[10];
-
- strcpy(date, line);
-
- temp = date;
-
- while(!isdigit(*temp)) temp++;
-
- sscanf(temp, "%d %25s %d", &day, cmonth, &year);
-
- for(month = 0; months[month] != NULL; month++)
- if(!strcmp(cmonth, months[month])) break;
-
- if (year > 99) year = year % 100;
-
- if(day > 0 &&
- month < 12 &&
- year > 0) {
- return (10000 * year + 100 * (month+1) + day);
- }
-
- month = -1; day = -1; year = -1;
-
- sscanf(temp, "%d/%d/%d", &month, &day, &year);
-
- if (year > 99) year = year % 100;
-
- if(day > 0 &&
- month < 12 &&
- year > 0) {
- return (10000 * year + 100 * (month+1) + day);
- }
-
- month = -1; day = -1; year = -1;
-
- sscanf(temp, "%d/%d/%d", &year, &month, &day);
-
- if (year > 99) year = year % 100;
-
- if(day > 0 &&
- month < 12 &&
- year > 0) {
- return (10000 * year + 100 * (month+1) + day);
- }
-
- temp = date;
-
- sscanf(temp, "%5s %25s %d %10s %d", dow, cmonth, &day, tod, &year);
-
- for(month = 0; months[month] != NULL; month++)
- if(!strcmp(cmonth, months[month])) break;
-
- if (year > 99) year = year % 100;
-
- if(day > 0 &&
- month < 12 &&
- year > 0) {
- return (10000 * year + 100 * (month+1) + day);
- }
-
- return 0;
- }
-
/* Extract the date of a mail message from one of its header lines.
 *   "Date: ..."  -> the text after the tag is parsed by my_getdate.
 *   "From ..."   -> the text after the second space is parsed.
 * Returns my_getdate's result for those lines, and -1 for any other line.
 *
 * Previously a "From " line without a second space fell off the end of
 * the function without returning a value; it now returns -1.  strchr is
 * used on every platform in place of the WIN32/index() split.
 */
long mail_date_function(line)
char *line;
{
  if ((strlen(line) > strlen("Date: ")) &&
      substrcmp(line, "Date: ")) {
    return(my_getdate(line + 6));
  }

  if ((strlen(line) > strlen("From ")) &&
      substrcmp(line, "From ")) {
    char *p = strchr(line + 5, ' ');
    if (p != NULL) {
      return(my_getdate(p + 1));
    }
  }

  return -1;
}
-
-
-
- char mail_subject[MAX_HEADER_LEN + 1];
- char mail_from[MAX_HEADER_LEN + 1];
-
- void mail_header_function(line)
- char *line;
- {
- if((strlen(line) > strlen("Subject: ")) &&
- substrcmp(line, "Subject: ") &&
- (strlen(mail_subject) == 0)){
- strcpy(mail_subject, "Re: ");
- s_strncat(mail_subject, line + strlen("Subject: "), MAX_HEADER_LEN, MAX_HEADER_LEN);
- trim_trailing_newline(mail_subject);
- }
- else if((strlen(line) > strlen("From: ")) &&
- substrcmp(line, "From: ") &&
- (strlen(mail_from) == 0)){
- /* this should find the <foo@bar> field in the from list */
- s_strncpy(mail_from, line + strlen("From: "), MAX_HEADER_LEN);
- trim_trailing_newline(mail_from);
- }
-
- }
-
- void mail_finish_header_function(header)
- char *header;
- {
- if(strlen(mail_subject) != 0 &&
- strlen(mail_from) != 0){
- /* trim the from line if needed */
- if(strlen(mail_from) > 10){
- mail_from[10] = '\0';
- }
- s_strncpy(header, mail_from, MAX_HEADER_LEN);
- s_strncat(header, " ", MAX_HEADER_LEN, MAX_HEADER_LEN);
- s_strncat(header, mail_subject, MAX_HEADER_LEN, MAX_HEADER_LEN);
- /* printf("%s\n", header); */
- }
- else if(strlen(mail_subject) != 0){
- s_strncpy(header, mail_subject, MAX_HEADER_LEN);
- }
- else if(strlen(mail_from) != 0){
- s_strncpy(header, mail_from, MAX_HEADER_LEN);
- }
- else{
- strcpy(header, "Unknown Subject");
- }
- mail_from[0] = '\0';
- mail_subject[0] = '\0';
- }
-
-
-
-
- boolean mail_or_rmail_separator(line)
- char *line;
- {
- static boolean blank_line = false;
-
- if((strlen(line) > strlen("From ")) &&
- substrcmp(line, "From ") &&
- blank_line == true){
- blank_line = false;
- return(true);
- }
-
- if(substrcmp(line, "")){
- blank_line = true;
- return(true);
- }
-
- if(!strcmp(line, "\n")){
- blank_line = true;
- }
- else{
- blank_line = false;
- }
-
- return(false);
- }
-
- #ifdef WIN32
- /* ========================================
- * === Microsoft Knowledge Base ====
- * ========================================
- */
-
- /*
-
- Format of Each MS Knowledge Base File:
-
- DOCUMENT:Q100650 01-MAR-1994 [O_LANMAN]
- TITLE :PRB: Internal Processing Error at 0270:0Bf8 with LMSM
- PRODUCT :Microsoft Lan Manager
- PROD/VER:2.10 2.10a
- OPER/SYS:OS/2
- KEYWORDS:
-
- ...
- */
-
- long mskbase_getdate(line)
- char *line;
- {
- /* genbank date == 30-SEP-1988*/
- char date[255], *temp;
- int day, month, year;
- char cmonth[25];
-
- strcpy(date, line);
-
- temp = date;
-
- while(!isdigit(*temp)) temp++;
-
- /* sscanf(temp, "%d %s %d", &day, cmonth, &year); */
- sscanf(temp, "%d-%s-%d", &day, cmonth, &year);
-
- for(month = 0; months[month] != NULL; month++)
- /* if(!strcmp(cmonth, months[month])) break; */
- if(!_stricmp(cmonth, months[month])) break; /* was stricmp !! */
-
- if (year > 99) year = year % 100;
-
- if(day > 0 &&
- month < 12 &&
- year > 0) {
- return (10000 * year + 100 * (month+1) + day);
- }
- return 0;
- }
-
/* Offset into a "DOCUMENT:" line at which the date is looked for. */
#define mskbase_date_tab 18

/* Extract the date of a Knowledge Base article.
 * For a "DOCUMENT:" line longer than mskbase_date_tab characters the text
 * from that offset on is handed to mskbase_getdate (01-MAR-1994 format);
 * any other line yields -1.
 */
long mskbase_date_function(line)
char *line;
{
  if (strlen(line) <= mskbase_date_tab) {
    return -1;
  }
  if (!substrcmp(line, "DOCUMENT:")) {
    return -1;
  }
  return(mskbase_getdate(line + mskbase_date_tab));
}
-
- char mskbase_title[MAX_HEADER_LEN + 1];
-
- void mskbase_header_function(line)
- char *line;
- {
- if((strlen(line) > strlen("TITLE :")) &&
- substrcmp(line, "TITLE :") &&
- (strlen(mskbase_title) == 0)){
- s_strncat(mskbase_title, line + strlen("TITLE :"), MAX_HEADER_LEN, MAX_HEADER_LEN);
- trim_trailing_newline(mskbase_title);
- }
- }
-
- void mskbase_finish_header_function(header)
- char *header;
- {
- if(strlen(mskbase_title) == 0){
- strcpy(header, "Unknown Title");
- }
- else{
- s_strncpy(header, mskbase_title, MAX_HEADER_LEN);
- }
- mskbase_title[0] = '\0';
- }
- #endif /* WIN32 */
-
- /* ========================================
- * === Mail Digest Customizations ====
- * ========================================
- */
-
- boolean mail_digest_separator_function(line)
- char *line;
- {
- if((strlen(line) > strlen("-----------------------------")) &&
- substrcmp(line, "------------------------------")){
- return(true);
- }
- else{
- return(false);
- }
- }
-
- /* ========================================
- * === Listserv Digest Customizations ====
- * ========================================
- */
-
- char listserv_from[MAX_HEADER_LEN + 1];
- char listserv_subject[MAX_HEADER_LEN + 1];
-
- boolean listserv_digest_separator_function(line)
- char *line;
- {
- if((strlen(line) > strlen("========================================")) &&
- substrcmp(line,"========================================")){
- return(true);
- }
- else{
- return(false);
- }
- }
-
- long listserv_date_function(line)
- char *line;
- {
- if((strlen(line) > strlen("Date: ")) && substrcmp(line, "Date: ")){
- return(my_getdate(line+6));
- }
- else if((strlen(line) > strlen("From: ")) &&
- substrcmp(line, "From: ") && (strlen(listserv_from) == 0)){
- /* this should find the <foo@bar> field in the from list */
- s_strncpy(listserv_from, line + strlen("From: "), MAX_HEADER_LEN);
- trim_trailing_newline(listserv_from);
- }
- else return -1;
- }
-
- void listserv_header_function(line)
- char *line;
- {
- if((strlen(line) > strlen("Subject: ")) &&
- substrcmp(line, "Subject: ") && (strlen(listserv_subject) == 0)){
- strcpy(listserv_subject, "Re: ");
- s_strncat(listserv_subject, line + strlen("Subject: "), MAX_HEADER_LEN, MAX_HEADER_LEN);
- trim_trailing_newline(listserv_subject);
- }
- else if((strlen(line) > strlen("From: ")) &&
- substrcmp(line, "From: ") && (strlen(listserv_from) == 0)){
- /* printf("1: ->%s<-\n",line); */
- /* this should find the <foo@bar> field in the from list */
- s_strncpy(listserv_from, line + strlen("From: "), MAX_HEADER_LEN);
- trim_trailing_newline(listserv_from);
- trim_leading_blanks(listserv_from);
- /* printf("2: ->%s<-\n",listserv_from); */
- }
-
- }
-
- void listserv_finish_header_function(header)
- char *header;
- {
- if(strlen(listserv_subject) != 0 && strlen(listserv_from) != 0){
- /* trim the from line if needed */
- if(strlen(listserv_from) > 15){
- listserv_from[15] = '\0';
- }
- trim_leading_blanks(listserv_from);
- s_strncpy(header, listserv_from, MAX_HEADER_LEN);
- s_strncat(header, " ", MAX_HEADER_LEN, MAX_HEADER_LEN);
- s_strncat(header, listserv_subject, MAX_HEADER_LEN, MAX_HEADER_LEN);
- /* printf("%s\n", header); */
- }
- else if(strlen(listserv_subject) != 0){
- s_strncpy(header, listserv_subject, MAX_HEADER_LEN);
- }
- else if(strlen(listserv_from) != 0){
- s_strncpy(header, listserv_from, MAX_HEADER_LEN);
- }
- else{
- strcpy(header, "Unknown Subject");
- }
- listserv_from[0] = '\0';
- listserv_subject[0] = '\0';
- }
-
- /* ========================================
- * === Library Catalog Customizations ===
- * ========================================
- */
-
- #define TITLE_MARKER "Title: "
- #define FIRST_LINE_MARKER "Call No...."
-
- /* just use the title */
-
- boolean catalog_separator_function(line)
- char *line;
- {
- if (strstr(line, FIRST_LINE_MARKER)) {
- return(true);
- }
- else{
- return(false);
- }
- }
-
- char catalog_header[MAX_HEADER_LEN + 1];
-
- void catalog_header_function(line)
- char *line;
- {
- char * title_start;
- if (title_start = strstr(line, TITLE_MARKER))
- {
- strncpy(catalog_header, title_start + strlen(TITLE_MARKER), MAX_HEADER_LEN);
- }
- }
-
- void catalog_finish_header_function(header)
- char *header;
- {
- if(strlen(catalog_header) == 0){
- strcpy(header, "Unknown Title");
- }
- else{
- s_strncpy(header, catalog_header, MAX_HEADER_LEN);
- }
- catalog_header[0] = '\0';
- }
-
-
-
- /* ============================
- * === Bio Customizations ===
- * ============================
- */
-
- /* customizations for a DB of genetic abstracts */
-
- boolean hit_header = false;
-
- boolean bio_separator_function(line)
- char *line;
- {
- if((strlen(line) > strlen(">>>")) &&
- substrcmp(line, ">>>")){
- return(true);
- }
- else{
- return(false);
- }
- }
-
- char bio_header[MAX_HEADER_LEN + 1];
-
- void bio_header_function(line)
- char *line;
-
- {
- if(hit_header /* we just hit a seperator previous to this */
- && (!bio_separator_function(line)) /* we are not on the separator now */
- && strlen(bio_header) == 0){ /* and we have not saved the headline yet */
- strcpy(bio_header, line);
- waislog(WLOG_MEDIUM, WLOG_INDEX, "storing line: %s", bio_header);
- hit_header = false;
- }
- }
-
- void bio_finish_header_function(header)
- char *header;
-
- {
- hit_header = true; /* turn on the flag */
- if(strlen(bio_header) == 0){
- strcpy(header, "Unknown Title");
- }
- else{
- strcpy(header, bio_header);
- }
- bio_header[0] = '\0';
- }
-
- /* =================================
- * === CMApp Customizations ===
- * =================================
- */
-
- boolean cmapp_separator_function(line)
- char *line;
- {
- if((strlen(line) > strlen("@A")) &&
- substrcmp(line, "@A")){
- /* printf("hit %s\n", line); */
- return(true);
- }
- else{
- return(false);
- }
- }
-
- char cmapp_header[MAX_HEADER_LEN + 1];
-
- void cmapp_header_function(line)
- char *line;
- {
- if((strlen(line) > strlen("APPLICATION:")) &&
- substrcmp(line, "APPLICATION:")){
- /* printf("hit %s\n", line); */
- s_strncpy(cmapp_header, line + strlen("APPLICATION:"), MAX_HEADER_LEN);
- }
- }
-
- void cmapp_finish_header_function(header)
- char *header;
- {
- if(strlen(cmapp_header) == 0){
- s_strncpy(header, "Unknown Title", MAX_HEADER_LEN);
- }
- else{
- s_strncpy(header, cmapp_header, MAX_HEADER_LEN);
- }
- cmapp_header[0] = '\0';
- }
-
- /* =================================
- * === Jargon Customizations ===
- * =================================
- *
- * GW - updated for Jargon File 2.9.8
- */
-
- /*
-
- Format of an entry:
-
- [blank line]
- :Title of This entry: first line of text of this entry
- second line of text of this entry
- third line of text of this entry
- [blank line]
-
- Any line which starts with a colon is considered to be the beginning
- of an entry.
-
- -GW
-
- */
-
- static int jargon_seen_entry = 0;
-
- boolean jargon_separator_function(line)
- register char *line;
- {
- if(!jargon_seen_entry && line[0] == ':')
- jargon_seen_entry = 1;
- return line[0] == ':';
- }
-
- char jargon_header[MAX_HEADER_LEN + 1];
-
- void jargon_header_function(line)
- char *line;
- {
- if(line[0] != ':')
- return;
-
- strncpy(jargon_header,line+1,MAX_HEADER_LEN);
- jargon_header[MAX_HEADER_LEN] = '\0';
-
- if(NULL != (line = strchr(jargon_header,':'))){
- if(line[1] == ':')
- line++;
- line++;
- line[0] = '\0';
- }
- }
-
- void jargon_finish_header_function(header)
- char *header;
- {
- if(jargon_seen_entry) {
- strncpy(header, jargon_header, MAX_HEADER_LEN);
- }
- jargon_header[0] = '\0';
- }
-
-
- /* =================================
- * === Internet Resource Guide ===
- * =================================
- */
-
-
- char irg_header[MAX_HEADER_LEN + 1];
- boolean irg_header_set = FALSE;
-
- boolean irg_separator_function(line)
- char *line;
- {
- if(line[0] == 12){ /* control L */
- irg_header_set = FALSE;
- return(true);
- }
- else
- return(false);
- }
-
- void irg_header_function(line)
- char *line;
- {
- if((irg_header_set == FALSE) &&
- (line[0] == 32 )){ /* space */
- s_strncpy(irg_header, line + strspn(line, " "), MAX_HEADER_LEN);
- irg_header_set = TRUE;
- }
-
- }
-
- void irg_finish_header_function(header)
- char *header;
- {
- if(strlen(irg_header) == 0){
- s_strncpy(header, "Unknown Title", MAX_HEADER_LEN);
- }
- else{
- s_strncpy(header, irg_header, MAX_HEADER_LEN);
- }
- irg_header[0] = '\0';
- irg_header_set = FALSE;
- }
-
- /* ========================
- * === Dash Separator ===
- * ========================
- */
-
-
- /*
- * dash-seperate entries
- * used in Introduction to Algorithms bug.list, suggestions, etc.
- * --------------------... at least 20 dashes
- * header
- * item
- * ..
- * --------------------... at least 20 dashes
- */
-
- boolean dash_separator_function(line)
- char *line;
- {
- if((strlen(line) > 20) && substrcmp(line,"--------------------")){
- /* printf("hit %s\n", line); */
- return(true);
- }
- else{
- return(false);
- }
- }
-
- char dash_header[MAX_HEADER_LEN + 1];
-
- void dash_header_function(line)
- char *line;
- {
- if(!dash_separator_function(line) &&
- (strlen(dash_header) < (MAX_HEADER_LEN - 1))){
- s_strncat(dash_header, line,
- MAX_HEADER_LEN, MAX_HEADER_LEN);
- trim_trailing_newline(dash_header);
- strncat(dash_header, " ", MAX_HEADER_LEN);
- }
- }
-
- void dash_finish_header_function(header)
- char *header;
- {
- if (strlen(dash_header) == 0) {
- strcpy(header, "No Title");
- }
- else {
- s_strncpy(header, dash_header, MAX_HEADER_LEN);
- }
- dash_header[0] = '\0';
- }
-
-
- /* ============================
- * === one_line Separator ===
- * ============================
- */
-
- /* this is where each line is a document (good for databases) */
-
- boolean one_line_hit_header = false;
-
- boolean one_line_separator_function(line)
- char *line;
- {
- return(true);
- }
-
- char one_line_header[MAX_HEADER_LEN + 1];
-
- void one_line_header_function(line)
- char *line;
- {
- s_strncpy(one_line_header, line, MAX_HEADER_LEN);
- }
-
- void one_line_finish_header_function(header)
- char *header;
- {
- if (strlen(one_line_header) == 0) {
- strcpy(header, "No Title");
- }
- else {
- s_strncpy(header, one_line_header, MAX_HEADER_LEN);
- }
- one_line_header[0] = '\0';
- }
-
- /* =============================
- * === Paragraph Separator ===
- * =============================
- */
-
- /* paragraph files - seperated by a blank line. Next line is the header */
-
- char para_header[MAX_HEADER_LEN +1];
- static boolean para_start = true;
-
- boolean para_separator_function(line)
- char *line;
- {
- if (para_start == true) {
- para_start = false;
- return true;
- }
- if (strlen(line) < 2)
- para_start = true;
- return false;
- }
-
- void para_header_function(line)
- char *line;
- {
- if (para_header[0] == 0)
- s_strncpy(para_header, line, MAX_HEADER_LEN);
- }
-
- void para_finish_header_function(header)
- char *header;
- {
- if (strlen(para_header) == 0) {
- strcpy(header, "No Title");
- }
- else {
- s_strncpy(header, para_header, MAX_HEADER_LEN);
- }
- para_header[0] = 0;
- }
- /* ========================================
- * === INRIA DOC Customizations ====
- * a la netnews
- * ========================================
- */
-
- /* ottavj@sophia.inria.fr
- Inria Documents are produced by a texto query, each on a separate file
- */
- char inriadoc_title[MAX_HEADER_LEN + 1];
- char inriadoc_auth[MAX_HEADER_LEN + 1];
-
- void inriadoc_header_function(line)
- char *line;
- {
- if((strlen(line) > strlen("Titre ")) &&
- substrcmp(line, "Titre ") &&
- (strlen(inriadoc_title) == 0)){
- strcpy(inriadoc_title, " : ");
- s_strncat(inriadoc_title, line + strlen("Titre "), MAX_HEADER_LEN,
- MAX_HEADER_LEN);
- trim_trailing_newline(inriadoc_title);
- }
- else if((strlen(line) > strlen("Auteur(s) ")) &&
- substrcmp(line, "Auteur(s) ") &&
- (strlen(inriadoc_auth) == 0)){
- strncpy(inriadoc_auth, line + strlen("Auteur(s) "), MAX_HEADER_LEN);
- trim_trailing_newline(inriadoc_auth);
- }
- }
-
- void inriadoc_finish_header_function(header)
- char *header;
- {
- if(strlen(inriadoc_title) != 0 &&
- strlen(inriadoc_auth) != 0){
- /* trim the auth line if needed */
- if(strlen(inriadoc_auth) > 40){
- inriadoc_auth[40] = '\0';
- }
- strncpy(header, inriadoc_auth, MAX_HEADER_LEN);
- s_strncat(header, " ", MAX_HEADER_LEN, MAX_HEADER_LEN);
- s_strncat(header, inriadoc_title, MAX_HEADER_LEN, MAX_HEADER_LEN);
- /* printf("%s\n", header); */
- }
- else if(strlen(inriadoc_title) != 0){
- strncpy(header, inriadoc_title, MAX_HEADER_LEN);
- }
- else if(strlen(inriadoc_auth) != 0){
- strncpy(header, inriadoc_auth, MAX_HEADER_LEN);
- }
- else{
- strcpy(header, "Unknown Title");
- }
- inriadoc_auth[0] = '\0';
- inriadoc_title[0] = '\0';
- }
-
-
-
- /* ==========================
- * === Seeker Separator ===
- * ==========================
- */
-
- boolean seeker_separator_function(line)
- char *line;
- {
- return(dash_separator_function(line));
- }
-
- char seeker_header[MAX_HEADER_LEN + 1];
- boolean in_headline = FALSE;
-
- void seeker_header_function(line)
- char *line;
- {
- if(strlen(line) > strlen("Headline:") &&
- substrcmp(line, "Headline:")){
- in_headline = TRUE;
- seeker_header[0] = '\0';
- /* printf("hit headline!\n"); */
- }
- else if(in_headline == TRUE &&
- (strlen(seeker_header) < (MAX_HEADER_LEN - 1))){
- s_strncat(seeker_header, line,
- MAX_HEADER_LEN, MAX_HEADER_LEN);
- trim_trailing_newline(seeker_header);
- }
- }
-
- void seeker_finish_header_function(header)
- char *header;
- {
- if (strlen(seeker_header) == 0) {
- strcpy(header, "No Title");
- }
- else {
- s_strncpy(header, seeker_header, MAX_HEADER_LEN);
- }
- seeker_header[0] = '\0';
- in_headline = TRUE;
- }
-
- /* ==========================
- * === RLIN Separator ===
- * ==========================
- */
-
- boolean rlin_separator_function(line)
- char *line;
- {
- return(dash_separator_function(line));
- }
-
- char rlin_header[MAX_HEADER_LEN + 1];
- boolean rlin_in_headline = FALSE;
-
- void rlin_header_function(line)
- char *line;
- {
- if(rlin_separator_function(line)){
- rlin_in_headline = TRUE;
- rlin_header[0] = '\0';
- /* printf("hit headline!\n"); */
- }
- else if(rlin_in_headline == TRUE &&
- (strlen(rlin_header) < (MAX_HEADER_LEN - 1))){
- s_strncat(rlin_header, line,
- MAX_HEADER_LEN, MAX_HEADER_LEN);
- trim_trailing_newline(rlin_header);
- }
- }
-
- void rlin_finish_header_function(header)
- char *header;
- {
- if (strlen(rlin_header) == 0) {
- strcpy(header, "No Title");
- }
- else {
- s_strncpy(header, rlin_header, MAX_HEADER_LEN);
- }
- rlin_header[0] = '\0';
- in_headline = TRUE;
- }
-
- /* ========================================
- * === MH_BBoard Customizations ====
- * ========================================
- */
-
- /* gcardwel@uci.edu
- MH bboards use a series of control A's to do a blank line.. yuk!
- */
-
- boolean mh_bboard_separator_function(line)
- char *line;
- {
- static boolean blank_line = false;
-
- if((strlen(line) > strlen("BBoard-ID: ")) &&
- substrcmp(line, "BBoard-ID: ") &&
- blank_line == true){
- blank_line = false;
- return(true);
- }
-
- if(!strcmp(line, "\001\001\001\001\n")){
- blank_line = true;
- }
- else{
- blank_line = false;
- }
- return (false);
- }
-
- /*
- * Customization for files saved from within the 'rn' newsreader.
- *
- * These can either be in 'mail' format, or they can be in a similar
- * format which starts each article with the pseudo-header
- * 'Article: 42 of comp.sys.foobar'. Other than that, we treat this
- * just like 'mail'.
- *
- * wollman@uvm.edu, Sun Sep 8 20:12:21 EDT 1991
- *
- * dgg added "Path:" fix for netnews/NNTP fetches (!NOT MAIL, NO "From ")
- * gilbertd@sunflower.bio.indiana.edu
- */
- boolean rn_separator_function(line)
- char *line;
- {
- if(!strncmp(line,"From ",5) ||
- !strncmp(line,"Path: ",6) ||
- !strncmp(line,"Article ",7) ||
- !strncmp(line,"Article: ",9))
- return true;
- return false;
- }
-
- /*
- * Customization for files saved NNTP netnews fetches (!NOT MAIL FORMAT, NO "From ".
- *
- * gilbertd@sunflower.bio.indiana.edu
- */
- boolean netnews_separator_function(line)
- char *line;
- {
- if(!strncmp(line,"From ",5) ||
- !strncmp(line,"Article ",7) ||
- !strncmp(line,"Article: ",9))
- return true;
- return false;
- }
-
- /*
- * Customizations for GNU Emacs Info files
- *
- * When indexing info files, the user must index the files with real text
- * in them, rather than the file with the tag and indirect tables; otherwise
- * you'll end up with lots of garbage in your index.
- *
- * G. Wollman
- */
-
- static int done_headline = 0;
-
- boolean emacs_info_separator_function(line) /* hate K&R-style definitions */
- char *line;
- {
- if(line[0] == (char)31) {
- done_headline = 0;
- return true;
- }
- return false;
- }
-
- static char emacs_info_headline[MAX_HEADER_LEN+1];
-
- void emacs_info_header_function(line)
- register char *line;
- {
- int i;
-
- if(done_headline)
- return;
-
- if(strncmp(line,"File: ",6))
- return;
-
- done_headline = 1;
- line += 6; /* skip over "File: " */
-
- i = 1;
- emacs_info_headline[0] = '(';
- while(*line && *line != ',' && (i < MAX_HEADER_LEN-1))
- emacs_info_headline[i++] = *line++;
-
- emacs_info_headline[i++] = ')';
-
- line += 9; /* skip over ", Node: " */
-
- /* copy the name of the info node into the headline */
- while(*line && (i < MAX_HEADER_LEN) && (*line != ','))
- emacs_info_headline[i++] = *line++;
-
- emacs_info_headline[i++] = '\0';
- }
-
- void emacs_info_finish_header_function(header)
- char *header;
- {
- strcpy(header,emacs_info_headline);
- }
-
- /* ========================================
- * === Medline Customizations ====
- * ========================================
- */
-
- /*
- Francois Schiettecatte
- with help from:
- Tom Emmel
- Karen Phipps
- */
-
- char medline_header[MAX_HEADER_LEN +1];
- char medline_title[MAX_HEADER_LEN + 1];
- char medline_date[MAX_HEADER_LEN + 1];
- char medline_author[MAX_HEADER_LEN + 1];
-
- static boolean medline_start = true;
-
-
- boolean medline_separator_function(line)
- char *line;
- {
- if (medline_start == true) {
- medline_start = false;
- return true;
- }
- if (strlen(line) < 2)
- medline_start = true;
- return false;
- }
-
-
- void medline_header_function(line)
- char *line;
- {
- char *ptr;
-
- if((strlen(line) > strlen("TI ")) &&
- (substrcmp(line, "TI "))){
- strncpy(medline_title, line + strlen("TI "), MAX_HEADER_LEN);
- }
-
- if((strlen(line) > strlen("SO ")) &&
- (substrcmp(line, "SO "))){
- ptr = strchr(line,'1');
- strncpy(medline_date, ptr, MAX_DATE_LEN);
- }
-
- if((strlen(line) > strlen("AU ")) &&
- (substrcmp(line, "AU "))){
- ptr = strtok(line + strlen("AU "),".,");
- strcpy(medline_author,ptr);
- strncat(medline_author, " ", MAX_AUTHOR_LEN);
- }
- }
-
- void medline_finish_header_function(header)
- char *header;
- {
- if(strlen(medline_author) > 0 ){
- strncat(medline_header,medline_author, MAX_HEADER_LEN);
- }
-
- if(strlen(medline_date) > 0 ){
- strncat(medline_header,"(", MAX_HEADER_LEN);
- strncat(medline_header,medline_date, MAX_HEADER_LEN);
- strncat(medline_header,") ", MAX_HEADER_LEN);
- }
-
- if(strlen(medline_title) > 0 ){
- strncat(medline_header,medline_title, MAX_HEADER_LEN);
- }
-
- if(strlen(medline_header) == 0){
- strcpy(header, "No Title");
- }
- else{
- strncpy(header, medline_header, MAX_HEADER_LEN);
- }
-
- medline_header[0] = '\0';
- medline_title[0] = '\0';
- medline_date[0] = '\0';
- medline_author[0] = '\0';
- }
-
-
-
-
- /* ========================================
- * === Refer Customizations ====
- * ========================================
- */
-
-
- /*
- Francois Schiettecatte
- with help from:
- Tom Emmel
- Karen Phipps
- */
-
- char refer_header[MAX_HEADER_LEN +1];
- char refer_title[MAX_HEADER_LEN + 1];
- char refer_date[MAX_HEADER_LEN + 1];
- char refer_author[MAX_HEADER_LEN + 1];
-
- static boolean refer_start = true;
-
-
- boolean refer_separator_function(line)
- char *line;
- {
- if (refer_start == true) {
- refer_start = false;
- return true;
- }
- if (strlen(line) < 2)
- refer_start = true;
- return false;
- }
-
-
- void refer_header_function(line)
- char *line;
- {
- if((strlen(line) > strlen("%T ")) &&
- (substrcmp(line, "%T "))){
- strncpy(refer_title, line + strlen("%T "), MAX_HEADER_LEN);
- }
- else if((strlen(line) > strlen("%B ")) &&
- (substrcmp(line, "%B ")) && (strlen(refer_title) == 0)){
- strncpy(refer_title, line + strlen("%B "), MAX_HEADER_LEN);
- }
-
- if((strlen(line) > strlen("%D ")) &&
- (substrcmp(line, "%D "))){
- strncpy(refer_date, line + strlen("%D "), MAX_DATE_LEN);
- }
-
- if((strlen(line) > strlen("%A ")) &&
- (substrcmp(line, "%A ")) && (strlen(refer_author) == 0)){
- strncpy(refer_author, line + strlen("%A "), MAX_AUTHOR_LEN);
- strncat(refer_author, " ", MAX_AUTHOR_LEN);
- }
- else if((strlen(line) > strlen("%E ")) &&
- (substrcmp(line, "%E ")) && (strlen(refer_author) == 0)){
- strncpy(refer_author, line + strlen("%E "), MAX_AUTHOR_LEN);
- strncat(refer_author, " ", MAX_AUTHOR_LEN);
- }
- }
-
- void refer_finish_header_function(header)
- char *header;
- {
- if(strlen(refer_author) > 0 ){
- strncat(refer_header,refer_author, MAX_HEADER_LEN);
- }
-
- if(strlen(refer_date) > 0 ){
- strncat(refer_header,"(", MAX_HEADER_LEN);
- strncat(refer_header,refer_date, MAX_HEADER_LEN);
- strncat(refer_header,") ", MAX_HEADER_LEN);
- }
-
- if(strlen(refer_title) > 0 ){
- strncat(refer_header,refer_title, MAX_HEADER_LEN);
- }
-
- if(strlen(refer_header) == 0){
- strncpy(header, "No Title", MAX_HEADER_LEN);
- }
- else{
- strncpy(header, refer_header, MAX_HEADER_LEN);
- }
-
- refer_header[0] = '\0';
- refer_author[0] = '\0';
- refer_date[0] = '\0';
- refer_title[0] = '\0';
- }
-
#if 0 /* HTML, we already have such functions */
/* ===========================================
 * ===  HTML - grab the header from <TITLE></TITLE>
 *      Michael Nelson (m.l.nelson@larc.nasa.gov) 10/26/93
 * ===========================================
 */

/* Title saved for the current HTML file. */
char html_header[MAX_HEADER_LEN +1];

/* An HTML file is a single document: no line is ever a boundary. */
boolean html_separator_function(line)
char *line;
{
  (void)line; /* deliberately unused */
  return false;
}

/* Store the text following a "<TITLE>" or "<title>" tag as the title. */
void html_header_function(line)
char *line;
{
  if (strlen(line) <= strlen("<TITLE>")) {
    return;
  }
  if ((substrcmp(line, "<TITLE>")) || (substrcmp(line, "<title>"))) {
    strncpy(html_header, line + strlen("<TITLE>"), MAX_HEADER_LEN);
  }
}

/* Deliver the stored title, or "No Title" when none was stored, and
 * clear it. */
void html_finish_header_function(header)
char *header;
{
  if (html_header[0] != '\0') {
    s_strncpy(header, html_header, MAX_HEADER_LEN);
  }
  else {
    strcpy(header, "No Title");
  }
  html_header[0] = 0;
}
#endif /* HTML */
-
- /* ===========================================
- * === First Line Customizations ====
- * ===========================================
- */
-
- /* this means the first line of the file is the headline.
- useful for the lyrics server */
-
- /* paragraph files - seperated by a blank line. Next line is the header */
-
- char first_line_header[MAX_HEADER_LEN +1];
-
- boolean first_line_separator_function(line)
- char *line;
- {
- return false;
- }
-
- void first_line_header_function(line)
- char *line;
- {
- if (first_line_header[0] == '\0')
- s_strncpy(first_line_header, line, MAX_HEADER_LEN);
- }
-
- void first_line_finish_header_function(header)
- char *header;
- {
- if (strlen(first_line_header) == 0) {
- strcpy(header, "No Title");
- }
- else {
- s_strncpy(header, first_line_header, MAX_HEADER_LEN);
- }
- first_line_header[0] = 0;
- }
-
- /* =========================
- * === BIBTEX Separator ===
- * =========================
- * S.P.vandeBurgt@research.ptt.nl (Stan)
- *
- * BibTeX entries
- *
- * @......{
- * ......
- * title = header
- * .......}
- *
- */
-
- static char bibtex_header[MAX_HEADER_LEN + 1];
-
- boolean bibtex_separator_function(line)
- char *line;
- {
- char *p = line;
-
- while (isspace(*p)) p++; /* skip space */
- return(*p == '@');
- }
-
- void bibtex_header_function(line)
- char *line;
- {
- char *p = line;
-
- p = strstr(line, "title");
- if (p == NULL) p = strstr(line, "Title");
- if (p == NULL) p = strstr(line, "TITLE");
- if (p != NULL && (p == line || !isalpha(*(p-1))))
- {
- p += 5;
-
- while (isspace(*p)) p++; /* skip space */
- if (*p == '=') /* should be an '=' now */
- {
- p++;
- /* skip bibtex char's */
- while (isspace(*p) || *p == '"' || *p == '{') p++;
- strncpy(bibtex_header, p, MAX_HEADER_LEN);
- for (p = bibtex_header; *p != '\0'; p++)
- {
- /* replace bibtex char's */
- if (*p == '\n' || *p == '"' || *p == '}' || *p == '{')
- {
- *p = ' ';
- }
- }
- }
- }
- }
-
- void bibtex_finish_header_function(header)
- char *header;
- {
- if (bibtex_header[0] == '\0')
- {
- strcpy(header, "Unknown Title");
- }
- else{
- strncpy(header, bibtex_header, MAX_HEADER_LEN);
- }
- bibtex_header[0] = '\0';
- }
-
-
- /* =========================
- * === NHYP Separator ===
- * =========================
- * S.P.vandeBurgt@research.ptt.nl (Stan)
- * Nhyp entries
- *
- * ?:? header
- * ......
- * ......
- *
- */
-
- static char nhyp_header[MAX_HEADER_LEN + 1];
-
- boolean nhyp_separator_function(line)
- char *line;
- {
- return(strstr(line, "?:?") != NULL);
- }
-
- void nhyp_header_function(line)
- char *line;
- {
- char *p = line;
-
- p = strstr(line, "?:?");
- if (p != NULL)
- {
- p += 3;
- while (isspace(*p)) p++; /* skip space */
- strncpy(nhyp_header, p, MAX_HEADER_LEN);
- trim_trailing_newline(nhyp_header);
- }
- }
-
- void nhyp_finish_header_function(header)
- char *header;
- {
- if (nhyp_header[0] == '\0')
- {
- strcpy(header, "Unknown Title");
- }
- else{
- strncpy(header, nhyp_header, MAX_HEADER_LEN);
- }
- nhyp_header[0] = '\0';
- }
-
-
-
- /* ==========================
- * === Objective-C code ===
- * ==========================
- */
-
-
-
- /*----------------------- FSA -------------------*/
#define fsa_max_edges 4
#define fsa_error_state (-1)


/* One outgoing edge: on input `if_input` the machine moves to state
   `then_goto`. */
typedef struct
{
  int if_input;
  int then_goto;
}
fsa_edge;


/* One state.  The first `n_edges` entries of `edges` are tried in order;
   when none matches, `default_goto` is taken. */
/* action (if non-NULL) is executed before transfer to next state is made */
/* action takes as arg the int input that will decide the next state */
typedef struct
{
  int default_goto;
  int n_edges;
  fsa_edge edges[fsa_max_edges];
  int (*action)();
}
fsa_vertex;


/* Feed one input to the machine described by `table`.
 * `*state_p` is the current state on entry and the new state on exit; the
 * new state is also returned.  A negative current state, or a negative
 * transition target, yields fsa_error_state. */
int fsa_step(input, state_p, table)
int input;
int *state_p;
fsa_vertex *table;
{
  int (*hook)();
  fsa_vertex *here;
  int target, k;

  if (*state_p < 0)
    {
      *state_p = fsa_error_state;
      return(fsa_error_state);
    }

  /* Run the state's action first, then look the state up again for the
     transition (same order of reads as before). */
  hook = table[*state_p].action;
  if (hook)
    hook(input);

  here = &table[*state_p];
  target = here->default_goto;
  for (k = 0; k < here->n_edges; k++)
    {
      if (here->edges[k].if_input == input)
        {
          target = here->edges[k].then_goto;
          break;
        }
    }

  if (target < 0)
    target = fsa_error_state;
  *state_p = target;
  return(target);
}


/* Feed every character of `s` to the machine, then one final 0 input. */
/* sends null char as last input, returns final state */
int fsa_run(s, state_p, table)
char *s;
int *state_p;
fsa_vertex *table;
{
  int i = 0;

  while (s[i] != '\0')
    {
      fsa_step((int) s[i], state_p, table);
      i++;
    }
  fsa_step(0, state_p, table);
  return(*state_p);
}
-
-
- /*----------------------- end FSA -------------------*/
-
-
- static int wobjc_brace_level = 0;
- static int wobjc_paren_level = 0;
- static int wobjc_strip_state = 0;
- static int wobjc_context = 0;
- static boolean wobjc_separator = false;
- static char wobjc_class[MAX_HEADER_LEN+1];
- static char *wobjc_class_end = 0;
- static char wobjc_header[MAX_HEADER_LEN+1];
- static char *wobjc_header_end = 0;
-
-
- #define WOBJC_BLANK " \t\n\r"
- #define WOBJC_WORD "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM_0123456789"
-
-
- /* Flag next line as separator, when context fsa says so. */
- static int wobjc_separate(input)
- int input;
- {
- return(wobjc_separator = true);
- }
-
-
- /* FSA to parse objective-C constructs.
-  *
-  * Row layout (see fsa_vertex): default_goto, n_edges, edge list, action.
-  * The row index is the state number kept in wobjc_context.  Inputs are
-  * pre-mapped by wobjc_process_stripped_code: any character seen while the
-  * brace level is non-zero arrives as '{', blanks arrive as ' ', and in
-  * states 3 and 14 word characters arrive as 'A'.  A row's action runs
-  * when an input is processed in that state, before the transition.
-  */
- static fsa_vertex wobjc_context_fsa[] =
- {
- { 0, 1, {{ '@', 1 }}}, /* 0: outside any construct; look for '@' */
- { 0, 1, {{ 'i', 20 }}}, /* 1: saw '@'; 'i' may start @implementation/@interface */
- { 3, 1, {{ ' ', 2 }}}, /* 2: after "@implementation"; skip blanks before the class name */
- { 4, 1, {{ 'A', 3 }}}, /* 3: inside the class name; anything else enters state 4 */
- { 4, 3, {{ '+', 6 },{ '-', 8 },{ '@', 10 }}},/* 4: inside @implementation */
- { 4, 3, {{ '+', 6 },{ '-', 8 },{ '@', 10 }}, wobjc_separate}, /* 5: as 4, reached after a method body; requests a separator */
- { 6, 1, {{ '{', 7 }}}, /* 6: after '+'; stay here until the method body is entered */
- { 5, 1, {{ '{', 7 }}}, /* 7: inside '+' method body; first input at brace level 0 leads to 5 */
- { 8, 1, {{ '{', 9 }}}, /* 8: after '-'; stay here until the method body is entered */
- { 5, 1, {{ '{', 9 }}}, /* 9: inside '-' method body; first input at brace level 0 leads to 5 */
- { 4, 1, {{ 'e', 11 }}}, /* 10: saw '@' inside @implementation; match "end" */
- { 4, 1, {{ 'n', 12 }}}, /* 11: matched "@e" */
- { 4, 1, {{ 'd', 0 }}}, /* 12: matched "@en"; 'd' completes @end and returns to 0 */
- { 14, 1, {{ ' ', 13 }}}, /* 13: after "@interface"; skip blanks before the class name */
- { 15, 1, {{ 'A', 14 }}}, /* 14: inside the class name; anything else enters state 15 */
- { 15, 1, {{ '@', 16 }}}, /* 15: inside @interface */
- { 15, 1, {{ 'e', 17 }}}, /* 16: saw '@' inside @interface; match "end" */
- { 15, 1, {{ 'n', 18 }}}, /* 17: matched "@e" */
- { 15, 1, {{ 'd', 19 }}}, /* 18: matched "@en" */
- { 0, 1, {{ '@', 1 }}, wobjc_separate}, /* 19: @end of @interface seen; requests a separator */
- { 0, 2, {{ 'm', 21 },{ 'n', 33 }}}, /* 20: saw "@i"; 'm' -> @implementation, 'n' -> @interface */
- { 0, 1, {{ 'p', 22 }}}, /* 21-32: match the rest of "implementation" */
- { 0, 1, {{ 'l', 23 }}},
- { 0, 1, {{ 'e', 24 }}},
- { 0, 1, {{ 'm', 25 }}},
- { 0, 1, {{ 'e', 26 }}},
- { 0, 1, {{ 'n', 27 }}},
- { 0, 1, {{ 't', 28 }}},
- { 0, 1, {{ 'a', 29 }}},
- { 0, 1, {{ 't', 30 }}},
- { 0, 1, {{ 'i', 31 }}},
- { 0, 1, {{ 'o', 32 }}},
- { 0, 1, {{ 'n', 2 }}}, /* 32: final 'n' enters state 2 */
- { 0, 1, {{ 't', 34 }}}, /* 33-39: match the rest of "interface" */
- { 0, 1, {{ 'e', 35 }}},
- { 0, 1, {{ 'r', 36 }}},
- { 0, 1, {{ 'f', 37 }}},
- { 0, 1, {{ 'a', 38 }}},
- { 0, 1, {{ 'c', 39 }}},
- { 0, 1, {{ 'e', 13 }}} /* 39: final 'e' enters state 13 */
- };
-
-
- /* Action to be used by stripping fsa in non-commented, non-quoted state. */
- /* This runs context fsa. */
- static int wobjc_process_stripped_code(input)
- int input;
- {
- int context_input;
-
-
- switch(input)
- {
- /* Increment brace/paren levels as appropriate. */
- case '{': wobjc_brace_level++; break;
- case '}': if(wobjc_brace_level > 0) wobjc_brace_level--; break;
- case '(': wobjc_paren_level++; break;
- case ')': if(wobjc_paren_level > 0) wobjc_paren_level--; break;
- case '\"': break;
- case '\'': break;
- case '/': break;
-
- default:
- /* If in correct context and not in brace/paren/comment/quote, */
- /* then record header info. */
- if(wobjc_brace_level==0 && wobjc_paren_level==0)
- {
- /* Recording class or instance method. Ignore multiple blanks. */
- if(wobjc_context==6 || wobjc_context==8)
- {
- if(!wobjc_header_end || wobjc_header_end==wobjc_header)
- {
- strcpy(wobjc_header, (wobjc_context==6 ? "+[" : "-["));
- strcat(wobjc_header, wobjc_class);
- strcat(wobjc_header, " ");
- wobjc_header_end = wobjc_header+strlen(wobjc_header);
- }
- if((wobjc_header_end - wobjc_header)<(MAX_HEADER_LEN-5)
- && !(strchr(WOBJC_BLANK, *(wobjc_header_end-1))
- && strchr(WOBJC_BLANK, input)))
- { *wobjc_header_end++ = input; *wobjc_header_end = 0; }
- }
-
-
- /* Recording class name for @implementation or @interface. */
- if(strchr(WOBJC_WORD, input)
- && (wobjc_context==2 || wobjc_context==3
- || wobjc_context==13 || wobjc_context==14))
- {
- if(wobjc_context==2 || wobjc_context==13 || !wobjc_class_end)
- wobjc_class_end = wobjc_class;
- if(wobjc_context==13
- || (wobjc_context==14 && !wobjc_header_end))
- wobjc_header_end = wobjc_header;
- if((wobjc_class_end - wobjc_class_end)<(MAX_HEADER_LEN/2))
- { *wobjc_class_end++ = input; *wobjc_class_end = 0; }
- if((wobjc_context==13 || wobjc_context==14)
- && (wobjc_header_end-wobjc_header_end)<(MAX_HEADER_LEN/2))
- { *wobjc_header_end++ = input; *wobjc_header_end = 0; }
- }
- }
-
- /* Since not in comment/quote, run context fsa. */
- /* Input is modified like this: */
- /* Non-zero brace level => '{'. */
- /* Else spaces => ' '. */
- /* Else if in correct contexts, word letters => 'A'. */
- context_input = input;
- if(wobjc_brace_level>0) context_input = '{';
- else if(strchr(WOBJC_BLANK, input)) context_input = ' ';
- else if((wobjc_context==3 || wobjc_context==14)
- && strchr(WOBJC_WORD, input))
- context_input = 'A';
- fsa_step(context_input, &wobjc_context, wobjc_context_fsa);
- break;
- }
- return(true);
- }
-
-
- /* FSA to strip out comments and quotes.
-  *
-  * Row layout (see fsa_vertex): default_goto, n_edges, edge list, action.
-  * The row index is the state number kept in wobjc_strip_state.  Only
-  * state 0 has an action, so wobjc_process_stripped_code sees exactly the
-  * characters that are processed while in state 0.
-  *
-  * NOTE(review): row 4 lists two edges but declares n_edges = 1, so
-  * fsa_step never tests the second one (input 0).  Confirm whether the
-  * count should be 2 or the second edge is intentionally disabled.
-  */
- static fsa_vertex wobjc_strip_fsa[] =
- {
- { 0, 3, {{ '/', 1 },{ '\"', 5 },{ '\'', 7 }}, wobjc_process_stripped_code}, /* 0: plain code */
- { 0, 2, {{ '*', 2 },{ '/', 4 }}}, /* 1: saw '/'; look for comment */
- { 2, 1, {{ '*', 3 }}}, /* 2: inside a block comment */
- { 2, 2, {{ '/', 0 },{ '*', 3 }}}, /* 3: saw '*' inside a block comment; '/' ends it */
- { 4, 1, {{ '\n', 0 }, { '\0', 0 }}}, /* 4: inside a line comment; newline ends it */
- { 5, 2, {{ '\\', 6 },{ '\"', 0 }}}, /* 5: inside a double-quoted string */
- { 5, 0, }, /* 6: character after a backslash in a string; always back to 5 */
- { 7, 2, {{ '\\', 8 },{ '\'', 0 }}}, /* 7: inside a single-quoted literal */
- { 7, 0, } /* 8: character after a backslash in a literal; always back to 7 */
- };
-
-
- boolean wobjc_separator_function(line)
- char *line;
- {
- if(wobjc_separator) { wobjc_separator = false; return true; }
- else return false;
- }
-
-
- void wobjc_header_function(line)
- char *line;
- {
- /* Run stripping fsa, which will run context fsa. */
- fsa_run(line, &wobjc_strip_state, wobjc_strip_fsa);
- return;
- }
-
-
- void wobjc_finish_header_function(header)
- char *header;
- {
- char *p;
-
-
- /* Flush terminal blanks and balance opening '[' if any. */
- for(p=wobjc_header+strlen(wobjc_header);
- p>wobjc_header && strchr(WOBJC_BLANK, *(p-1)); p--);
- if(wobjc_header[0]=='+' || wobjc_header[0]=='-') *p++ = ']';
- *p = 0;
-
-
- /* Copy out final header. */
- strcpy(header, wobjc_header);
- wobjc_header[0] = 0;
- wobjc_header_end = wobjc_header;
- return;
- }
-
-
- /* ==============================
- * === Ziff computer select ===
- * ==============================
- */
-
- /* these filters index ziff computer select cd-rom files that
- have been offloaded from the CDROM. This is for indexing
- the CACM files that have been explicitly ok'ed by ACM.
- All other uses would violate the no lan access restrictions
- of Ziff */
-
-
- #define ZIFF_TITLE_MARKER_1 "Title: "
- #define ZIFF_TITLE_MARKER_2 "Company: "
- #define ZIFF_FIRST_LINE_MARKER " *****"
-
- /* just use the title */
-
- boolean ziff_separator_function(line)
- char *line;
- {
- if (strstr(line, ZIFF_FIRST_LINE_MARKER)) {
- return(true);
- }
- else{
- return(false);
- }
- }
-
- char ziff_header[MAX_HEADER_LEN + 1];
-
- void ziff_header_function(line)
- char *line;
- {
- if (strstr(line, ZIFF_TITLE_MARKER_1) ||
- strstr(line, ZIFF_TITLE_MARKER_2))
- {
- strncpy(ziff_header, line + strlen(ZIFF_TITLE_MARKER_1),
- MAX_HEADER_LEN);
- }
- }
-
- void ziff_finish_header_function(header)
- char *header;
- {
- if(strlen(ziff_header) == 0){
- strcpy(header, "Unknown Title");
- }
- else{
- s_strncpy(header, ziff_header, MAX_HEADER_LEN);
- }
- ziff_header[0] = '\0';
- }
-
- /* special header function for filename only type */
-
- void filename_finish_header_function(header)
- char* header;
- {
- char *p = strrchr(current_filename, '/');
-
- if (p != NULL) {
- p++;
- } else {
- p = current_filename;
- }
-
- s_strncpy(header, p, MAX_HEADER_LEN);
- }
-
- #ifdef BIBDB
- /* ============================
- * === Bibdb Separator ===
- * ============================
- */
-
-
- /*
- * formfeed-separated entries:
- * each page is one entry
- */
-
- boolean bibdb_hit_header = 0;
-
- boolean bibdb_separator_function(line)
- char *line;
- {
- if((strlen(line) < 3) && substrcmp(line,"")){
- return(true);
- }
- else{
- return(false);
- }
- }
-
- char bibdb_header[MAX_HEADER_LEN + 1];
-
- void bibdb_header_function(line)
- char *line;
- {
-
- /*
- printf("bibdb_header_function: %s %d %d %d\n", line, bibdb_hit_header,
- bibdb_separator_function(line), strncmp(line, "CK: ", 4));
- */
- if((bibdb_hit_header<2)
- && (!bibdb_separator_function(line)))
- if (!strncmp(line, "CK: ", 4)) {
- strncpy(bibdb_header, line+4, MAX_HEADER_LEN);
- bibdb_hit_header++;
- } else if (!strncmp(line, "TI: ", 4)) {
- int i;
- for (i=0;i<21;i++) if (!bibdb_header[i]) bibdb_header[i]=' ';
- strncpy(&(bibdb_header[20]), line+4, MAX_HEADER_LEN-21);
- bibdb_hit_header++;
- }
- }
-
- void bibdb_finish_header_function(header)
- char *header;
-
- {
- bibdb_hit_header = 0; /* turn on the flag */
- if (strlen(bibdb_header) == 0) {
- strcpy(header, "No Title");
- }
- else {
- strncpy(header, bibdb_header, MAX_HEADER_LEN);
- }
- bibdb_header[0] = '\0';
- }
-
/* Parse a six-digit date "YYMMDD" at the start of `s`.
 *
 * Returns 10000*YY + 100*MM + DD, or -1 when fewer than three numbers
 * could be read.  Previously the sscanf result was ignored, so a
 * malformed string produced a value computed from uninitialised
 * variables.  The arithmetic is done in long so the result cannot
 * overflow a 16-bit int. */
long bgetdate(s)
char *s;
{
  int year = 0, month = 0, day = 0;

  if (sscanf(s, "%2d%2d%2d", &year, &month, &day) != 3)
    return(-1L);

  return (10000L * year + 100L * month + day);
}
-
/* Return the date of an "ED: " line as computed by bgetdate, or -1 for
 * any other line (including a bare "ED: " with nothing after it). */
long bibdb_date_function(line)
char *line;
{
  if (strlen(line) <= strlen("ED: "))
    return -1;
  if (!substrcmp(line, "ED: "))
    return -1;

  return(bgetdate(line+4));
}
-
- /* ============================
- * === Formfeed Separator ===
- * ============================
- */
-
-
- /*
- * formfeed-separated entries:
- * each page is one entry
- */
-
- boolean formfeed_hit_header = false;
-
- boolean formfeed_separator_function(line)
- char *line;
- {
- if((strlen(line) < 3) && substrcmp(line,"")){
- /* printf("hit %s\n", line); */
- return(true);
- }
- else{
- return(false);
- }
- }
-
- char formfeed_header[MAX_HEADER_LEN + 1];
-
- void formfeed_header_function(line)
- char *line;
- {
- if(formfeed_hit_header
- && (!formfeed_separator_function(line))
- && strlen(formfeed_header) == 0) {
- strncpy(formfeed_header, line, MAX_HEADER_LEN);
- formfeed_hit_header = false;
- }
- }
-
- void formfeed_finish_header_function(header)
- char *header;
-
- {
- formfeed_hit_header = true; /* turn on the flag */
- if (strlen(formfeed_header) == 0) {
- strcpy(header, "No Title");
- }
- else {
- strncpy(header, formfeed_header, MAX_HEADER_LEN);
- }
- formfeed_header[0] = '\0';
- }
-
-
-
- /* ==============================
- * === Bibinf Separator ===
- * ==============================
- */
-
- /* each section is one entry */
-
- int bibinf_hit_header = 0;
-
- boolean bibinf_separator_function(line)
- char *line;
- {
- if((strlen(line) < 3) && ((*line == '\n')
- || (*line == '\0'))) {
- return(true);
- }
- else{
- return(false);
- }
- }
-
- #ifdef SIMPLE_BIBINF
- char bibinf_autor[MAX_HEADER_LEN + 1];
- char bibinf_title[MAX_HEADER_LEN + 1];
-
- void bibinf_header_function(line)
- char *line;
- {
-
- if((bibinf_hit_header<2)
- && (!bibinf_separator_function(line))) {
- if (!strncmp(line, "Titel: ", 7)) {
- strncpy(bibinf_title, line+7, MAX_HEADER_LEN);
- bibinf_hit_header++;
- } else if (!strncmp(line, "Autor: ", 7)) {
- strncpy(bibinf_autor, line+7, MAX_HEADER_LEN);
- bibinf_hit_header++;
- }
- }
- }
- void bibinf_finish_header_function(header)
- char *header;
-
- {
- if (bibinf_hit_header == 0) {
- strcpy(header, "No Title");
- }
- else {
- int i;
- if (strlen(bibinf_autor)>0) {
- strncpy(header, bibinf_autor, 25);
- } else {
- strncpy(header, "No author given", 25);
- }
- for (i=strlen(header);i<MAX_HEADER_LEN;header[i++]=' ');
- if (strlen(bibinf_title)>0) {
- strncpy(&(header[26]), bibinf_title, MAX_HEADER_LEN-26);
- } else {
- strncpy(&(header[26]), "No title given", MAX_HEADER_LEN-26);
- }
- bibinf_hit_header = 0; /* turn on the flag */
- }
- bibinf_autor[0] = '\0';
- bibinf_title[0] = '\0';
- }
-
- #else
-
- char bibinf_header[MAX_HEADER_LEN + 1];
- boolean titel = false;
- boolean autor = false;
- boolean hrsgb = false;
-
- void bibinf_header_function(line)
- char *line;
- {
- char *word;
- char bibinf_header_copy[30];
- int i;
-
- if(bibinf_hit_header /* begin of Autor-line */
- && (!bibinf_separator_function(line))
- && (!strncmp(line, "Autor: ", 7))){
- s_strncpy(bibinf_header_copy, line+7, 26);
- word = strtok(bibinf_header_copy," ");
- while(word){
- if(isalnum(*word)){
- trim_trailing_newline(word);
- s_strncat(bibinf_header,word,26,26);
- s_strncat(bibinf_header," ",26,26);
- }
- else if(word[0] == '|'){
- bibinf_header[strlen(bibinf_header)-1] = '\0';
- s_strncat(bibinf_header,"; ",26,26);
- }
- word = strtok(NULL," ");
- }
- autor = true;
- }
- else if(autor && bibinf_hit_header /* next words of Autor-line */
- && (!bibinf_separator_function(line))
- && (strchr(line, ':') == NULL)){
- s_strncpy(bibinf_header_copy, line, 26);
- word = strtok(bibinf_header_copy," ");
- while(word){
- if(isalnum(*word)){
- trim_trailing_newline(word);
- s_strncat(bibinf_header,word,26,26);
- s_strncat(bibinf_header," ",26,26);
- }
- else if(word[0] == '|'){
- bibinf_header[strlen(bibinf_header)-1] = '\0';
- s_strncat(bibinf_header,"; ",26,26);
- }
- word = strtok(NULL," ");
- }
- } /* end of Autor-line */
- /* begin of Herausgeber-line */
- else if(bibinf_hit_header
- && (!bibinf_separator_function(line))
- && (!strncmp(line, "Herausgeber: ", strlen("Herausgeber: ")))){
- s_strncpy(bibinf_header_copy, line+strlen("Herausgeber: "), 26);
- word = strtok(bibinf_header_copy," ");
- while(word){
- if(isalnum(*word)){
- trim_trailing_newline(word);
- s_strncat(bibinf_header,word,26,26);
- s_strncat(bibinf_header," ",26,26);
- }
- else if(word[0] == '|'){
- bibinf_header[strlen(bibinf_header)-1] = '\0';
- s_strncat(bibinf_header,"; ",26,26);
- }
- word = strtok(NULL," ");
- }
- hrsgb = true;
- }
- else if(hrsgb && bibinf_hit_header /* next words of Hrsgb-line */
- && (!bibinf_separator_function(line))
- && (strchr(line, ':') == NULL)){
- s_strncpy(bibinf_header_copy, line, 26);
- word = strtok(bibinf_header_copy," ");
- while(word){
- if(isalnum(*word)){
- trim_trailing_newline(word);
- s_strncat(bibinf_header,word,26,26);
- s_strncat(bibinf_header," ",26,26);
- }
- else if(word[0] == '|'){
- bibinf_header[strlen(bibinf_header)-1] = '\0';
- s_strncat(bibinf_header,"; ",26,26);
- }
- word = strtok(NULL," ");
- }
- } /* end of Hrsgb-line */
-
- else if(bibinf_hit_header /* begin of Titel-line */
- && (!bibinf_separator_function(line))
- && (!strncmp(line, "Titel: ", 7))) {
- autor = false;
- hrsgb = false;
- for(i=strlen(bibinf_header); i < 25; i++)
- s_strncat(bibinf_header," ",MAX_HEADER_LEN,MAX_HEADER_LEN);
- s_strncat(bibinf_header,": ",MAX_HEADER_LEN,MAX_HEADER_LEN);
- s_strncat(bibinf_header, line+7,MAX_HEADER_LEN,MAX_HEADER_LEN);
- titel = true;
- }
- else if /* next words of Titel-line */
- (titel && bibinf_hit_header
- && (!bibinf_separator_function(line))
- && (strchr(line, ':') == NULL)){
- trim_trailing_newline(bibinf_header);
- for(i=strlen(bibinf_header) - 1; isspace(bibinf_header[i]); i--)
- bibinf_header[i] = '\0';
- s_strncat(bibinf_header," ",MAX_HEADER_LEN,MAX_HEADER_LEN);
- s_strncat(bibinf_header,line,MAX_HEADER_LEN,MAX_HEADER_LEN);
- if(strlen(bibinf_header) == MAX_HEADER_LEN -1)
- bibinf_header[MAX_HEADER_LEN-2] = '\n';
- }
- else if
- (titel && bibinf_hit_header
- && (!bibinf_separator_function(line))
- && (strchr(line, ':') != NULL)){
- titel = false;
- bibinf_hit_header = false;
- }
- else if (titel){
- titel = false;
- bibinf_hit_header = false;
- }
- }
-
- void bibinf_finish_header_function(header)
- char *header;
-
- {
- bibinf_hit_header = true; /* turn on the flag */
- if (strlen(bibinf_header) == 0) {
- strcpy(header, "No Title");
- }
- else {
- strncpy(header, bibinf_header, MAX_HEADER_LEN);
- /* s_strncpy(header, bibinf_header,60);
- if(header[strlen(header)-1] != '\n')
- strcat(header,"...\n"); */
- }
- bibinf_header[0] = '\0';
- }
-
-
- #endif /* SIMPLE_BIBINF */
-
/* Parse a date of the form "YYYY-MM-DD" at the start of `s`.
 *
 * Returns 10000*(YYYY-1900) + 100*MM + DD, or -1 when the three numbers
 * could not all be read.  Previously the sscanf result was ignored, so a
 * malformed string produced a value computed from uninitialised
 * variables.  The arithmetic is done in long so the result cannot
 * overflow a 16-bit int. */
long binfgetdate(s)
char *s;
{
  int year = 0, month = 0, day = 0;

  if (sscanf(s, "%4d-%2d-%2d", &year, &month, &day) != 3)
    return(-1L);

  return (10000L * (year - 1900) + 100L * month + day);
}
-
/* Return the date of an "Erfasst: " line as computed by binfgetdate, or
 * -1 for any other line (including one with nothing after the keyword). */
long bibinf_date_function(line)
char *line;
{
  if (strlen(line) <= strlen("Erfasst: "))
    return -1;
  if (!substrcmp(line, "Erfasst: "))
    return -1;

  return(binfgetdate(line+9));
}
-
-
- /* ========================================
- * === Irlist Digest Customizations ====
- * ========================================
- */
-
- boolean irlist_separator_function(line)
- char *line;
- {
- if (mail_or_rmail_separator(line)) return(true);
-
- if((strlen(line) > strlen("*********")) &&
- substrcmp(line, "**********")){
- return(true);
- }
- else{
- return(false);
- }
- }
-
- void irlist_header_function(line)
- char *line;
- {
- if((strlen(line) > strlen("Re: ")) &&
- substrcmp(line, "Re: ") &&
- (strlen(mail_subject) == 0)){
- strcpy(mail_subject, "Re: ");
- s_strncat(mail_subject, line + strlen("Re: "), MAX_HEADER_LEN, MAX_HEADER_LEN);
- trim_trailing_newline(mail_subject);
- }
- else if((strlen(line) > strlen("Fr: ")) &&
- substrcmp(line, "Fr: ") &&
- (strlen(mail_from) == 0)){
- /* this should find the <foo@bar> field in the from list */
- strncpy(mail_from, line + strlen("Fr: "), MAX_HEADER_LEN);
- trim_trailing_newline(mail_from);
- }
- else mail_header_function(line);
- }
-
/* Return the date found on `line` by mail_date_function when it is
 * positive; otherwise return the most recent positive date seen by
 * earlier calls (0 before any was seen). */
long irlist_date_function(line)
char *line;
{
  static long last_date = 0;
  long this_date = mail_date_function(line);

  if (this_date > 0)
    last_date = this_date;
  return(last_date);
}
-
- #endif
-
- #ifdef STELAR
-
- /*=================================
- *
- * STELAR Abstracts
- *
- *=================================*/
- char stelar_header[MAX_HEADER_LEN+1];
-
- boolean stelar_separator_function(line)
- char *line;
- {
- return(false);
- }
-
- void stelar_header_function(line)
- char *line;
- {
- char *p;
-
- if(stelar_header[0]=='\0' && isspace(line[0]) && strlen(line)>3){
- p=line;
- while(isspace(*p)) ++p;
- strncpy(stelar_header,p,MAX_HEADER_LEN);
- }
- }
-
- void stelar_finish_header_function(header)
- char *header;
- {
- if(strlen(stelar_header)==0){
- strcpy(header,"No Title");
- } else {
- strncpy(header,stelar_header,MAX_HEADER_LEN);
- }
- stelar_header[0]='\0';
- }
- #endif /* STELAR */
- /*=================================
- *
- * AAS Meeting Abstracts
- * (using AAS abstract LaTeX macros)
- *
- *=================================*/
- #ifdef AAS
-
- #define AASAB_TITLE_MARKER "\\title{"
- #define AASAB_AUTHOR_MARKER "\\author{"
-
- char aasab_header[MAX_HEADER_LEN+1];
- char aasab_author[MAX_HEADER_LEN+1];
- char aasab_title[MAX_HEADER_LEN+1];
-
- boolean aasab_separator_function(line)
- char *line;
- {
- if ((strlen(line) > strlen("\\documentstyle["))
- && (substrcmp(line, "aasab"))) {
- return(true);
- } else {
- return(false);
- }
- }
-
- void aasab_header_function(line)
- char *line;
- {
- char *aas_start;
- if (aas_start = strstr(line, AASAB_TITLE_MARKER)) {
- s_strncpy( aasab_title, aas_start+strlen(AASAB_TITLE_MARKER), MAX_HEADER_LEN);
- } else
- if (aas_start = strstr(line, AASAB_AUTHOR_MARKER)) {
- s_strncpy( aasab_author, aas_start+strlen(AASAB_AUTHOR_MARKER), MAX_HEADER_LEN);
- }
- }
-
- void aasab_finish_header_function(line)
- char *line;
- {
- int nchars;
- char *p = strrchr(current_filename, '/');
-
- if (strlen(aasab_title) == 0) {
- strcpy( line, "Unknown Title");
- } else {
- nchars = 0;
- while ((nchars < 20) && (aasab_author[nchars] != '}')) {
- aasab_header[nchars] = aasab_author[nchars];
- nchars++;
- }
- aasab_header[nchars] = '\0';
-
- if (p != NULL) {
- p++;
- } else {
- p = current_filename;
- }
-
- s_strncpy( line, p, MAX_HEADER_LEN);
- s_strncat( line, " ", strlen(" "), MAX_HEADER_LEN);
- s_strncat( aasab_header, "<> RE: ", strlen("<> RE: "), MAX_HEADER_LEN);
- s_strncat( aasab_header, aasab_title, MAX_HEADER_LEN, MAX_HEADER_LEN);
- s_strncat( line, aasab_header, MAX_HEADER_LEN, MAX_HEADER_LEN);
- }
- aasab_title[0] = '\0';
- }
-
- #endif /* AAS */
-
-
- char *URL_prefix=NULL; /* NOTE(review): not referenced in this part of the file; presumably set by the caller for the URL document type - confirm */
- char *URL_trim=NULL; /* NOTE(review): not referenced in this part of the file; presumably a leading path part to strip for URL documents - confirm */
-
- #ifdef HTML
- /* David J. Bianco <bianco@cs.odu.edu> */
-
- char html_header[MAX_HEADER_LEN + 1];
- static int html_title_found = 0;
- #ifdef WIN32
- #define MAX_LINE_LENGTH 1000
- #endif
-
- void html_header_function(line)
- char *line;
- {
- #ifdef WIN32
- char conv_line[MAX_LINE_LENGTH];
- #else
- char conv_line[BUFSIZ];
- #endif
- int linkflag = 0,i;
- char * begin, * end;
-
- /* convert entities to lower case to simplify comparisons */
- #ifdef WIN32
- for(i=0;i <= (int)strlen(line);i++) {
- #else
- for(i=0;i <= strlen(line);i++) {
- #endif
- if(line[i] == '<') {
- linkflag = 1;
- } else if(line[i] == '>') {
- linkflag = 0;
- }
- if(linkflag == 1) {
- conv_line[i] = tolower(line[i]);
- } else conv_line[i] = line[i];
- }
-
- begin = strstr(conv_line, "<title>");
- end = strstr(conv_line, "</title>");
-
- if(begin != NULL)
- html_title_found = 1;
-
- if((begin == NULL) && (html_title_found == 0)) {
- return;
- }
-
- if((begin == NULL) && (html_title_found == 1)) {
- begin = conv_line;
- }
-
- if((end != NULL) && (strncmp(end, "</title>", 8) == 0)) {
- html_title_found = 0;
- }
- if(strncmp(begin, "<title>", 7) == 0) {
- begin += 7;
- }
- if(end == NULL){
- end = &(conv_line[strlen(conv_line) + 1]);
- }
-
- *end = '\0';
- strcat(html_header, begin);
- }
-
- void html_finish_header_function(header)
- char *header;
- {
- if(strlen(html_header) == 0){
- strcpy(header, "Untitled HTML Document");
- }
- else{
- s_strncpy(header, html_header, MAX_HEADER_LEN);
- }
- html_header[0] = '\0';
- }
- #endif /* HTML */
-
-
-