Source Code 1994 March

home *** CD-ROM | disk | FTP | other *** search

/ Source Code 1994 March / Source_Code_CD-ROM_Walnut_Creek_March_1994.iso / compsrcs / misc / volume36 / translit / part03 < prev next >

Wrap

Text File | 1993-03-21 | 59.4 KB | 1,868 lines

Newsgroups: comp.sources.misc From: jkl@osc.edu (Jan Labanowski) Subject: v36i025: translit - transliterate foreign alphabets, Part03/10 Message-ID: <1993Mar19.224349.11873@sparky.imd.sterling.com> X-Md4-Signature: e266dff38e849f207e0bbd3b969b2c3f Date: Fri, 19 Mar 1993 22:43:49 GMT Approved: kent@sparky.imd.sterling.com Submitted-by: jkl@osc.edu (Jan Labanowski) Posting-number: Volume 36, Issue 25 Archive-name: translit/part03 Environment: UNIX, MS-DOS, VMS #! /bin/sh # This is a shell archive. Remove anything before this line, then feed it # into a shell via "sh file" or similar. To overwrite existing files, # type "sh file -c". # Contents: order.txt translit.c # Wrapped by kent@sparky on Fri Mar 19 16:00:10 1993 PATH=/bin:/usr/bin:/usr/ucb:/usr/local/bin:/usr/lbin ; export PATH echo If this archive is complete, you will see the following message: echo ' "shar: End of archive 3 (of 10)."' if test -f 'order.txt' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'order.txt'\" else echo shar: Extracting \"'order.txt'\" $2315 characters$ sed "s/^X//" >'order.txt' <<'END_OF_FILE' X From: X X X X X To: JKL ENTERPRISES, INC. X P.O.Box 21821 X Upper Arlington, OH 43221-0821 X X X Please send me the executable, ready to run TRANSLIT program on X a diskette for the IBM-PC computer or compatible for MS-DOS 2.1 X or higher [see footnote]. X X I request the following medium (choose one): X X _____ 5.25 inch 360 kByte diskette X X _____ 3.5 inch 720 kByte diskette X with installation instructions. I understand that I will also receive X the transliteration tables and complete source of the program and X documentation as disk files. X X I understand that the TRANSLIT program comes without any warranty, and X the only claim which I can make towards JKL ENTERPRISES, INC. is to X replace a defective diskette. I also understand that this offer may be X withdrawn at any time. 
X X Prepaid orders (i.e., orders accompanied with a check or money order) X for the program are $15 (fifteen US Dollars). If an invoice is requested X there will be additional charge of $5 (five US Dollars) for processing. X There is no shipping and handling charge for orders within US. Outside X US please add the $1 dollar for Shipping and Handling. Diskette with the X program will be sent via 1st Class Mail or Air Mail, whichever applies. X I understand that the shipment will be made when funds are received. X X I enclose (fill in appropriate blanks): X X for the program ($15 per disk): ______ X X for the invoice to be billed later ($5) ______ X X Shipping/Handling if outside USA ($1 per disk) ______ X ------------------------------------------------------------- X X Total: ______ X X Ohio residents must add 5.75% tax of the Total: ______ X X Total+Tax(if Ohio Resident) ______ X X Please send the program to the following address: X X Name: __________________________________ X X Organization: __________________________________ X X Address: __________________________________ X X Town, State, Zip-code: __________________________________ X X_____________________________________________________ X [Footnote]: inquire for other computers/operating systems. END_OF_FILE if test 2315 -ne `wc -c <'order.txt'`; then echo shar: \"'order.txt'\" unpacked with wrong size! fi # end of 'order.txt' fi if test -f 'translit.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'translit.c'\" else echo shar: Extracting \"'translit.c'\" $54601 characters$ sed "s/^X//" >'translit.c' <<'END_OF_FILE' X/* This is a program fom transliterating files from one character set to X another. X TRANSLIT --- Version 1.0, Jan. 10, 1993. X Copyright (c) by Jan Labanowski, 1993 and JKL Enterprises, Inc. X Permission is given to disribute this program freely in accordance with X the rules and conditions spelled out in the program documentation. 
If you X got this program without the documentation, or if some files were missing, X somebody must have violated the rules. In this case, please delete the the X program and obtain the information on how to get the complete distribution X from the author. The rules require that the whole package is distributed X (i.e., the source code, the transliteration tables, and the documentation). X X Author: Jan Labanowski, P.O.Box 21821, Columbus, OH 43221-0821, USA X E-mail: jkl@osc.edu, JKL@OHSTPY.BITNET X */ X X#include "paths.h" /* for local definitions */ X#include "reg_exp.h" /* for regexp package */ X X X#define OPTIONS "i:o:t:d" /* allowed options on command line */ X#define MAXPAIRS 1000 /* maximum number of conversion pairs */ X#define MAXSETS 10 /* maximum number of shift in/out sets */ X#define MAXLEVEL 10 /* maximum set nesting level */ X#define MAXBUFF 1000 /* maximum size of the buffer */ X#define MAXMATCH 100 /* maximum length of match to regular exp. */ X X X/* define all local functions as static if compiler likes it */ X#if STATICFUN X#define STATIC static X#else X#define STATIC X#endif X X/* types to hold the translation maps for single chars. If inp_maps is of X * type IMAPP, then character of code c in set k will correspond to X * a string pointed by (*inp_maps[k])[c]; X */ X X/* ========== now include definitions for paths and regexp */ X X Xtypedef char* IMAP[256]; /* type IMAP is a 256 element array X of pointers to string */ Xtypedef IMAP* IMAPP[MAXSETS]; /* array of pointers to IMAP */ X X/* types to hold output set number for a single char. 
if out_sets is of type X * OSETP and c is a code, and k is the set of input character, then output X * set number is (*out_sets[k])[c]; */ Xtypedef int OSET[256]; Xtypedef OSET* OSETP[MAXSETS]; X X Xtypedef union { X char *seq; /* pointer to a string */ X reg_exp *re; /* pointer to a regular expression "program */ X } ADDR; X X Xtypedef struct { X int typ; /* type of pointer in ADDR union: X 0-string (seq), 1-input regexp (re), X 2-output regexp (seq) (output regexp X is a string !)*/ X int len; /* length of a string if present */ X int set; /* character set number for the string */ X ADDR ad; /* string or regexp program */ X } SDATA; X X X/* Some compiler represent character codes > 127 as negative numbers, i.e., X * character 255 is -1, char 254 is -2, etc. X * The flag SIGNED_CHAR_TYPE is set by the program, (program checks which X * convention is used. It is set to 1, if characters have sign (i.e., 255=-1) X * and is set to 0 if characters are unsigned (i.e., 255=255). Do not touch X * this declaration, unless you know what you are doing. X */ Xint SIGNED_CHAR_TYPE; X X char tabline[MAXBUFF]; /* line of text from conversion table file */ X char last_tab_line[MAXBUFF]; X char *lineptr; /* pointer to the first unread character of tabline */ X int n_line_chars; /* number of characters in tabline buffer */ X int chars_left; /* no. 
of chars left in input buffer */ X X int memleft; /* tells how much memory is left in allocated area */ X char *memptr; /* pointer for memory allocation area */ X char regerrstr[100]; /* string to hold error message from regular exp */ X reg_exp *regauxptr; /* aux pointer for regular expresion structure */ X int debug_flg=0; /* if 1, then additional info sent to stderr */ X X FILE *inpf; /* input file pointer */ X FILE *outf; /* output file pointer */ X FILE *tabl; /* file with translation table */ X X int n_conv_seq; /* number of conversion sequences */ X SDATA inp_data[MAXPAIRS]; /* structure to hold types, lengths and pointers X for input sequences */ X SDATA out_data[MAXPAIRS]; /* structure to hold types lengths and pointers X for output sequences */ X SDATA inp_SO_data[MAXSETS]; /* structure with types, lens, ptrs for inp_SO*/ X SDATA inp_SO_subs[MAXSETS]; /* holds substitution string/regexp for inp_SO*/ X SDATA inp_SI_data[MAXSETS]; /* structure with types, lens, ptrs for inp_SI*/ X SDATA inp_SI_subs[MAXSETS]; /* holds substitution string/regexp for inp SI*/ X SDATA inp_nest_open[MAXSETS]; /* for sick transliteration cases, like TeX */ X SDATA inp_nest_close[MAXSETS]; /* where you need to count {} pairs */ X X SDATA *junky; X X IMAPP inp_maps; /* maps for single character sequences, X array of pointers. 
Element of the array is a pointer X to the array of pointers which point at strings */ X OSETP out_sets; /* output set numbers corresponding to inp_maps X array of pointers to integer pointers */ X int n_inp_sets; /* number of input sets */ X int n_out_sets; /* number of output sets */ X char *out_SI[MAXSETS]; /* pointer to output shift in sequences */ X char *out_SO[MAXSETS]; /* pointer to output shift out sequences */ X int out_SI_len[MAXSETS]; /* out SI sequence length */ X int out_SO_len[MAXSETS]; /* out SO sequence length */ X X char *begseq, /* sequence to be written at the beginning of output*/ X *endseq; /* sequence to be written at the end of output file */ X X int file_version; /* conversion table version number */ X int strstart, strend, /* codes delimiting strings */ X liststart, listend, /* codes delimiting lists */ X regexstart, regexend, /* codes delimiting expressions */ X curst1, curend1, X curst2, curend2; /* auxiliary */ X X char scr1[MAXBUFF], scr2[MAXBUFF], /* scratch space */ X scr1a[MAXBUFF], scr2a[MAXBUFF]; /* scratch space */ X char *scr1ptr, *scr2ptr, *scr1pt, *scr2pt, X *scrauxptr, *scrcurptr; /* aux string pointers */ X X int inp_seq_length; /* length of input sequence */ X int out_seq_length; /* length of output sequence */ X int out_set_number; /* number of output set */ X char *out_seq_ptr; /* pointer to output sequence */ X X reg_exp *reg_comp(); X int reg_try(); X void reg_sub(); X void reg_error(); X X/* fix if no strchr routine in the libarry */ X#if STRCHR X#else X#define strchr indexfun X#endif X X/* this is index which is equivalent to strchr */ Xchar *indexfun (s, c) Xchar *s; Xint c; X { X while (*s) X if (c == *s) return (s); X else s++; X return (NULL); X } X X X/* ============================================================ */ X X/* include code for getopt() if not known to compiler */ X X#if GETOPT X#else X X/* X This is a some getopt I took from the net and do not remember X who actually wrote this X*/ X X#define ARGCH 
(int)':' X#define BADCH (int)'?' X#define EMSG "" X#define ENDARGS "--" X X/* X * get option letter from argument vector X */ Xint opterr = 1, /* useless, never set or used */ X optind = 1, /* index into parent argv vector */ X optopt; /* character checked for validity */ Xchar *optarg; /* argument associated with option */ X X#define tell(s) fputs(*nargv,stderr);fputs(s,stderr); \ X fputc(optopt,stderr);fputc('\n',stderr);return(BADCH); X X XSTATIC int getopt(nargc,nargv,ostr) Xint nargc; Xchar **nargv, X *ostr; X{ X static char *place = EMSG; /* option letter processing */ X register char *oli; /* option letter list index */ X X if(!*place) { /* update scanning pointer */ X if(optind >= nargc || *(place = nargv[optind]) != '-' || X !*++place) return(EOF); X if (*place == '-') { /* found "--" */ X ++optind; X return(EOF); X } X } /* option letter okay? */ X if ((optopt = (int)*place++) == ARGCH || !(oli = strchr(ostr,optopt))) { X if(!*place) ++optind; X tell(": illegal option -- "); X } X if (*++oli != ARGCH) { /* don't need argument */ X optarg = NULL; X if (!*place) ++optind; X } X else { /* need an argument */ X if (*place) optarg = place; /* no white space */ X else if (nargc <= ++optind) { /* no arg */ X place = EMSG; X tell(": option requires an argument -- "); X } X else optarg = nargv[optind]; /* white space */ X place = EMSG; X ++optind; X } X return(optopt); /* dump back option letter */ X} X X#endif X X X/* ================= charcode ====================== X * returns the code of the character given its integer code. If global X * variable SIGNED_CHAR_TYPE flag is 1, then character code is negative X * for chars >= 128, otherwise they are passed through. 
X * ================================================== */ XSTATIC char charcode (intcde) Xint intcde; X{ X if(SIGNED_CHAR_TYPE == 1) { /* if signed chars used */ X if(intcde >= 128) { /* if integer code is larger than 128 */ X return((char)(intcde - 256)); /* make it negative complement */ X } X else { /* return the original code */ X return((char)intcde); X } X } X else { X return((char)intcde); X } X} X/* ================= intcode ===================== X * returns integer code of a character depending on the value of X * SIGNED_CHAR_TYPE flag X * =============================================== */ Xint intcode(charcde) Xchar charcde; X{ X if(SIGNED_CHAR_TYPE == 1) { /* if signed chars used */ X if((int)charcde < (int)0) { /* if negative code */ X return((int)((int)charcde + 256)); /* convert to positive */ X } X else { /* return the original code */ X return((int)charcde); X } X } X else { X return((int)charcde); X } X} X X/* ================= tablerr ====================== X * terminates the program with a message to stderr and contents of X * the buffer. num - exit status, errmsg - message X * ================================================ */ XSTATIC int tablerr(num, errmsg) Xint num; Xchar *errmsg; X{ X fprintf(stderr,"%s\n", errmsg); X fprintf(stderr, X "Current contents of the input buffer for conversion table file:\n"); X fprintf(stderr,"%s\n", last_tab_line); X exit(num); X return(0); /* to keep compiler happy that there is return from function */ X} X X/* ================= getnblkline ==================== X * gets a nonblank line from tabl file and resets pointers if clearflg == 1, X * otherwise, appends the line to the current buffer. X * Line is stored in the global variable tabline. The global *lineptr X * is reset to line beginning. X * If EOF reached, or line too long, returns -1, else a number of chars in X * the line. If line starts with # or ! in first column, it is skipped. 
X * ================================================== */ XSTATIC int getnblkline(fileptr, clearflg) XFILE *fileptr; Xint clearflg; X{ X int l, maxc; X char *auxptr; X if(clearflg == 1) { X n_line_chars = 0; X lineptr = tabline; X } X maxc = MAXBUFF - n_line_chars -2; /* how much space in the buffer */ X while (fgets (lineptr, maxc, fileptr) != NULL) { X strcpy(last_tab_line, lineptr); /* save current line for error messages */ X if((*lineptr == '#') || (*lineptr == '!')) { /* skip comment lines */ X continue; X } X l = strlen(lineptr); /* how many chars we read ? */ X n_line_chars += l; /* how many chars in the buffer */ X X if(n_line_chars > MAXBUFF-5) { /* if line too long */ X return(-1); X } X if(clearflg == 1) { /* do it only if first line is fetched */ X auxptr = lineptr; X while ((isspace(*auxptr) != 0) && (*auxptr != '\0')) { X auxptr++; /* skip front spaces */ X } X X if(*auxptr == '\0') { /* if blank line */ X continue; X } X } X return(l); /* return length of line just read */ X } /* end while */ X return(-1); /* end of file found */ X} X/* ================= chknblk ========================== X * returns a code of the first nonblank character at the current position X * of tabline buffer. The lineptr is left at this char (NOT AT THE NEXT CHAR ! X * If no nonblank * character, returns -1. X * ==================================================== */ XSTATIC int chknblk(fileptr) XFILE *fileptr; X{ X int ch; X X Fetch_next: X while (*lineptr != '\0') { X if(isspace(*lineptr) == 0) { X ch = intcode(*lineptr); X return(ch); X } X lineptr++; X } X if(getnblkline(fileptr, 0) > 0) { X goto Fetch_next; X } X else { X return(-1); X } X} X X/* ================= getnumber =========================== X * retrieves integer nonnegative decimal number from a the current X * line (tabline). 
X * Returns the number, or -9999 if no good number in the line X * only number < 1000 allowed X * ========================================================== */ XSTATIC int getnumber(fileptr) XFILE *fileptr; X{ X int num, flg, sign; X num = 0; X flg = 0; X sign = 0; X XNext_line: X while (*lineptr != '\0') { X if(flg == 0) { /* only spaces found till now */ X if(isspace(*lineptr) != 0) { X lineptr++; X continue; X } X else { X flg = 1; /* the nonblank char found */ X } X } X X if(flg == 1) { /* the nonblank char was found */ X if(sign == 0) { /* sign may only be located before the number */ X if(*lineptr == '-') { X sign = -1; X lineptr++; X } X else if(*lineptr == '+') { X sign = 1; X lineptr++; X } X else { /* set it to +1, so it is checked only once */ X sign = 1; X } X } X if(isdigit(*lineptr) != 0) { X num = 10*num + *lineptr - '0'; X if(num > 1000) { X return(-9999); /* number too large */ X } X } X else if(isspace(*lineptr) != 0) { /* end of number */ X return(num*sign); X } X else { X return(-9999); /* some strange character */ X } X } X lineptr++; /* to next character */ X } X if(flg == 1) { /* if valid number collected before '\0' */ X return(num*sign); X } X else { X if(getnblkline(fileptr, 0) > 0) { X goto Next_line; X } X else { X return(-9999); /* if no number before end of file, error */ X } X } X} X X X/* ================= getstring ========================== X * returns the pointer to the string from the tabline. The pointer is X * volatile, and will point to garbage after next getnblkline call, so X * you need to copy it (or use it) immedaitely after the call. X * Returns a pointer to string if successful, and NULL pointer if not. X * startcode --- character code which starts the string (it is not X * included in the string. If startcode = '\0', the X * string is collected from the curent pointer to a buffer. X * endcode --- character which ends the string. It is not included in X * the string. 
If endcode = '\0', then string is collected X * until first blank or end of string found. X * If no startcode found or no endcode found, the NULL string is returned. X * ========================================================= */ X XSTATIC char *getstring(startcode, endcode, fileptr) Xint startcode, endcode; XFILE *fileptr; X{ X int flg; X char *startptr; X X flg = 0; XRead_next_line: X while (*lineptr != '\0') { X if(flg == 0) { /* if startcode not found yet */ X if(startcode != 0) { X if(charcode(startcode) == *lineptr) { X flg = 1; /* the startcode found */ X lineptr++; X startptr = lineptr; /* skip startcode */ X continue; X } X else { X lineptr++; X continue; X } X } X else { /* startcode is 0 */ X flg = 1; X startptr = lineptr; X lineptr++; X continue; X } X } /* end flg == 0 */ X X if(flg == 1) { /* the 1st char was found */ X if(endcode == 0) { /* if stop on blank requested */ X if(isspace(*lineptr) != 0) { /* if space found */ X *lineptr = '\0'; /* mark string end */ X lineptr++; /* advance pointer */ X return(startptr); X } X else { /* collect chars */ X lineptr++; X continue; X } X } X else if(charcode(*lineptr) == endcode) { /* if stop at endcode */ X *lineptr = '\0'; X lineptr++; X return(startptr); X } X else { /* if not endcode , collect next characters */ X lineptr++; X continue; X } X } /* end flg == 1 */ X } /* end while */ X X /* the buffer was exhausted */ X if(endcode == 0) { X return(startptr); X } X else { X if(getnblkline(fileptr, 0) > 0) { X goto Read_next_line; X } X else { X return((char *)NULL); X } X } X} X X/* ============================ convnum =================== X * returns a nonegative number based on str. Scans the string X * from position posbeg, and returns first invalid character position X * in posend. If error, returns -1 (less than 2 characters, num > 255). 
X * str - scanned string X * digits string of allowed ordered digits in lowercase X * posbeg - start X * ======================================================== */ X XSTATIC int convnum(buff, digits, posbeg) Xchar *buff, *digits; Xint posbeg; X{ X int num, i, l, d, base; X X base = strlen(digits); X num = 0; X i = posbeg; X X while (buff[i] != '\0') { X d = -1; X for(l = 0; l < base; l++) { X if(buff[i] == digits[l]) { X d = l; X break; X } X } X if(d >= 0) { X num = d + num*base; X if(num > 255) { /* if code too large */ X return(-1); X } X i++; X } X else { X break; X } X } X if((i - posbeg) < 2) { /* if less than two characters in a number */ X return(-1); X } X buff[i] = '\0'; X return(num); X} X X X/* ============================ str2code ================== X * str2code returns a code specified in buff. The valid numbers must have at X * least 2 digits. Here is a format of the code string X * (n represents valid digit for a given base). X * nnn (up to 3 decimal digits, first is not zero) X * 0nnn (up to 3 octal digits) X * 0xnn (up to 3 hex digits) X * 0onnn (up to 3 octal digits) X * 0dnnn (up to 3 decimal digits) X * The buff string will have '\0' put at the position after a valid number X * If no valid number can be parsed, or number is greater than 255, -1 X * is returned. 
X * ======================================================== */ XSTATIC int str2code(buff) Xchar *buff; X{ X int i, l, num; X static char decdig[]="0123456789", X octdig[]="01234567", X hexdig[]="0123456789abcdef"; X X l = strlen(buff); /* get length */ X if(l < 2) { /* string too short */ X return(-1); X } X X for(i = 0; i < l; i++) { /* convert to lowercase */ X if(isalpha(buff[i]) != 0) { /* if letter */ X buff[i] = tolower(buff[i]); X } X } X X if(isdigit(buff[0]) == 0) { /* if first char not a digit */ X return(-1); X } X X if(buff[0] == '0') { /*if starting char is 0, then octal */ X if((num = convnum(buff, octdig, 0)) != -1) { /* check if no base */ X return(num); X } X } X else { /* this has to be a decimal number */ X if((num = convnum(buff, decdig, 0)) != -1) { X return(num); X } X else { /* error in decimal number */ X return(-1); X } X } X /* the base is specified at buff[1] */ X if(buff[1] == 'o') { X num = convnum(buff, octdig, 2); X } X else if(buff[1] == 'd') { X num = convnum(buff, decdig, 2); X } X else if(buff[1] == 'x') { X num = convnum(buff, hexdig, 2); X } X else { /* no base found */ X return(-1); X } X return(num); X} X X/* ================= convstr ======================== X * copies inp_string to out_strings, and when codes are given as \xxx X * converts them to characters. X * Returns: X * 0 if OK X * 1 if character zero (e.g., \00 or \0x0) is found (it ends the string X * processing, since it is string terminator). 
X * =================================================== */ XSTATIC int convstr(inp_string, out_string) Xchar *inp_string, *out_string; X{ X int ch, ch1, i, l, num, n; X char buff[8]; X X n = 0; X while ((ch = *inp_string) != '\0') { X n++; /* count characters */ X if(ch == '\\') { X /* skip blank sequence */ X ch1 = *(inp_string + 1); /* charcode following "\" */ X if(isspace(ch1) != 0) { /* if "\" followed by blanks */ X inp_string++; /* skip over "\" */ X n++; X while( isspace((*inp_string)) != 0) { /* skip all spaces */ X n++; X inp_string++; X } X n--; /* it will be advanced at the top of loop */ X ch = *inp_string; X if(ch == '\0') { X *out_string = '\0'; X return(0); X } X continue; /* start new loop turn */ X } /* ch is space */ X X /* now check is \020, etc., i.e., codes */ X for(i = 1; i <= 6; i++) { /* copy possible number to a buffer */ X buff[i-1] = *(inp_string+i); X } X buff[6] = '\0'; /* terminate buff */ X if((num = str2code(buff)) >= 0) { X *out_string++ = charcode(num); X /* find how many characters have beed used ( number + \ ) */ X if(num == 0) { X *out_string = '\0'; X return(1); X } X l = strlen(buff) + 1; X inp_string += l; X continue; X } X } X *out_string++ = *inp_string; X inp_string++; X } /* end while */ X *out_string = '\0'; X return(0); X} X X X/* ====================== compstr ======================== X * returns 1 if str1 is located at the beginning of str2 and 0 otherwise X * ======================================================= */ XSTATIC int compstr(str1, str2) Xchar str1[], str2[]; X{ X int i; X if(str1[0] == '\0') { /* empty sequence never matches */ X return(0); X } X for(i = 0; str1[i] != '\0'; i++) { X if(str1[i] != str2[i]) { X return(0); X } X } X return(1); X} X X/* ====================== chkseqs ============================ X * returns the sequence number if sequence is present at the beginning X * of buffer and -1 otherwise (first sequence has number 0); X * If regular expression, then SDATA.len is set to the length of 
X * the string which matches the regular expession. X * ============================================================= */ XSTATIC int chkseqs(n_seq, seqstruc, buff) Xint n_seq; XSDATA *seqstruc; Xchar *buff; X{ X int i, j, l; X char *sp, *ep, *str; X reg_exp *reaux; X X if(n_seq == 0) { X return(-1); X } X for (i = 0; i < n_seq; i++) { X if(seqstruc->typ == 0) { /* if plain string */ X str = (seqstruc->ad).seq; X if(*str != '\0') { X l = 1; X for(j = 0; *str != '\0'; j++) { X if(*(buff + j) != *str++) { X l = 0; X break; X } X } X if(l == 1) { X return(i); X } X } X } X else if(seqstruc->typ == 1) { /* regexp */ X if(reg_try((seqstruc->ad).re, buff) == 1) { /* if anchored match found */ X reaux = (seqstruc->ad).re; /* get address of search program */ X sp = reaux->startp[0]; /* address of 1st char of match */ X ep = reaux->endp[0]; /* next char after match */ X if(sp != buff) { /* matches are anchored, at the buff beginning ! */ X tablerr(10, "Internal error in regexp package\n"); X } X l = seqstruc->len = ep - sp; /* match length */ X if(l <= 0) { X fprintf(stderr,"Error when matching regular expression %d\n", i+1); X exit(10); X } X return(i); X } X } X seqstruc++; X } /* end for */ X return(-1); X} X X/* =================== rdelim ================== X * read delimiters from tabl file X * ============================================= */ XSTATIC int rdelim(startd, endd) Xint *startd, *endd; X{ X if(getnblkline(tabl, 1) < 0) { X tablerr(10, "Could not read left delimiter code"); X } X if((*startd = chknblk(tabl)) < 0) { X tablerr(10, "Could not read left delimiter code"); X } X lineptr++; /* point at next char */ X if(isspace(*lineptr) == 0) { X tablerr(10, X "(Left Delimiter):Delimiters should be single chars separated by spaces"); X } X if((*endd = chknblk(tabl)) < 0) { X tablerr(10, "Could not read right delimiter code"); X } X lineptr++; /* point at next char */ X if(isspace(*lineptr) == 0) { X tablerr(10, X "(Right Delimiter):Delimiters should be single chars 
separated by spaces"); X } X return(0); X} X X/* ================== beseq ============== X * read starting or ending sequence for output X * and return pointer X * ======================================= */ XSTATIC char* beseq() X{ X char *scr1pt, *scr2pt; X int l; X X if((getnblkline(tabl, 1) < 0) || X ((scr1pt = getstring(strstart,strend,tabl)) == (char*)NULL)) { X tablerr(10, "Error when reading starting/ending sequence"); X } X l = strlen(scr1pt) + 1; X if((scr2pt = (char*)malloc(l*sizeof(char))) == NULL) { X tablerr(10, "Out of memory"); X } X convstr(scr1pt,scr2pt); X return(scr2pt); X} X X/* ================= allomaps =============== X * Allocate space for maps X * ============================================ */ XSTATIC int allomaps(n) Xint n; X{ X int i; X /* Allocate space for inp_maps and out_sets for input set 0 */ X if((inp_maps[n] = (IMAP*)malloc(256*sizeof(char*))) == NULL) { X /*if failed */ X tablerr(10, "Out of memory for storing sequences"); X } X X if((out_sets[n] = (OSET*)malloc(256*sizeof(int))) == NULL) { /* if failed */ X tablerr(10, "Out of memory for storing sequences"); X } X X for(i = 0; i < 256; i++) { /* zero allocated memory */ X (*inp_maps[n])[i] = (char*)NULL; X (*out_sets[n])[i] = 0; X } X return(0); X} X X/* =================== savestring ================= X * saves string in the allocated storage and returns pointer to it X * does all the housekeeping X * ================================================== */ XSTATIC char *savestring(str) Xchar *str; X{ X int l; X char *retptr; X X l = strlen(str)+1; X if(memleft < l) { X memleft = 5*MAXPAIRS; X if((memptr = (char*)malloc(memleft*sizeof(char))) X == NULL) { X tablerr(10,"Out of memory for allocation"); X } X } X strcpy(memptr, str); X retptr = memptr; X memptr += l; X memleft -= l; X return(retptr); X} X X/* ================= splitlist ================= X * unfolds the list [] to a list of characters (i.e. 
[a-d] = [abcd]) X * =============================================== */ XSTATIC int splitlist(inlist, unflist) Xchar *inlist, *unflist; X{ X int ch, ch1, ch2, i, len; X X convstr(inlist, inlist); /* convert codes */ X len = strlen(inlist); X if(len == 0) { X tablerr(10, "Empty list specified"); X } X X *unflist++ = *inlist++; /* save first character */ X while ( *inlist != '\0') { X ch = *inlist; X if((ch != '-') || (*(inlist+1) == '\0')) { X *unflist++ = ch; X } X else { /* the minus is inside */ X ch1 = intcode(*(inlist-1)); X ch2 = intcode(*(inlist+1)); X if(ch2 <= ch1) { X tablerr(10, "The limits in the range within the list are reversed"); X } X for(i = ch1+1; i < ch2; i++) { X *unflist++ = charcode(i); X } X } X inlist++; X } X *unflist = '\0'; X return(0); X} X X/* ======================== regerror ================== X * regerror --- routine called from within a regexp package. Aborts X * program with message X * ==================================================== */ Xvoid reg_error(s) Xchar *s; X{ X strcat(regerrstr,s); X tablerr(11,regerrstr); X} X X/* ======================== rdinshift ================== X * reads in a shift sequence, assuming that the getnblkline was called X * Fills in structure SDATA. 
If typ = 1, it is assumed that it is data X * for matching, if typ = 2, this is data to be output X * If OK, returns 0, else dies X * ======================================================= */ XSTATIC int rdinshift(sdstr, sttyp) XSDATA *sdstr; Xint sttyp; X{ X int mode1; X ADDR ads; X curst1 = chknblk(tabl); /* check what type delimiter */ X if(curst1 == strstart) { X mode1 = 1; X curend1 = strend; X } X else if(curst1 == liststart) { X mode1 = 2; X tablerr(10, "Lists not allowed for input SHIFT sequences"); X } X else if(curst1 == regexstart) { X mode1 = 3; X curend1 = regexend; X } X else { X tablerr(10, "Error when reading SHIFT input sequences"); X } X X if((scr1pt = getstring(curst1, curend1, tabl)) == (char*)NULL) { X tablerr(10, "Error when reading input SHIFT sequences"); X } X X convstr(scr1pt, scr1); /* convert codes in the sequence */ X scr1pt = savestring(scr1); /* save sequence in memory */ X strcpy(regerrstr, "Error in regexp for input SHIFT sequences:"); X X if(mode1 == 1) { X sdstr->typ = 0; /* common string */ X sdstr->len = strlen(scr1pt); X ads.seq = scr1pt; /* save string address */ X sdstr->ad = ads; X } X else if(mode1 == 3) { X if(sttyp == 1) { X sdstr->typ = 1; X regauxptr = reg_comp(scr1pt); X if(regauxptr == (reg_exp *)NULL) { X tablerr(10, "Error in regular expression"); X } X ads.re = regauxptr; X sdstr->ad = ads; X } X else { X sdstr->typ = 2; X sdstr->len = strlen(scr1pt); X ads.seq = scr1pt; X sdstr->ad = ads; X } X } X return(0); X} X X/* ========================= match_subs ========================== X * match_subs matches the match_data sequence description to the X * current position of the input file string (scrcurptr) and if match X * is found, finds the replacement string and puts it in scr1 buffer. X * it sets the global variables out_seq_length, out_seq_ptr, inp_seq_length X * out_set_number. Returns 1 on success, and 0 if match was not found. 
X * ================================================================== */ XSTATIC int match_subs(match_data, repl_data) XSDATA *match_data, *repl_data; X{ X if(chkseqs(1, match_data, scrcurptr) >= 0) { X inp_seq_length = match_data->len; /*chkseqs sets it for inp.typ 1 */ X out_set_number = repl_data->set; X if(repl_data->typ == 2) { /* if regexp substitution */ X /* find a substitution string */ X regauxptr = (match_data->ad).re; /* pointer to regexp prog */ X /* scr contains the substitution string */ X reg_sub(regauxptr, (repl_data->ad).seq, scr1); X out_seq_length = strlen(scr1); /*number of chars in substitute */ X out_seq_ptr = scr1; /* pointer to substitute string */ X } X else { /* if plain string (type = 0) */ X out_seq_length = repl_data->len; X out_seq_ptr = (repl_data->ad).seq; X } X if(out_seq_length > MAXMATCH) { X fprintf(stderr, X "The substitution string is too long (%d chararacters):\n%s\n", X out_seq_length, out_seq_ptr); X exit(1); X } X return(1); X } X else { X return(0); X } X} X X/* =================== repl_inp ============================= X * replaces matching portion of an input text with a substitute string. 
X * ========================================================== */ Xint repl_inp() X{ X int k, l, i; X X if(out_seq_length > MAXMATCH) { X fprintf(stderr, X "The output substitution sequence is too long (%d characters):\n%s\n", X out_seq_length, out_seq_ptr); X exit(1); X } X if(inp_seq_length >= out_seq_length) { /* do not have to copy strings */ X k = inp_seq_length - out_seq_length; /* diff in lengths */ X scrcurptr += k; /* move forwarde by the diff */ X chars_left -= k; X for (i = 0; i < out_seq_length; i++) { /* copy chars */ X *(scrcurptr + i) = *(out_seq_ptr + i); X } X } X else { /* have to push remaining chars to the right to make space */ X k = out_seq_length - inp_seq_length; /* diff in lengths */ X l = strlen(scrcurptr); /* length of input text */ X /* memmove could be used, but it is not in all libaries */ X for (i = l; i >= 0; i--) { /* move to right, start with terminating '\0' */ X *(scrcurptr + i + k) = *(scrcurptr + i); X } X for (i = 0; i < out_seq_length; i++) { /* place the output string */ X *(scrcurptr + i) = *(out_seq_ptr + i); X } X chars_left += k; /* update chars_left, scrcurptr not changed */ X } X return(0); X} X X/*======================== main ================================== */ X Xint main(argc, argv) Xint argc; Xchar **argv; X{ X char *tabl_file; /* name of file with conversion table */ X static char deftablfile[200]= X DEFCONVNAME; /* default conversion file name */ X static char deftablpath[200]= X TPATH; /* default conversion file path */ X char table_name[300]; /* working array for conversion table */ X X int level; /* input set nesting level */ X int inp_cur_set[MAXLEVEL];/* set input number being processed */ X int inp_cur_nest[MAXLEVEL]; /* current nesting count for input set */ X int cur_inp_set; /* current input set, same as inp_cur_set[level] */ X int out_cur_set; /* output set level being processed */ X int buffer_size; /* size of input buffer string */ X int opt; /* option letter */ X int mode1, mode2; /* type of 
string (1=str, 2=list, 3=regex) */ X X int flg, ch, i, j, k, l, n; /* aux variables */ X X X#if GETOPT X extern char *optarg; /* option argument from getopt */ X extern int optind, opterr; /* needed for getopt */ X#endif X X static char usage[]= X "Usage: translit [-i inpfil] [-o outfil] [-t convtabfil] [convtabfil]\n"; X X X inpf = stdin; /* initialize input to standard input */ /*UNIX*/ X outf = stdout; /* initialize output to standard output */ /*UNIX*/ X X /* set SIGNED_CHAR_TYPE flag */ X scr1[0] = '\372'; X if((int)scr1[0] < 0) { X SIGNED_CHAR_TYPE = 1; X } X else { X SIGNED_CHAR_TYPE = 0; X } X X/* if environment is supported */ X#if GETENV X /* if TRANSPATH variable defined, take its contents */ X if((scr1pt = getenv(TRANSPATH)) != (char *)NULL) { X strcpy(deftablpath, scr1pt); X } X if((scr1pt = getenv(DEFNAME)) != (char *)NULL) { X strcpy(deftablfile, scr1pt); X } X#endif X X tabl_file = deftablfile; /* default table file name */ X X flg = 0; /* set to no conv table given as an argument */ X i = j = k = 0; /* flags, for files specified: i-inp, j-out, k-tabl */ X while ((opt = getopt(argc, argv, OPTIONS)) != EOF) { X switch (opt) { X case 'd': X debug_flg = 1; X break; X case 'i': X if(i != 0) { X fprintf(stderr, "You specified option -i twice\n"); X return(1); X } X if((inpf = fopen(optarg, "r")) == NULL) { X fprintf(stderr,"Error: Could not find input file: %s\n", optarg); X return(1); X } X i = 1; X break; X case 'o': X if(j != 0) { X fprintf(stderr, "You specified option -o twice\n"); X return(1); X } X if((outf = fopen(optarg, "r")) != NULL) { X fprintf(stderr, X "Error: Output file: %s already exists! 
Delete it first.\n", optarg); X exit(3); X } X if((outf = fopen(optarg, "w")) == NULL) { X fprintf(stderr,"Error: Could not open output file: %s\n", optarg); X exit(2); X } X j = 1; X break; X X case 't': X if(k != 0) { X fprintf(stderr, "You specified option -t twice\n"); X return(1); X } X tabl_file = optarg; X flg = 1; X k = 1; X break; X case '?': X fprintf(stderr,"Error: %s\n", usage); X exit(3); X } /* end switch */ X } /* end while */ X X if(optind < argc) { /* check if translation table given w/o option -t */ X if(flg == 1) { X fprintf (stderr,"Error: You specified conversion table file twice\n"); X exit(4); X } X tabl_file = argv[optind]; X if(argc > optind + 1) { X fprintf (stderr,"Error: %s\n", usage); X exit(5); X } X } X X if((tabl = fopen(tabl_file, "r")) == NULL) { /* try to open file with table */ X strcpy(table_name, deftablpath); /* copy path to scratch string */ X strcat(table_name, tabl_file); X if((tabl = fopen(table_name, "r")) == NULL) { /* try to open path/file */ X fprintf(stderr,"Could not find the conversion table file: %s\n", X tabl_file); X exit(6); X } X } X X /* read in file version number */ X if((getnblkline(tabl, 1) < 0) || ((file_version = getnumber(tabl)) < 0)) { X tablerr(7, "Could not read file format number"); X } X if(file_version != 1) { X tablerr(10, "This format of conversion file is not supported"); X } X X /* read in delimiters */ X X rdelim(&strstart, &strend); X rdelim(&liststart, &listend); X rdelim(®exstart, ®exend); X X /* read in starting and ending sequences */ X begseq = beseq(); X endseq = beseq(); X X /* reserve memory for sequences */ X chars_left = 5*MAXPAIRS; /* size of allocated block */ X if((scr1ptr = (char*)malloc(chars_left*sizeof(char))) X == NULL) { X tablerr(10, "Out of memory for storing sequences"); X } X X /* Allocate space for inp_maps and out_sets for input set 0 */ X X allomaps(0); X X /* Read number of input sets */ X if((getnblkline(tabl, 1) <= 0) || ((n_inp_sets = getnumber(tabl)) < 0)) { X 
tablerr(10, "Error when reading input set count"); X } X X if(n_inp_sets >= MAXSETS) { X tablerr(10, "Too many input shift sequences"); X } X X /* read input SI/SO sequences */ X for (i = 0; i < n_inp_sets; i++) { X /* Allocate space for inp_maps and out_sets for input set i+1 */ X allomaps(i+1); X X /* read in input SHIFTs seq */ X if(getnblkline(tabl, 1) <= 0) { X tablerr(10, "Error when reading output shift sequences"); X } X rdinshift(&inp_SO_data[i], 1); X rdinshift(&inp_SO_subs[i], 2); X if((inp_SO_subs[i].typ == 2) && (inp_SO_data[i].typ == 0)) { X tablerr(10, X "Plain string type for matching and substitution expression for output"); X } X rdinshift(&inp_nest_open[i], 1); X rdinshift(&inp_nest_close[i], 1); X rdinshift(&inp_SI_data[i], 1); X rdinshift(&inp_SI_subs[i], 2); X if((inp_SI_subs[i].typ == 2) && (inp_SI_data[i].typ == 0)) { X tablerr(10, X "Plain string type for matching and substitution expression for output"); X } X X if(debug_flg == 1) { X if(inp_SO_data[i].typ == 0) { X fprintf(stderr,"%2d) inp_SO =|%s| ", i, (inp_SO_data[i].ad).seq); X } X else { X fprintf(stderr,"%2d) inp_SO =%d ", i, inp_SO_data[i].typ); X } X if((inp_SO_subs[i].typ == 0) || (inp_SO_subs[i].typ == 2)) { X fprintf(stderr,"%2d) inp_SOsub =|%s| ", i, (inp_SO_subs[i].ad).seq); X } X else { X fprintf(stderr,"%2d) inp_SOsub =%d ", i, inp_SO_subs[i].typ); X } X if(inp_nest_open[i].typ == 0) { X fprintf(stderr,"nest_open =|%s| ", (inp_nest_open[i].ad).seq); X } X else { X fprintf(stderr,"nest_open =%d ", inp_nest_open[i].typ); X } X if(inp_nest_close[i].typ == 0) { X fprintf(stderr,"nest_close =|%s| ", (inp_nest_close[i].ad).seq); X } X else { X fprintf(stderr,"nest_close =%d ", inp_nest_close[i].typ); X } X if(inp_SI_data[i].typ == 0) { X fprintf(stderr,"inp_SI =|%s|\n", (inp_SI_data[i].ad).seq); X } X else { X fprintf(stderr,"inp_SI =%d\n", inp_SI_data[i].typ); X } X if((inp_SI_subs[i].typ == 0) || (inp_SI_subs[i].typ == 2)) { X fprintf(stderr,"%2d) inp_SIsub =|%s| ", i, 
(inp_SI_subs[i].ad).seq); X } X else { X fprintf(stderr,"%2d) inp_SIsub =%d ", i, inp_SI_subs[i].typ); X } X } /* end debug_flg */ X } X X if((getnblkline(tabl, 1) <= 0) || ((n_out_sets = getnumber(tabl)) < 0)) { X /* read in out SHIFTs count */ X tablerr(10, "Error when reading output set count"); X } X if(n_out_sets > MAXSETS) { X tablerr(10, "Too many output SHIFT sequences requested"); X } X X for (i = 0; i < n_out_sets; i++) { X /* read in out SHIFTs seq */ X if((getnblkline(tabl, 1) <= 0) || X ((scr1pt = getstring(strstart, strend, tabl)) == (char*)NULL) || X ((scr2pt = getstring(strstart, strend, tabl)) == (char*)NULL)) { X tablerr(10, "Error when reading output shift sequences"); X } X convstr(scr1pt, scr1a); X out_SO_len[i]= strlen(scr1a); X out_SO[i] = savestring(scr1a); X X convstr(scr2pt, scr2a); X out_SI_len[i] = strlen(scr2a); X out_SI[i] = savestring(scr2a); X if(debug_flg == 1) { X fprintf(stderr,"%2d) out_SO string=|%s| out_SI string=|%s|\n", X i, out_SO[i], out_SI[i]); X } X } /* end for */ X X i = 0; X while (getnblkline(tabl, 1) > 0) { X if((inp_data[i].set = getnumber(tabl)) < 0) { /* get inp set number */ X tablerr(10, "Set number for input sequences is wrong"); X } X if(((k = inp_data[i].set) > n_inp_sets) || (k < 0) ) { X tablerr(10,"Input set number for a sequence wrong"); X } X curst1 = chknblk(tabl); /* check what type of string follows */ X if(curst1 == strstart) { X mode1 = 1; X curend1 = strend; X } X else if(curst1 == liststart) { X mode1 = 2; X curend1 = listend; X } X else if(curst1 == regexstart) { X mode1 = 3; X curend1 = regexend; X } X else { X tablerr(10, "Delimiter wrong when reading input sequences"); X } X /* get input sequence */ X if((scr1pt = getstring(curst1, curend1, tabl)) == (char*)NULL) { X tablerr(10, "Error reading input sequence"); X } X scr1pt = savestring(scr1pt); /* Save the string */ X X if((out_data[i].set = getnumber(tabl)) < -3) { /* get inp set number */ X tablerr(10, "Wrong code for the output set 
number"); X } X X if(out_data[i].set > n_out_sets) { X tablerr(10, "Output set number for a sequence is wrong"); X } X X curst2 = chknblk(tabl); /* check what type of string follows */ X if(curst2 == strstart) { X mode2 = 1; X curend2 = strend; X } X else if(curst2 == liststart) { X mode2 = 2; X curend2 = listend; X } X else if(curst2 == regexstart) { X mode2 = 3; X curend2 = regexend; X } X else { X tablerr(10, "Delimiter wrong when reading sequences"); X } X X if((scr2pt = getstring(curst2, curend2, tabl)) == (char*)NULL) { X tablerr(10, "Error reading input sequence"); X } X scr2pt = savestring(scr2pt); X X /* check if acceptable types for sequences */ X if((mode2 == 3) && (mode1 != 3)) { /* no regular expressions for output */ X tablerr(10, X "Regular expression as output sequence and input not a regular expression"); X } X else if((mode1 == 1) && (mode2 == 2)) { /* inp string, out list */ X tablerr(10, "You specified list for output and string for input"); X } X else if((mode1 == 3) && (mode2 == 2)) { /* inp regex, out list */ X tablerr(10, "You specified string for input and list for output"); X } X else if((mode1 == 2) && (out_data[i].set < 0)) { X tablerr(10, X "Input LIST and output set code -1/-2/-3 is not supported at this moment"); X } X X if(mode1 == 2) { /* if list for input expression */ X /* split string at - sign */ X splitlist(scr1pt, scr1); X if(mode2 == 2) { X splitlist(scr2pt, scr2); X if(strlen(scr1) != strlen(scr2)) { X tablerr(10, X "The number of codes in the input and output list is different"); X } X } X } /* end mode 2 */ X else { /* for all other modes, convert the codes */ X convstr(scr1pt, scr1a); /* convert codes in input string */ X } X X if((mode1 == 1) && (strlen(scr1a) == 1)) { /* single inp char */ X /* it is like list with a single character, so cheat */ X if(out_data[i].set >= 0) { X mode1 = 2; X strcpy(scr1, scr1a); /* make it a list */ X } X else { X tablerr(10, X "One-character input strings and output codes -1/-2/-3 are 
not supported\n"); X } X } X X if(mode1 == 2) { /* fill the lists for mode 2 */ X if(mode2 == 1) { /* if normal string as output sequence */ X convstr(scr2pt, scr2); X scr2pt = savestring(scr2); X } X else { /* if mode2 = 2 */ X scr2pt = scr2; X } X /* now fill the maps */ X k = inp_data[i].set; X l = out_data[i].set; X scr1pt = scr1; /* points at input list */ X scr2ptr = scr2pt; /* points at output list or string */ X while (*scr1pt != '\0') { X if(mode2 == 2) { /* prepare string with code */ X scr1a[0] = *scr2ptr++; X scr1a[1] = '\0'; X scr2pt = savestring(scr1a); X } X ch = intcode(*scr1pt); X if((*inp_maps[k])[ch] != (char *)NULL) { X fprintf(stderr, X"You have entered the character |%c| with code \\0d%d for input set %d\n", X charcode(ch), ch, k); X tablerr(10, "Delete previous references if not needed"); X } X X (*inp_maps[k])[ch] = scr2pt; /* save output sequence */ X (*out_sets[k])[ch] = l; /* save output set number */ X scr1pt++; /* next code for output */ X } X i--; /* do not save this line in inp_str and out */ X } /* end if mode1 = 2*/ X else if(mode1 == 1) { /* if multicharacter input string */ X scr1pt = savestring(scr1a); X convstr(scr2pt, scr2a); X scr2pt = savestring(scr2a); X inp_data[i].typ = 0; X inp_data[i].len = strlen(scr1pt); X (inp_data[i].ad).seq = scr1pt; X out_data[i].typ = 0; X out_data[i].len = strlen(scr2pt); X (out_data[i].ad).seq = scr2pt; X } X else if(mode1 == 3) { /* if regular expression for input */ X inp_data[i].typ = 1; X l = strlen(scr1a); /* length of converted input expression */ X if(scr1a[0] == '^') { X tablerr(10, X "The ^ (beginning anchor) is not supported"); X } X if((scr1a[l-1] == '$') && (scr1a[l-1] != '\\')) { X tablerr(10, "The $ (end anchor) is not supported"); X } X X strcpy(regerrstr, "Error in input regular expression sequence: "); X X if((regauxptr = reg_comp(scr1a)) == NULL) { X tablerr(10, "Error in the input regular expression sequence"); X } X (inp_data[i].ad).re = regauxptr; X convstr(scr2pt, scr1a); 
/* convert codes in out string */ X scr2pt = savestring(scr1a); X if(mode2 == 3) { /* mark type of expression plan(0)/substit string(2) */ X out_data[i].typ = 2; X } X else { X out_data[i].typ = 0; X } X out_data[i].len = strlen(scr2pt); X (out_data[i].ad).seq = scr2pt; X } X X /* advance pointers */ X X n_conv_seq = ++i; X if(n_conv_seq >= (MAXPAIRS-1)) { X tablerr(10, X "Too many transliteration sequences. Recompile program with larger MAXPAIRS"); X } X } /* end while getnblkline */ X X if(debug_flg == 1) { X fprintf(stderr,"Multicharacter input sequences \n"); X for(i=0; i < n_conv_seq; i++) { X fprintf(stderr,"%2d) inp.type=%2d inp.set=%2d out.type=%2d out.set=%2d\n", X i, inp_data[i].typ, inp_data[i].set, out_data[i].typ, out_data[i].set); X if(inp_data[i].typ == 0) { X fprintf(stderr," Inp.str=|%s| ", (inp_data[i].ad).seq); X } X fprintf(stderr, "Out.str=|%s|\n", (out_data[i].ad).seq); X } X fprintf(stderr, X "input_set charcode input_character --> output_set output_string/\n"); X for(i = 0; i <= n_inp_sets; i++) { X for(k = 0; k < 256; k++) { X if((*inp_maps[i])[k] != (char *)NULL) { X fprintf(stderr," %2d \\%04o %c --> %2d %s\n", X i, k, charcode(k), (*out_sets[i])[k], (*inp_maps[i])[k]); X } X } X } X } X X X fprintf(outf,"%s",begseq); /* output starting sequence */ X X /* transliterate input file to output file */ X X level = 0; X if(n_inp_sets > 0) { X cur_inp_set = 1; X } X else { X cur_inp_set = 0; X } X inp_cur_set[level] = cur_inp_set; /* 1st input set is a default */ X inp_cur_nest[level] = 0; X scr1ptr = scr1a; X scr1ptr[0] = '\0'; X scr2ptr = scr2a; X scr2ptr[0] = '\0'; X scrcurptr = scr2ptr; X chars_left = 0; X buffer_size = MAXBUFF/2; /* will be set to 0 if EOF */ X out_cur_set = 1; /* no output set yet */ X X X while ( buffer_size > 0) { X /* swap input buffer pointers */ X scrauxptr = scr2ptr; X scr2ptr = scr1ptr; X scr1ptr = scrauxptr; X X scrauxptr = scrcurptr; /* old buffer last pointer */ X scrcurptr = scr1ptr; /* new buffer start */ X X 
/* copy remains of old buffer to new one */ X strcpy(scrcurptr, scrauxptr); X X l = chars_left; X for(i = 0; i < buffer_size; i++) { /* append input chars to scr1a */ X if((k = fgetc(inpf)) == EOF) { X buffer_size = 0; /* end of file */ X break; X } X else if(k == '\0') { /* skip zero characters */ X i--; X continue; X } X else { /* if normal character */ X *(scrcurptr + l++) = k; X } X } X *(scrcurptr + l) = '\0'; /* terminate buffer with 0 */ X chars_left = l; ; /* length of combined string */ X if(buffer_size == 0) { /* if EOF */ X chars_left += MAXMATCH+1; /* fool the program that there is more */ X } X X while (chars_left > MAXMATCH) { X /* check if end of scrcurptr --- it means end of input file, since only X then it can get to the end of the string, otherwise it stops X MAXMATCH before */ X if(*scrcurptr == '\0') { /* end of file */ X if(n_out_sets > 0) { /* if multiple output sets */ X l = out_SI_len[out_cur_set - 1]; X for(i = 0; i < l; i++) { X k = out_SI[out_cur_set-1][i]; X fputc(k, outf); X } X break; X } X } X XBackstep2: X /* check if new set of input chars started */ X l = -1; X for (i = 0; i < n_inp_sets; i++) { X if(match_subs(&inp_SO_data[i], &inp_SO_subs[i]) > 0) { X l = i; X break; X } X } X if(l >= 0) { /* is SO matched */ X repl_inp(); /* substitute SO_data with SO_seqs */ X if((inp_SI_data[l].len > 0) || (inp_SI_data[l].typ == 1)){ X /* increase level only is SHIFT IN present */ X level++; /* increase number of "opened" input sets */ X inp_cur_nest[level] = 0; /* It is new level,zero nesting sequences */ X if(level > MAXLEVEL) { X fprintf(stderr, X "Too many nested input character sets in input file\n"); X exit(39); X } X } X l++; /* sets in arrays are saved starting from 0, X i.e., set nr 1 corresponds to element [0], 2 --> [1], etc. 
*/ X inp_cur_set[level] = l; /* save set number at current nesting level */ X cur_inp_set = l; X continue; X } X /* check if SHIFT IN sequence for current input set */ X if(n_inp_sets > 0) { X /* check SI sequence only when nesting count is 0 */ X if(inp_cur_nest[level] == 0) { X if(match_subs(&inp_SI_data[cur_inp_set-1], X &inp_SO_subs[cur_inp_set-1]) > 0) { /* is SI */ X repl_inp(); X level--; X if(level < 0) { X level = 0; X fprintf(stderr, X"More SHIFT_IN sequences than corresponding SHIFT_OUT sequences in text\n"); X } X cur_inp_set = inp_cur_set[level]; /* set previous inp set number */ X continue; X } X } X } X X /* Now check if the input sequence corresponding to cur_inp_set X matches the string */ X XBackstep1: /* if output set number is -1, start again */ X flg = -1; X for(i = 0; i < n_conv_seq; i++) { X k = inp_data[i].set; /*get set number for current transliteration seq */ X if((k == cur_inp_set) || (k == 0)) { /* if equal to current or 0 */ X if(match_subs(&inp_data[i], &out_data[i]) > 0) { X if(out_set_number < 0) { /* if backsteping */ X repl_inp(); /* replace */ X if (out_set_number == -1) { X flg = -1; X } X else if (out_set_number == -2) { X goto Backstep1; X } X else if (out_set_number == -3) { X goto Backstep2; X } X } X else { /* if set number >= 0 */ X flg = i; X break; X } X } X } X } X X if(flg < 0) { /* if no matching input multichar sequence found */ X ch = intcode(*scrcurptr); /* current input character */ X if((out_seq_ptr = (*inp_maps[cur_inp_set])[ch]) != NULL) { X /* if out_seq exists for current input set */ X out_set_number = (*out_sets[cur_inp_set])[ch]; X flg = 1; X } X else if((out_seq_ptr = (*inp_maps[0])[ch]) != NULL) { X /* if out_seq exitst for set number 0 */ X flg = 1; X out_set_number = (*out_sets[0])[ch]; X } X if(flg >= 0) { /* set other things */ X out_seq_length = strlen(out_seq_ptr); X inp_seq_length = 1; X } X } X X if(flg < 0) { /* if no match found, copy the input char to output */ X scr1[0] = *scrcurptr; X 
scr1[1] = '\0'; X if(*scrcurptr != '\0') { X out_seq_length = 1; X } X else { X out_seq_length = 0; X } X X inp_seq_length = 1; X out_set_number = 0; X out_seq_ptr = scr1; X } X X /* At this point all matches and substitutuions have been done */ X X /* check if nesting sequences found for a given set and increase or X decrease nesting if needed */ X if((n_inp_sets > 0) && (out_cur_set > 0)) { X for(i = 0; i < inp_seq_length; i++) { X if(chkseqs(1, &inp_nest_close[cur_inp_set-1], scrcurptr+i) >= 0) { X inp_cur_nest[level]--; X } X if(chkseqs(1, &inp_nest_open[cur_inp_set-1], scrcurptr+i) >= 0) { X inp_cur_nest[level]++; X } X } X } X X /* output the SI/SO sequences if output set changed */ X if((n_out_sets > 0) && X (out_set_number > 0)) { /* check if multiple output sets */ X if(out_cur_set != out_set_number) { /* if new set starts */ X if(out_cur_set > 0) { /* put SHIFT IN for a previous set */ X l = out_SI_len[out_cur_set-1]; /* old SHIFT IN seq length */ X for(i = 0; i < l; i++) { /* output old SHIFT IN */ X k = out_SI[out_cur_set-1][i]; X fputc(k, outf); X } X } X out_cur_set = out_set_number; /* make it current now */ X if(out_cur_set > 0) { X l = out_SO_len[out_cur_set-1]; /* length of SHIFT OUT sequence */ X for(i = 0; i < l; i++) { /* output SHIFT OUT seq for this set */ X k = out_SO[out_cur_set-1][i]; X fputc(k, outf); X } X } X } /* end out_set changes */ X } /* if multiple output sets specified */ X X /* now output the corresponding sequence */ X for(i = 0; i < out_seq_length; i++) { X k = *(out_seq_ptr+i); X fputc(k,outf); X } X X /* move past processed input text */ X scrcurptr += inp_seq_length; X chars_left -= inp_seq_length; X X X } /* while scanning input characters */ X X } /* end while reading input file */ X fprintf(outf,"%s",endseq); /* output ending sequence */ X fclose(inpf); X fclose(outf); X exit(0); X} X END_OF_FILE if test 54601 -ne `wc -c <'translit.c'`; then echo shar: \"'translit.c'\" unpacked with wrong size! 
fi
# end of 'translit.c'
fi
echo shar: End of archive 3 \(of 10\).
cp /dev/null ark3isdone
MISSING=""
for I in 1 2 3 4 5 6 7 8 9 10 ; do
    if test ! -f ark${I}isdone ; then
	MISSING="${MISSING} ${I}"
    fi
done
if test "${MISSING}" = "" ; then
    echo You have unpacked all 10 archives.
    rm -f ark[1-9]isdone ark[1-9][0-9]isdone
else
    echo You still must unpack the following archives:
    echo " " ${MISSING}
fi
exit 0
exit 0 # Just in case...