home *** CD-ROM | disk | FTP | other *** search
Text File | 1993-03-21 | 59.4 KB | 1,868 lines |
- Newsgroups: comp.sources.misc
- From: jkl@osc.edu (Jan Labanowski)
- Subject: v36i025: translit - transliterate foreign alphabets, Part03/10
- Message-ID: <1993Mar19.224349.11873@sparky.imd.sterling.com>
- X-Md4-Signature: e266dff38e849f207e0bbd3b969b2c3f
- Date: Fri, 19 Mar 1993 22:43:49 GMT
- Approved: kent@sparky.imd.sterling.com
-
- Submitted-by: jkl@osc.edu (Jan Labanowski)
- Posting-number: Volume 36, Issue 25
- Archive-name: translit/part03
- Environment: UNIX, MS-DOS, VMS
-
- #! /bin/sh
- # This is a shell archive. Remove anything before this line, then feed it
- # into a shell via "sh file" or similar. To overwrite existing files,
- # type "sh file -c".
- # Contents: order.txt translit.c
- # Wrapped by kent@sparky on Fri Mar 19 16:00:10 1993
- PATH=/bin:/usr/bin:/usr/ucb:/usr/local/bin:/usr/lbin ; export PATH
- echo If this archive is complete, you will see the following message:
- echo ' "shar: End of archive 3 (of 10)."'
- if test -f 'order.txt' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'order.txt'\"
- else
- echo shar: Extracting \"'order.txt'\" \(2315 characters\)
- sed "s/^X//" >'order.txt' <<'END_OF_FILE'
- X From:
- X
- X
- X
- X
- X To: JKL ENTERPRISES, INC.
- X P.O.Box 21821
- X Upper Arlington, OH 43221-0821
- X
- X
- X Please send me the executable, ready to run TRANSLIT program on
- X a diskette for the IBM-PC computer or compatible for MS-DOS 2.1
- X or higher [see footnote].
- X
- X I request the following medium (choose one):
- X
- X _____ 5.25 inch 360 kByte diskette
- X
- X _____ 3.5 inch 720 kByte diskette
- X with installation instructions. I understand that I will also receive
- X the transliteration tables and complete source of the program and
- X documentation as disk files.
- X
- X I understand that the TRANSLIT program comes without any warranty, and
- X the only claim which I can make towards JKL ENTERPRISES, INC. is to
- X replace a defective diskette. I also understand that this offer may be
- X withdrawn at any time.
- X
- X Prepaid orders (i.e., orders accompanied with a check or money order)
- X for the program are $15 (fifteen US Dollars). If an invoice is requested
- X there will be additional charge of $5 (five US Dollars) for processing.
- X There is no shipping and handling charge for orders within US. Outside
- X US please add the $1 dollar for Shipping and Handling. Diskette with the
- X program will be sent via 1st Class Mail or Air Mail, whichever applies.
- X I understand that the shipment will be made when funds are received.
- X
- X I enclose (fill in appropriate blanks):
- X
- X for the program ($15 per disk): ______
- X
- X for the invoice to be billed later ($5) ______
- X
- X Shipping/Handling if outside USA ($1 per disk) ______
- X -------------------------------------------------------------
- X
- X Total: ______
- X
- X Ohio residents must add 5.75% tax of the Total: ______
- X
- X Total+Tax(if Ohio Resident) ______
- X
- X Please send the program to the following address:
- X
- X Name: __________________________________
- X
- X Organization: __________________________________
- X
- X Address: __________________________________
- X
- X Town, State, Zip-code: __________________________________
- X
- X_____________________________________________________
- X [Footnote]: inquire for other computers/operating systems.
- END_OF_FILE
- if test 2315 -ne `wc -c <'order.txt'`; then
- echo shar: \"'order.txt'\" unpacked with wrong size!
- fi
- # end of 'order.txt'
- fi
- if test -f 'translit.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'translit.c'\"
- else
- echo shar: Extracting \"'translit.c'\" \(54601 characters\)
- sed "s/^X//" >'translit.c' <<'END_OF_FILE'
- X/* This is a program for transliterating files from one character set to
- X another.
- X TRANSLIT --- Version 1.0, Jan. 10, 1993.
- X Copyright (c) by Jan Labanowski, 1993 and JKL Enterprises, Inc.
- X Permission is given to distribute this program freely in accordance with
- X the rules and conditions spelled out in the program documentation. If you
- X got this program without the documentation, or if some files were missing,
- X somebody must have violated the rules. In this case, please delete the
- X program and obtain the information on how to get the complete distribution
- X from the author. The rules require that the whole package is distributed
- X (i.e., the source code, the transliteration tables, and the documentation).
- X
- X Author: Jan Labanowski, P.O.Box 21821, Columbus, OH 43221-0821, USA
- X E-mail: jkl@osc.edu, JKL@OHSTPY.BITNET
- X */
- X
- X#include "paths.h" /* for local definitions */
- X#include "reg_exp.h" /* for regexp package */
- X
- X
- X#define OPTIONS "i:o:t:d" /* allowed options on command line */
- X#define MAXPAIRS 1000 /* maximum number of conversion pairs */
- X#define MAXSETS 10 /* maximum number of shift in/out sets */
- X#define MAXLEVEL 10 /* maximum set nesting level */
- X#define MAXBUFF 1000 /* maximum size of the buffer */
- X#define MAXMATCH 100 /* maximum length of match to regular exp. */
- X
- X
- X/* define all local functions as static if compiler likes it */
- X#if STATICFUN
- X#define STATIC static
- X#else
- X#define STATIC
- X#endif
- X
- X/* types to hold the translation maps for single chars. If inp_maps is of
- X * type IMAPP, then character of code c in set k will correspond to
- X * a string pointed by (*inp_maps[k])[c];
- X */
- X
- X/* ========== now include definitions for paths and regexp */
- X
- X
- X/* IMAP: table indexed by character code (0..255); each entry is a pointer
- X * to a string. allomaps() initialises every entry to NULL. */
- Xtypedef char* IMAP[256]; /* type IMAP is a 256 element array
- X of pointers to string */
- X/* IMAPP: one IMAP pointer per character set, at most MAXSETS sets */
- Xtypedef IMAP* IMAPP[MAXSETS]; /* array of pointers to IMAP */
- X
- X/* types to hold output set number for a single char. if out_sets is of type
- X * OSETP and c is a code, and k is the set of input character, then output
- X * set number is (*out_sets[k])[c]; */
- Xtypedef int OSET[256];
- Xtypedef OSET* OSETP[MAXSETS];
- X
- X
- X/* ADDR: either a plain string or a compiled regular expression; which
- X * member is valid is recorded in the typ field of the enclosing SDATA */
- Xtypedef union {
- X char *seq; /* pointer to a string */
- X reg_exp *re; /* pointer to a regular expression "program" */
- X } ADDR;
- X
- X
- X/* SDATA: description of one input or output sequence */
- Xtypedef struct {
- X int typ; /* type of pointer in ADDR union:
- X 0-string (seq), 1-input regexp (re),
- X 2-output regexp (seq) (output regexp
- X is a string !)*/
- X int len; /* length of a string if present; for typ 1 it is
- X filled in by chkseqs() with the match length */
- X int set; /* character set number for the string */
- X ADDR ad; /* string or regexp program */
- X } SDATA;
- X
- X
- X/* Some compilers represent character codes > 127 as negative numbers, i.e.,
- X * character 255 is -1, char 254 is -2, etc.
- X * The flag SIGNED_CHAR_TYPE is set by the program (the program checks which
- X * convention is used). It is set to 1 if characters have sign (i.e., 255=-1)
- X * and is set to 0 if characters are unsigned (i.e., 255=255). Do not touch
- X * this declaration, unless you know what you are doing.
- X */
- Xint SIGNED_CHAR_TYPE;
- X
- X char tabline[MAXBUFF]; /* line of text from conversion table file */
- X char last_tab_line[MAXBUFF]; /* copy of the last line read by getnblkline(), printed by tablerr() */
- X char *lineptr; /* pointer to the first unread character of tabline */
- X int n_line_chars; /* number of characters in tabline buffer */
- X int chars_left; /* no. of chars left in input buffer */
- X
- X int memleft; /* tells how much memory is left in allocated area */
- X char *memptr; /* pointer for memory allocation area */
- X char regerrstr[100]; /* string to hold error message from regular exp */
- X reg_exp *regauxptr; /* aux pointer for regular expression structure */
- X int debug_flg=0; /* if 1, then additional info sent to stderr */
- X
- X FILE *inpf; /* input file pointer */
- X FILE *outf; /* output file pointer */
- X FILE *tabl; /* file with translation table */
- X
- X int n_conv_seq; /* number of conversion sequences */
- X SDATA inp_data[MAXPAIRS]; /* structure to hold types, lengths and pointers
- X for input sequences */
- X SDATA out_data[MAXPAIRS]; /* structure to hold types lengths and pointers
- X for output sequences */
- X SDATA inp_SO_data[MAXSETS]; /* structure with types, lens, ptrs for inp_SO*/
- X SDATA inp_SO_subs[MAXSETS]; /* holds substitution string/regexp for inp_SO*/
- X SDATA inp_SI_data[MAXSETS]; /* structure with types, lens, ptrs for inp_SI*/
- X SDATA inp_SI_subs[MAXSETS]; /* holds substitution string/regexp for inp SI*/
- X SDATA inp_nest_open[MAXSETS]; /* for sick transliteration cases, like TeX */
- X SDATA inp_nest_close[MAXSETS]; /* where you need to count {} pairs */
- X
- X SDATA *junky; /* NOTE(review): purpose not evident from this part of the file */
- X
- X IMAPP inp_maps; /* maps for single character sequences,
- X array of pointers. Element of the array is a pointer
- X to the array of pointers which point at strings */
- X OSETP out_sets; /* output set numbers corresponding to inp_maps
- X array of pointers to integer pointers */
- X int n_inp_sets; /* number of input sets */
- X int n_out_sets; /* number of output sets */
- X char *out_SI[MAXSETS]; /* pointer to output shift in sequences */
- X char *out_SO[MAXSETS]; /* pointer to output shift out sequences */
- X int out_SI_len[MAXSETS]; /* out SI sequence length */
- X int out_SO_len[MAXSETS]; /* out SO sequence length */
- X
- X char *begseq, /* sequence to be written at the beginning of output*/
- X *endseq; /* sequence to be written at the end of output file */
- X
- X int file_version; /* conversion table version number */
- X int strstart, strend, /* codes delimiting strings */
- X liststart, listend, /* codes delimiting lists */
- X regexstart, regexend, /* codes delimiting expressions */
- X curst1, curend1,
- X curst2, curend2; /* auxiliary */
- X
- X char scr1[MAXBUFF], scr2[MAXBUFF], /* scratch space */
- X scr1a[MAXBUFF], scr2a[MAXBUFF]; /* scratch space */
- X char *scr1ptr, *scr2ptr, *scr1pt, *scr2pt,
- X *scrauxptr, *scrcurptr; /* aux string pointers */
- X
- X int inp_seq_length; /* length of input sequence */
- X int out_seq_length; /* length of output sequence */
- X int out_set_number; /* number of output set */
- X char *out_seq_ptr; /* pointer to output sequence */
- X
- X /* entry points of the regexp package (reg_error is defined in this file
- X and is called by the package to report errors) */
- X reg_exp *reg_comp();
- X int reg_try();
- X void reg_sub();
- X void reg_error();
- X
- X/* fix if no strchr routine in the library */
- X#if STRCHR
- X#else
- X#define strchr indexfun
- X#endif
- X
- X/* ================= indexfun ======================
- X * Replacement for strchr() on systems that lack it (selected by the
- X * STRCHR switch above).
- X * Returns a pointer to the first occurrence of c (converted to char) in
- X * the string s, or NULL if it does not occur. As with strchr(), the
- X * terminating '\0' counts as part of the string, so indexfun(s, '\0')
- X * returns a pointer to the terminator.
- X * ================================================= */
- Xchar *indexfun (s, c)
- Xchar *s;
- Xint c;
- X{
- X  char wanted;
- X
- X  wanted = (char)c; /* compare as char, so codes > 127 match signed chars */
- X  for ( ; *s != '\0'; s++) {
- X    if (*s == wanted) {
- X      return (s);
- X    }
- X  }
- X  if (wanted == '\0') { /* strchr() finds the terminator as well */
- X    return (s);
- X  }
- X  return (NULL);
- X}
- X
- X
- X/* ============================================================ */
- X
- X/* include code for getopt() if not known to compiler */
- X
- X#if GETOPT
- X#else
- X
- X/*
- X This getopt implementation was taken from the net; I do not remember
- X who actually wrote it.
- X*/
- X
- X/* ARGCH - character that, in the option list, marks an option which takes
- X *         an argument
- X * BADCH - value returned by getopt() for an illegal option or a missing
- X *         argument
- X * EMSG  - empty string; place is pointed here when a new argv element
- X *         has to be fetched
- X * ENDARGS is not referenced by the getopt() code below
- X */
- X#define ARGCH (int)':'
- X#define BADCH (int)'?'
- X#define EMSG ""
- X#define ENDARGS "--"
- X
- X/*
- X * get option letter from argument vector
- X */
- Xint opterr = 1, /* useless, never set or used */
- X optind = 1, /* index into parent argv vector */
- X optopt; /* character checked for validity */
- Xchar *optarg; /* argument associated with option */
- X
- X/* tell(s): prints "<program name><s><option letter>" and a newline on
- X * stderr and makes the enclosing function return BADCH. It expands to
- X * several statements, so it must only be used inside a braced block. */
- X#define tell(s) fputs(*nargv,stderr);fputs(s,stderr); \
- X fputc(optopt,stderr);fputc('\n',stderr);return(BADCH);
- X
- X
- X/* ================= getopt ======================
- X * Returns the next option letter found in nargv, EOF when there are no
- X * more options (first non-option argument, a lone "-", or "--"), or
- X * BADCH after printing a message for an unknown option or a missing
- X * argument.
- X * nargc, nargv - argument count and vector as passed to main()
- X * ostr - allowed option letters; a letter followed by ':' takes an
- X *        argument, which is returned in optarg
- X * The scanning position inside the current argument is kept in the
- X * static variable place between calls.
- X * =============================================== */
- XSTATIC int getopt(nargc,nargv,ostr)
- Xint nargc;
- Xchar **nargv,
- X *ostr;
- X{
- X static char *place = EMSG; /* option letter processing */
- X register char *oli; /* option letter list index */
- X
- X if(!*place) { /* update scanning pointer */
- X /* stop when argv is exhausted, the argument does not begin with '-',
- X or it is just "-" */
- X if(optind >= nargc || *(place = nargv[optind]) != '-' ||
- X !*++place) return(EOF);
- X if (*place == '-') { /* found "--" */
- X ++optind;
- X return(EOF);
- X }
- X } /* option letter okay? */
- X /* ':' itself is never a valid option letter */
- X if ((optopt = (int)*place++) == ARGCH || !(oli = strchr(ostr,optopt))) {
- X if(!*place) ++optind;
- X tell(": illegal option -- ");
- X }
- X if (*++oli != ARGCH) { /* don't need argument */
- X optarg = NULL;
- X if (!*place) ++optind;
- X }
- X else { /* need an argument */
- X if (*place) optarg = place; /* no white space */
- X else if (nargc <= ++optind) { /* no arg */
- X place = EMSG;
- X tell(": option requires an argument -- ");
- X }
- X else optarg = nargv[optind]; /* white space */
- X place = EMSG;
- X ++optind;
- X }
- X return(optopt); /* dump back option letter */
- X}
- X
- X#endif
- X
- X
- X/* ================= charcode ======================
- X * Converts an integer character code into the char value representing it.
- X * When the global SIGNED_CHAR_TYPE flag is 1, codes >= 128 are first
- X * reduced by 256 (giving the negative value a signed char uses for them);
- X * every other code is simply cast to char.
- X * ================================================== */
- XSTATIC char charcode (intcde)
- Xint intcde;
- X{
- X  int code;
- X
- X  code = intcde;
- X  if ((SIGNED_CHAR_TYPE == 1) && (code >= 128)) {
- X    code -= 256; /* fold into the negative half of a signed char */
- X  }
- X  return((char)code);
- X}
- X/* ================= intcode =====================
- X * Inverse of charcode(): converts a char into its integer code.
- X * When the global SIGNED_CHAR_TYPE flag is 1, negative char values are
- X * shifted up by 256; all other values are returned as they are.
- X * =============================================== */
- Xint intcode(charcde)
- Xchar charcde;
- X{
- X  int code;
- X
- X  code = (int)charcde;
- X  if ((SIGNED_CHAR_TYPE == 1) && (code < 0)) {
- X    code += 256; /* convert to positive */
- X  }
- X  return(code);
- X}
- X
- X/* ================= tablerr ======================
- X * Fatal error exit used while reading the conversion table.
- X * Prints errmsg to stderr, followed by the last table line that was read
- X * (last_tab_line), and then terminates the program.
- X * num    - exit status passed to exit()
- X * errmsg - message to print
- X * The function never returns; the return statement is there only so the
- X * compiler sees a value being returned from an int function.
- X * ================================================ */
- XSTATIC int tablerr(num, errmsg)
- Xint num;
- Xchar *errmsg;
- X{
- X  fprintf(stderr,"%s\n", errmsg);
- X  fputs("Current contents of the input buffer for conversion table file:\n",
- X        stderr);
- X  fprintf(stderr,"%s\n", last_tab_line);
- X  exit(num);
- X  return(0);
- X}
- X
- X/* ================= getnblkline ====================
- X * Reads the next usable line of the table file into the global tabline
- X * buffer.
- X *   clearflg == 1: the buffer is reset first (n_line_chars = 0,
- X *                  lineptr = tabline) and lines consisting only of white
- X *                  space are skipped.
- X *   otherwise:     the line is stored at lineptr, i.e. appended to the
- X *                  text the caller has already scanned; lineptr itself is
- X *                  not moved.
- X * Lines with # or ! in the first column are comments and are skipped in
- X * both modes. Every line read is also copied to last_tab_line so that
- X * tablerr() can show it.
- X * Returns the length of the line just read, or -1 at end of file or when
- X * the buffer would hold more than MAXBUFF-5 characters.
- X * ================================================== */
- XSTATIC int getnblkline(fileptr, clearflg)
- XFILE *fileptr;
- Xint clearflg;
- X{
- X  int l, maxc;
- X  char *auxptr;
- X
- X  if(clearflg == 1) {
- X    n_line_chars = 0;
- X    lineptr = tabline;
- X  }
- X  maxc = MAXBUFF - n_line_chars - 2; /* how much space in the buffer */
- X  while (fgets (lineptr, maxc, fileptr) != NULL) {
- X    strcpy(last_tab_line, lineptr); /* save current line for error messages */
- X    if((*lineptr == '#') || (*lineptr == '!')) { /* skip comment lines */
- X      continue;
- X    }
- X    l = strlen(lineptr); /* how many chars we read ? */
- X    n_line_chars += l; /* how many chars in the buffer */
- X
- X    if(n_line_chars > MAXBUFF-5) { /* if line too long */
- X      return(-1);
- X    }
- X    if(clearflg == 1) { /* do it only if first line is fetched */
- X      auxptr = lineptr;
- X      /* the cast keeps codes > 127 valid arguments for isspace() */
- X      while ((*auxptr != '\0') && (isspace((unsigned char)*auxptr) != 0)) {
- X        auxptr++; /* skip front spaces */
- X      }
- X
- X      if(*auxptr == '\0') { /* if blank line */
- X        /* The next fgets() overwrites this line, so it must not stay
- X           counted; otherwise a long run of blank lines would end in a
- X           false "line too long". */
- X        n_line_chars -= l;
- X        continue;
- X      }
- X    }
- X    return(l); /* return length of line just read */
- X  } /* end while */
- X  return(-1); /* end of file found */
- X}
- X/* ================= chknblk ==========================
- X * Returns the integer code (see intcode()) of the first nonblank character
- X * at or after the current position in the tabline buffer. lineptr is left
- X * pointing AT that character (not at the one after it).
- X * When the buffer is exhausted, further lines are appended with
- X * getnblkline(fileptr, 0). Returns -1 if no more text can be read.
- X * ==================================================== */
- XSTATIC int chknblk(fileptr)
- XFILE *fileptr;
- X{
- X  for (;;) {
- X    for ( ; *lineptr != '\0'; lineptr++) {
- X      /* cast: isspace() is only defined for unsigned char values and EOF,
- X         and table files may contain codes > 127 */
- X      if(isspace((unsigned char)*lineptr) == 0) {
- X        return(intcode(*lineptr));
- X      }
- X    }
- X    if(getnblkline(fileptr, 0) <= 0) { /* nothing more to scan */
- X      return(-1);
- X    }
- X  }
- X}
- X
- X/* ================= getnumber ===========================
- X * Reads a decimal integer, optionally preceded by '+' or '-', from the
- X * current position of the tabline buffer. Leading white space is skipped;
- X * if the buffer holds nothing but white space, further lines are appended
- X * with getnblkline(fileptr, 0).
- X * The number must be followed by white space or by the end of the buffer;
- X * lineptr is left at that terminating character.
- X * Returns the number, or -9999 if
- X *   - no text is left in the file,
- X *   - there is no digit (this includes a sign standing alone),
- X *   - the magnitude exceeds 1000,
- X *   - the number is followed by some other character.
- X * ========================================================== */
- XSTATIC int getnumber(fileptr)
- XFILE *fileptr;
- X{
- X  int num, ndigits, sign;
- X
- X  num = 0;
- X  ndigits = 0;
- X  sign = 1;
- X
- X  /* skip leading white space, reading further lines when needed */
- X  for (;;) {
- X    while (isspace((unsigned char)*lineptr) != 0) {
- X      lineptr++;
- X    }
- X    if(*lineptr != '\0') {
- X      break; /* the nonblank char found */
- X    }
- X    if(getnblkline(fileptr, 0) <= 0) {
- X      return(-9999); /* if no number before end of file, error */
- X    }
- X  }
- X
- X  /* sign may only be located directly before the number */
- X  if(*lineptr == '-') {
- X    sign = -1;
- X    lineptr++;
- X  }
- X  else if(*lineptr == '+') {
- X    lineptr++;
- X  }
- X
- X  for (;;) {
- X    if(isdigit((unsigned char)*lineptr) != 0) {
- X      num = 10*num + (*lineptr - '0');
- X      if(num > 1000) {
- X        return(-9999); /* number too large */
- X      }
- X      ndigits++;
- X      lineptr++;
- X    }
- X    else if((*lineptr == '\0') || (isspace((unsigned char)*lineptr) != 0)) {
- X      /* end of number; a sign without any digit is not a number */
- X      if(ndigits == 0) {
- X        return(-9999);
- X      }
- X      return(num*sign);
- X    }
- X    else {
- X      return(-9999); /* some strange character */
- X    }
- X  }
- X}
- X
- X
- X/* ================= getstring ==========================
- X * Cuts a string out of the tabline buffer, starting the search at lineptr,
- X * and returns a pointer to it. The string is terminated in place (the
- X * delimiter that ends it is overwritten with '\0') and lineptr is moved
- X * past it. The pointer refers to the tabline buffer itself, so the string
- X * must be copied or used before the buffer is refilled.
- X * startcode --- integer code of the character which starts the string (it
- X *               is not included in the string). If startcode is 0, the
- X *               string starts at the current position.
- X * endcode   --- integer code of the character which ends the string (not
- X *               included in the string). If endcode is 0, the string ends
- X *               at the first white space character or at the end of the
- X *               buffer.
- X * When endcode is not 0 and the buffer runs out, further lines are
- X * appended with getnblkline(fileptr, 0), so a string may span lines.
- X * Returns NULL if the start of the string was not found, or if the file
- X * ended before endcode was seen.
- X * ========================================================= */
- X
- XSTATIC char *getstring(startcode, endcode, fileptr)
- Xint startcode, endcode;
- XFILE *fileptr;
- X{
- X  char *startptr;
- X  char endchar;
- X
- X  startptr = (char *)NULL; /* stays NULL until the string start is found */
- X  /* Convert the delimiter once and compare it as a char, the same way the
- X     start delimiter is compared. Comparing charcode(*lineptr) with the
- X     integer endcode can never succeed for codes >= 128 when char is
- X     signed. */
- X  endchar = charcode(endcode);
- X
- X  for (;;) {
- X    while (*lineptr != '\0') {
- X      if(startptr == (char *)NULL) { /* start of string not found yet */
- X        if(startcode == 0) {
- X          startptr = lineptr; /* string begins at the current char */
- X        }
- X        else if(charcode(startcode) == *lineptr) {
- X          startptr = lineptr + 1; /* skip startcode */
- X        }
- X        lineptr++;
- X        continue;
- X      }
- X
- X      /* inside the string: look for its end */
- X      if(endcode == 0) { /* if stop on blank requested */
- X        if(isspace((unsigned char)*lineptr) != 0) {
- X          *lineptr = '\0'; /* mark string end */
- X          lineptr++;
- X          return(startptr);
- X        }
- X      }
- X      else if(*lineptr == endchar) { /* if stop at endcode */
- X        *lineptr = '\0';
- X        lineptr++;
- X        return(startptr);
- X      }
- X      lineptr++; /* collect next character */
- X    } /* end while */
- X
- X    /* the buffer was exhausted */
- X    if(endcode == 0) {
- X      return(startptr); /* NULL when the string never started */
- X    }
- X    if(getnblkline(fileptr, 0) <= 0) {
- X      return((char *)NULL);
- X    }
- X  }
- X}
- X
- X/* ============================ convnum ===================
- X * Converts the digits found in buff, starting at index posbeg, into a
- X * nonnegative number.
- X * buff   - scanned string; a '\0' is stored at the position of the first
- X *          character which is not a digit, cutting the string there
- X * digits - the allowed digits in ascending order (lowercase); the number
- X *          of digits determines the base
- X * posbeg - index of the first digit in buff
- X * Returns the number, or -1 if fewer than 2 digits were found or the
- X * value exceeds 255.
- X * ======================================================== */
- X
- XSTATIC int convnum(buff, digits, posbeg)
- Xchar *buff, *digits;
- Xint posbeg;
- X{
- X  int value, pos, k, nbase;
- X
- X  nbase = strlen(digits);
- X  value = 0;
- X
- X  for (pos = posbeg; buff[pos] != '\0'; pos++) {
- X    /* look the character up in the digit list */
- X    for (k = 0; (k < nbase) && (digits[k] != buff[pos]); k++) {
- X      ;
- X    }
- X    if (k == nbase) { /* not a digit of this base: number ends here */
- X      break;
- X    }
- X    value = k + value*nbase;
- X    if (value > 255) { /* if code too large */
- X      return(-1);
- X    }
- X  }
- X  if ((pos - posbeg) < 2) { /* if less than two characters in a number */
- X    return(-1);
- X  }
- X  buff[pos] = '\0';
- X  return(value);
- X}
- X
- X
- X/* ============================ str2code ==================
- X * str2code returns the character code written at the beginning of buff.
- X * A valid number must have at least 2 digits and a value of at most 255.
- X * Accepted formats (n represents a valid digit for the given base):
- X *     nnn    decimal, first digit is not zero
- X *     0nnn   octal
- X *     0xnn   hexadecimal
- X *     0onnn  octal
- X *     0dnnn  decimal
- X * Letters in buff are converted to lowercase, and a '\0' is stored in buff
- X * at the position following a valid number.
- X * If no valid number can be parsed, or the number is greater than 255, -1
- X * is returned.
- X * ======================================================== */
- XSTATIC int str2code(buff)
- Xchar *buff;
- X{
- X  int i, l, num;
- X  static char decdig[]="0123456789",
- X              octdig[]="01234567",
- X              hexdig[]="0123456789abcdef";
- X
- X  l = strlen(buff); /* get length */
- X  if(l < 2) { /* string too short */
- X    return(-1);
- X  }
- X
- X  /* The casts to unsigned char are needed because buff may hold codes
- X     > 127, which are not valid arguments for the ctype functions when
- X     char is signed. */
- X  for(i = 0; i < l; i++) { /* convert to lowercase */
- X    if(isalpha((unsigned char)buff[i]) != 0) { /* if letter */
- X      buff[i] = (char)tolower((unsigned char)buff[i]);
- X    }
- X  }
- X
- X  if(isdigit((unsigned char)buff[0]) == 0) { /* if first char not a digit */
- X    return(-1);
- X  }
- X
- X  if(buff[0] != '0') { /* this has to be a decimal number */
- X    return(convnum(buff, decdig, 0));
- X  }
- X
- X  /* starting char is 0: first try plain octal (no base letter) */
- X  if((num = convnum(buff, octdig, 0)) != -1) {
- X    return(num);
- X  }
- X
- X  /* the base is specified at buff[1] */
- X  if(buff[1] == 'o') {
- X    return(convnum(buff, octdig, 2));
- X  }
- X  if(buff[1] == 'd') {
- X    return(convnum(buff, decdig, 2));
- X  }
- X  if(buff[1] == 'x') {
- X    return(convnum(buff, hexdig, 2));
- X  }
- X  return(-1); /* no base found */
- X}
- X
- X/* ================= convstr ========================
- X * Copies inp_string to out_string, converting character codes written as
- X * \nnn (any format accepted by str2code()) into the characters they stand
- X * for. A "\" followed by white space is dropped together with all the
- X * white space after it. A "\" which is not followed by a valid code is
- X * copied unchanged.
- X * The output is never longer than the input, so out_string may be the same
- X * buffer as inp_string.
- X * Returns:
- X *  0 if OK
- X *  1 if character zero (e.g., \00 or \0x0) is found (it ends the string
- X *    processing, since it is string terminator).
- X * =================================================== */
- XSTATIC int convstr(inp_string, out_string)
- Xchar *inp_string, *out_string;
- X{
- X  int i, num;
- X  char buff[8];
- X
- X  while (*inp_string != '\0') {
- X    if(*inp_string == '\\') {
- X      if(isspace((unsigned char)inp_string[1]) != 0) {
- X        /* "\" followed by blanks: skip the "\" and all the spaces */
- X        inp_string++;
- X        while(isspace((unsigned char)*inp_string) != 0) {
- X          inp_string++;
- X        }
- X        continue; /* start new loop turn */
- X      }
- X
- X      /* Copy a possible number (at most 6 characters) to a buffer.
- X         Copying stops at the end of the input, so nothing is read beyond
- X         the terminating '\0' when the "\" is near the end. */
- X      for(i = 0; (i < 6) && (inp_string[i+1] != '\0'); i++) {
- X        buff[i] = inp_string[i+1];
- X      }
- X      buff[i] = '\0'; /* terminate buff */
- X
- X      if((num = str2code(buff)) >= 0) {
- X        *out_string++ = charcode(num);
- X        if(num == 0) {
- X          *out_string = '\0';
- X          return(1);
- X        }
- X        /* str2code() cut buff right after the number, so its length
- X           tells how many characters were used (number + "\") */
- X        inp_string += strlen(buff) + 1;
- X        continue;
- X      }
- X    }
- X    *out_string = *inp_string; /* ordinary character */
- X    out_string++;
- X    inp_string++;
- X  } /* end while */
- X  *out_string = '\0';
- X  return(0);
- X}
- X
- X
- X/* ====================== compstr ========================
- X * Prefix test: returns 1 if every character of str1 is found, in order,
- X * at the beginning of str2, and 0 otherwise. An empty str1 never matches.
- X * ======================================================= */
- XSTATIC int compstr(str1, str2)
- Xchar str1[], str2[];
- X{
- X  if(*str1 == '\0') { /* empty sequence never matches */
- X    return(0);
- X  }
- X  while (*str1 != '\0') {
- X    if(*str1++ != *str2++) {
- X      return(0);
- X    }
- X  }
- X  return(1);
- X}
- X
- X/* ====================== chkseqs ============================
- X * Looks for the first of the n_seq sequences described in seqstruc[] which
- X * is present at the beginning of buff.
- X * Returns its index (the first sequence has number 0), or -1 if none of
- X * them matches.
- X * Plain strings (typ 0) are compared character by character; an empty
- X * string never matches. Regular expressions (typ 1) are tried with
- X * reg_try(), and on success SDATA.len is set to the length of the text
- X * which matched. Entries of any other type are ignored.
- X * A match which does not start at buff, or which is empty, is treated as
- X * a fatal error.
- X * ============================================================= */
- XSTATIC int chkseqs(n_seq, seqstruc, buff)
- Xint n_seq;
- XSDATA *seqstruc;
- Xchar *buff;
- X{
- X  int k, mlen;
- X  SDATA *cur;
- X  reg_exp *prog;
- X
- X  for (k = 0, cur = seqstruc; k < n_seq; k++, cur++) {
- X    switch (cur->typ) {
- X    case 0: /* plain string */
- X      if(compstr(cur->ad.seq, buff) == 1) {
- X        return(k);
- X      }
- X      break;
- X
- X    case 1: /* regexp */
- X      prog = cur->ad.re; /* get address of search program */
- X      if(reg_try(prog, buff) != 1) { /* no anchored match */
- X        break;
- X      }
- X      /* matches are anchored, at the buff beginning ! */
- X      if(prog->startp[0] != buff) {
- X        tablerr(10, "Internal error in regexp package\n");
- X      }
- X      mlen = cur->len = prog->endp[0] - prog->startp[0]; /* match length */
- X      if(mlen <= 0) {
- X        fprintf(stderr,"Error when matching regular expression %d\n", k+1);
- X        exit(10);
- X      }
- X      return(k);
- X
- X    default:
- X      break;
- X    }
- X  } /* end for */
- X  return(-1);
- X}
- X
- X/* =================== rdelim ==================
- X * Reads a pair of delimiters from the next nonblank line of the tabl
- X * file: two single characters, each of which must be followed by white
- X * space.
- X * startd - receives the integer code of the left delimiter
- X * endd   - receives the integer code of the right delimiter
- X * Returns 0; any problem ends the program through tablerr().
- X * ============================================= */
- XSTATIC int rdelim(startd, endd)
- Xint *startd, *endd;
- X{
- X  /* left delimiter (tablerr() does not return, so a failed read never
- X     reaches chknblk()) */
- X  if((getnblkline(tabl, 1) < 0) || ((*startd = chknblk(tabl)) < 0)) {
- X    tablerr(10, "Could not read left delimiter code");
- X  }
- X  lineptr++; /* point at next char */
- X  if(isspace(*lineptr) == 0) {
- X    tablerr(10,
- X "(Left Delimiter):Delimiters should be single chars separated by spaces");
- X  }
- X
- X  /* right delimiter */
- X  *endd = chknblk(tabl);
- X  if(*endd < 0) {
- X    tablerr(10, "Could not read right delimiter code");
- X  }
- X  lineptr++; /* point at next char */
- X  if(isspace(*lineptr) == 0) {
- X    tablerr(10,
- X "(Right Delimiter):Delimiters should be single chars separated by spaces");
- X  }
- X  return(0);
- X}
- X
- X/* ================== beseq ==============
- X * Reads the starting or ending sequence for the output from the next
- X * nonblank line of the tabl file. The sequence is a string enclosed in
- X * the strstart/strend delimiters; codes in it are converted by convstr().
- X * Returns a pointer to a newly malloc'ed copy of the converted sequence.
- X * Read errors and lack of memory end the program through tablerr().
- X * ======================================= */
- XSTATIC char* beseq()
- X{
- X  char *raw, *converted;
- X
- X  raw = (char*)NULL;
- X  if(getnblkline(tabl, 1) >= 0) {
- X    raw = getstring(strstart,strend,tabl);
- X  }
- X  if(raw == (char*)NULL) {
- X    tablerr(10, "Error when reading starting/ending sequence");
- X  }
- X
- X  /* the converted text is never longer than the raw one */
- X  converted = (char*)malloc((strlen(raw) + 1)*sizeof(char));
- X  if(converted == NULL) {
- X    tablerr(10, "Out of memory");
- X  }
- X  convstr(raw,converted);
- X  return(converted);
- X}
- X
- X/* ================= allomaps ===============
- X * Allocates the two lookup tables of input set number n, inp_maps[n] and
- X * out_sets[n], and initialises them: every string pointer to NULL and
- X * every output set number to 0.
- X * Returns 0; lack of memory ends the program through tablerr().
- X * ============================================ */
- XSTATIC int allomaps(n)
- Xint n;
- X{
- X  int code;
- X
- X  /* one IMAP is 256 string pointers */
- X  inp_maps[n] = (IMAP*)malloc(sizeof(IMAP));
- X  if(inp_maps[n] == NULL) {
- X    tablerr(10, "Out of memory for storing sequences");
- X  }
- X
- X  /* one OSET is 256 ints */
- X  out_sets[n] = (OSET*)malloc(sizeof(OSET));
- X  if(out_sets[n] == NULL) {
- X    tablerr(10, "Out of memory for storing sequences");
- X  }
- X
- X  for(code = 0; code < 256; code++) { /* zero allocated memory */
- X    (*inp_maps[n])[code] = (char*)NULL;
- X    (*out_sets[n])[code] = 0;
- X  }
- X  return(0);
- X}
- X
- X/* =================== savestring =================
- X * Saves a copy of str (including its terminating '\0') in storage obtained
- X * from malloc() and returns a pointer to the copy.
- X * Strings are packed one after another into a chunk described by the
- X * globals memptr (next free byte) and memleft (bytes still free). When the
- X * string does not fit into what is left, a new chunk is allocated; the
- X * unused tail of the old chunk is abandoned.
- X * Lack of memory ends the program through tablerr().
- X * ================================================== */
- XSTATIC char *savestring(str)
- Xchar *str;
- X{
- X  int l;
- X  char *retptr;
- X
- X  l = strlen(str)+1;
- X  if(memleft < l) {
- X    memleft = 5*MAXPAIRS; /* normal chunk size */
- X    if(memleft < l) {
- X      /* the string is longer than a normal chunk: make the chunk big
- X         enough, so the copy below cannot run past the allocation */
- X      memleft = l;
- X    }
- X    if((memptr = (char*)malloc(memleft*sizeof(char)))
- X       == NULL) {
- X      tablerr(10,"Out of memory for allocation");
- X    }
- X  }
- X  strcpy(memptr, str);
- X  retptr = memptr;
- X  memptr += l;
- X  memleft -= l;
- X  return(retptr);
- X}
- X
- X/* ================= splitlist =================
- X * Unfolds the list in inlist into an explicit list of characters stored
- X * in unflist (i.e. [a-d] = [abcd]).
- X * Codes in inlist are first converted in place with convstr(). A '-'
- X * standing between two characters denotes the range between them; a '-'
- X * at the very beginning or at the very end of the list stands for itself.
- X * Returns 0. An empty list, or a range whose upper limit is not greater
- X * than the lower one, ends the program through tablerr().
- X * =============================================== */
- XSTATIC int splitlist(inlist, unflist)
- Xchar *inlist, *unflist;
- X{
- X  int lo, hi, code;
- X  char *src, *dst;
- X
- X  convstr(inlist, inlist); /* convert codes */
- X  if(strlen(inlist) == 0) {
- X    tablerr(10, "Empty list specified");
- X  }
- X
- X  src = inlist;
- X  dst = unflist;
- X  *dst++ = *src++; /* save first character */
- X
- X  for ( ; *src != '\0'; src++) {
- X    if((*src == '-') && (src[1] != '\0')) { /* the minus is inside */
- X      lo = intcode(src[-1]);
- X      hi = intcode(src[1]);
- X      if(hi <= lo) {
- X        tablerr(10, "The limits in the range within the list are reversed");
- X      }
- X      /* both limits are stored by the other branch (or by the first-char
- X         copy above), so only the codes strictly between them go here */
- X      for(code = lo + 1; code < hi; code++) {
- X        *dst++ = charcode(code);
- X      }
- X    }
- X    else {
- X      *dst++ = *src;
- X    }
- X  }
- X  *dst = '\0';
- X  return(0);
- X}
- X
- X/* ======================== reg_error ==================
- X * reg_error --- routine called from within the regexp package when it
- X * finds an error. The message s is appended to the text already present
- X * in the global regerrstr (callers store a description of what was being
- X * processed there) and the program is aborted through tablerr() with exit
- X * status 11.
- X * The message is cut if it does not fit into regerrstr.
- X * ==================================================== */
- Xvoid reg_error(s)
- Xchar *s;
- X{
- X  int used;
- X
- X  used = strlen(regerrstr);
- X  if(used < (int)sizeof(regerrstr) - 1) {
- X    /* strncat() adds the terminating '\0' after the copied characters,
- X       hence the extra byte kept free */
- X    strncat(regerrstr, s, sizeof(regerrstr) - 1 - used);
- X  }
- X  tablerr(11,regerrstr);
- X}
- X
- X/* ======================== rdinshift ==================
- X * reads in a shift sequence, assuming that the getnblkline was called
- X * Fills in structure SDATA. If typ = 1, it is assumed that it is data
- X * for matching, if typ = 2, this is data to be output
- X * If OK, returns 0, else dies
- X *
- X * sdstr - structure to be filled in
- X * sttyp - 1: a regular expression is compiled with reg_comp() for matching;
- X *         any other value: a regular expression is kept as a string
- X *         (typ 2) to be used for output
- X * The sequence must be enclosed either in the string delimiters or in the
- X * regular expression delimiters; lists are rejected.
- X * Uses the globals curst1, curend1, scr1pt, scr1, regerrstr and regauxptr
- X * as work space.
- X * ======================================================= */
- XSTATIC int rdinshift(sdstr, sttyp)
- XSDATA *sdstr;
- Xint sttyp;
- X{
- X int mode1; /* 1 - plain string, 2 - list (rejected), 3 - regexp */
- X ADDR ads;
- X curst1 = chknblk(tabl); /* check what type delimiter */
- X if(curst1 == strstart) {
- X mode1 = 1;
- X curend1 = strend;
- X }
- X else if(curst1 == liststart) {
- X mode1 = 2;
- X tablerr(10, "Lists not allowed for input SHIFT sequences");
- X }
- X else if(curst1 == regexstart) {
- X mode1 = 3;
- X curend1 = regexend;
- X }
- X else {
- X tablerr(10, "Error when reading SHIFT input sequences");
- X }
- X
- X /* tablerr() exits the program, so mode1 is 1 or 3 and curend1 is set
- X when this point is reached */
- X if((scr1pt = getstring(curst1, curend1, tabl)) == (char*)NULL) {
- X tablerr(10, "Error when reading input SHIFT sequences");
- X }
- X
- X convstr(scr1pt, scr1); /* convert codes in the sequence */
- X scr1pt = savestring(scr1); /* save sequence in memory */
- X /* prefix for the message reg_error() prints if reg_comp() below fails */
- X strcpy(regerrstr, "Error in regexp for input SHIFT sequences:");
- X
- X if(mode1 == 1) {
- X sdstr->typ = 0; /* common string */
- X sdstr->len = strlen(scr1pt);
- X ads.seq = scr1pt; /* save string address */
- X sdstr->ad = ads;
- X }
- X else if(mode1 == 3) {
- X if(sttyp == 1) {
- X /* regexp used for matching: len is not set here */
- X sdstr->typ = 1;
- X regauxptr = reg_comp(scr1pt);
- X if(regauxptr == (reg_exp *)NULL) {
- X tablerr(10, "Error in regular expression");
- X }
- X ads.re = regauxptr;
- X sdstr->ad = ads;
- X }
- X else {
- X /* regexp used for output: kept as a string */
- X sdstr->typ = 2;
- X sdstr->len = strlen(scr1pt);
- X ads.seq = scr1pt;
- X sdstr->ad = ads;
- X }
- X }
- X return(0);
- X}
- X
- X/* ========================= match_subs ==========================
- X * match_subs --- matches the match_data sequence description against
- X * the current position of the input text (scrcurptr). If it matches,
- X * the replacement described by repl_data is located: for a regexp
- X * substitution (typ 2) it is built in the scr1 buffer by reg_sub,
- X * otherwise the stored plain string is used directly.
- X * Sets the global variables inp_seq_length, out_set_number,
- X * out_seq_length and out_seq_ptr.
- X * Returns 1 on success and 0 if no match was found. Exits with
- X * code 1 if the replacement is longer than MAXMATCH.
- X * ================================================================== */
- XSTATIC int match_subs(match_data, repl_data)
- XSDATA *match_data, *repl_data;
- X{
- X  if(chkseqs(1, match_data, scrcurptr) < 0) {
- X    return(0);                 /* nothing matches at this position */
- X  }
- X
- X  inp_seq_length = match_data->len;  /* chkseqs sets it for inp.typ 1 */
- X  out_set_number = repl_data->set;
- X
- X  if(repl_data->typ == 2) {    /* if regexp substitution */
- X    /* build the substitution string in scr1 from the compiled regexp */
- X    regauxptr = (match_data->ad).re;   /* pointer to regexp prog */
- X    /* NOTE(review): reg_sub has already written into scr1 by the time
- X       the length is checked below; the size of scr1 is not visible
- X       here -- confirm that it is large enough. */
- X    reg_sub(regauxptr, (repl_data->ad).seq, scr1);
- X    out_seq_length = strlen(scr1);     /* number of chars in substitute */
- X    out_seq_ptr = scr1;                /* pointer to substitute string */
- X  }
- X  else {                       /* if plain string (type = 0) */
- X    out_seq_length = repl_data->len;
- X    out_seq_ptr = (repl_data->ad).seq;
- X  }
- X
- X  if(out_seq_length > MAXMATCH) {
- X    fprintf(stderr,
- X      "The substitution string is too long (%d characters):\n%s\n",
- X      out_seq_length, out_seq_ptr);
- X    exit(1);
- X  }
- X  return(1);
- X}
- X
- X/* =================== repl_inp =============================
- X * repl_inp --- replaces the matched portion of the input text at
- X * scrcurptr (inp_seq_length characters) with the substitute string
- X * (out_seq_length characters at out_seq_ptr). A shorter or equal
- X * substitute is written at the end of the matched portion and
- X * scrcurptr is advanced; a longer one pushes the rest of the text
- X * to the right. chars_left is adjusted by the change in length.
- X * Returns 0; exits with code 1 if the substitute exceeds MAXMATCH.
- X * ========================================================== */
- Xint repl_inp()
- X{
- X  int growth;                  /* substitute length minus matched length */
- X  int tail;                    /* length of the remaining input text */
- X  int pos;
- X
- X  if(out_seq_length > MAXMATCH) {
- X    fprintf(stderr,
- X      "The output substitution sequence is too long (%d characters):\n%s\n",
- X      out_seq_length, out_seq_ptr);
- X    exit(1);
- X  }
- X
- X  growth = out_seq_length - inp_seq_length;
- X  if(growth > 0) {
- X    /* make room: move the text right, terminating '\0' first
- X       (memmove could be used, but it is not in all libraries) */
- X    tail = strlen(scrcurptr);
- X    for(pos = tail; pos >= 0; pos--) {
- X      scrcurptr[pos + growth] = scrcurptr[pos];
- X    }
- X  }
- X  else {
- X    /* nothing has to be moved: skip the surplus matched characters */
- X    scrcurptr -= growth;
- X  }
- X  chars_left += growth;
- X
- X  for(pos = 0; pos < out_seq_length; pos++) {  /* place the substitute */
- X    scrcurptr[pos] = out_seq_ptr[pos];
- X  }
- X  return(0);
- X}
- X
- X/*======================== main ==================================
- X * main --- parses the command line, reads the conversion table
- X * (format number, delimiters, starting/ending sequences, input and
- X * output SHIFT sequences, transliteration sequences) and then
- X * transliterates the input stream to the output stream.
- X * Usage: translit [-i inpfil] [-o outfil] [-t convtabfil] [convtabfil]
- X * Exits with 0 on success and with a nonzero code on errors.
- X * ================================================================ */
- X
- Xint main(argc, argv)
- Xint argc;
- Xchar **argv;
- X{
- X  char *tabl_file;             /* name of file with conversion table */
- X  static char deftablfile[200]=
- X              DEFCONVNAME;     /* default conversion file name */
- X  static char deftablpath[200]=
- X              TPATH;           /* default conversion file path */
- X  char table_name[300];        /* working array for conversion table */
- X
- X  int level;                   /* input set nesting level */
- X  int inp_cur_set[MAXLEVEL];   /* set input number being processed */
- X  int inp_cur_nest[MAXLEVEL];  /* current nesting count for input set */
- X  int cur_inp_set;             /* current input set, same as inp_cur_set[level] */
- X  int out_cur_set;             /* output set level being processed */
- X  int buffer_size;             /* size of input buffer string */
- X  int opt;                     /* option letter */
- X  int mode1, mode2;            /* type of string (1=str, 2=list, 3=regex) */
- X
- X  int flg, ch, i, j, k, l, n;  /* aux variables */
- X
- X
- X#if GETOPT
- X  extern char *optarg;         /* option argument from getopt */
- X  extern int optind, opterr;   /* needed for getopt */
- X#endif
- X
- X  static char usage[]=
- X  "Usage: translit [-i inpfil] [-o outfil] [-t convtabfil] [convtabfil]\n";
- X
- X
- X  inpf = stdin;   /* initialize input to standard input */    /*UNIX*/
- X  outf = stdout;  /* initialize output to standard output */  /*UNIX*/
- X
- X  /* set SIGNED_CHAR_TYPE flag */
- X  scr1[0] = '\372';
- X  if((int)scr1[0] < 0) {
- X    SIGNED_CHAR_TYPE = 1;
- X  }
- X  else {
- X    SIGNED_CHAR_TYPE = 0;
- X  }
- X
- X/* if environment is supported */
- X#if GETENV
- X  /* if TRANSPATH variable defined, take its contents; the value is
- X     refused if it does not fit the fixed-size buffer */
- X  if((scr1pt = getenv(TRANSPATH)) != (char *)NULL) {
- X    if(strlen(scr1pt) >= sizeof(deftablpath)) {
- X      fprintf(stderr,
- X        "Error: Value of environment variable %s is too long\n", TRANSPATH);
- X      exit(6);
- X    }
- X    strcpy(deftablpath, scr1pt);
- X  }
- X  if((scr1pt = getenv(DEFNAME)) != (char *)NULL) {
- X    if(strlen(scr1pt) >= sizeof(deftablfile)) {
- X      fprintf(stderr,
- X        "Error: Value of environment variable %s is too long\n", DEFNAME);
- X      exit(6);
- X    }
- X    strcpy(deftablfile, scr1pt);
- X  }
- X#endif
- X
- X  tabl_file = deftablfile;     /* default table file name */
- X
- X  flg = 0;        /* set to no conv table given as an argument */
- X  i = j = k = 0;  /* flags, for files specified: i-inp, j-out, k-tabl */
- X  while ((opt = getopt(argc, argv, OPTIONS)) != EOF) {
- X    switch (opt) {
- X      case 'd':
- X        debug_flg = 1;
- X        break;
- X      case 'i':
- X        if(i != 0) {
- X          fprintf(stderr, "You specified option -i twice\n");
- X          return(1);
- X        }
- X        if((inpf = fopen(optarg, "r")) == NULL) {
- X          fprintf(stderr,"Error: Could not find input file: %s\n", optarg);
- X          return(1);
- X        }
- X        i = 1;
- X        break;
- X      case 'o':
- X        if(j != 0) {
- X          fprintf(stderr, "You specified option -o twice\n");
- X          return(1);
- X        }
- X        /* refuse to overwrite an existing output file */
- X        if((outf = fopen(optarg, "r")) != NULL) {
- X          fprintf(stderr,
- X            "Error: Output file: %s already exists! Delete it first.\n", optarg);
- X          exit(3);
- X        }
- X        if((outf = fopen(optarg, "w")) == NULL) {
- X          fprintf(stderr,"Error: Could not open output file: %s\n", optarg);
- X          exit(2);
- X        }
- X        j = 1;
- X        break;
- X
- X      case 't':
- X        if(k != 0) {
- X          fprintf(stderr, "You specified option -t twice\n");
- X          return(1);
- X        }
- X        tabl_file = optarg;
- X        flg = 1;
- X        k = 1;
- X        break;
- X      case '?':
- X        fprintf(stderr,"Error: %s\n", usage);
- X        exit(3);
- X    } /* end switch */
- X  } /* end while */
- X
- X  if(optind < argc) { /* check if translation table given w/o option -t */
- X    if(flg == 1) {
- X      fprintf (stderr,"Error: You specified conversion table file twice\n");
- X      exit(4);
- X    }
- X    tabl_file = argv[optind];
- X    if(argc > optind + 1) {
- X      fprintf (stderr,"Error: %s\n", usage);
- X      exit(5);
- X    }
- X  }
- X
- X  if((tabl = fopen(tabl_file, "r")) == NULL) { /* try to open file with table */
- X    /* a path/file name that does not fit table_name cannot be opened */
- X    if(strlen(deftablpath) + strlen(tabl_file) >= sizeof(table_name)) {
- X      fprintf(stderr,"Could not find the conversion table file: %s\n",
- X        tabl_file);
- X      exit(6);
- X    }
- X    strcpy(table_name, deftablpath);  /* copy path to scratch string */
- X    strcat(table_name, tabl_file);
- X    if((tabl = fopen(table_name, "r")) == NULL) { /* try to open path/file */
- X      fprintf(stderr,"Could not find the conversion table file: %s\n",
- X        tabl_file);
- X      exit(6);
- X    }
- X  }
- X
- X  /* read in file version number */
- X  if((getnblkline(tabl, 1) < 0) || ((file_version = getnumber(tabl)) < 0)) {
- X    tablerr(7, "Could not read file format number");
- X  }
- X  if(file_version != 1) {
- X    tablerr(10, "This format of conversion file is not supported");
- X  }
- X
- X  /* read in delimiters */
- X
- X  rdelim(&strstart, &strend);
- X  rdelim(&liststart, &listend);
- X  rdelim(&regexstart, &regexend);
- X
- X  /* read in starting and ending sequences */
- X  begseq = beseq();
- X  endseq = beseq();
- X
- X  /* reserve memory for sequences */
- X  chars_left = 5*MAXPAIRS;     /* size of allocated block */
- X  if((scr1ptr = (char*)malloc(chars_left*sizeof(char)))
- X     == NULL) {
- X    tablerr(10, "Out of memory for storing sequences");
- X  }
- X
- X  /* Allocate space for inp_maps and out_sets for input set 0 */
- X
- X  allomaps(0);
- X
- X  /* Read number of input sets */
- X  if((getnblkline(tabl, 1) <= 0) || ((n_inp_sets = getnumber(tabl)) < 0)) {
- X    tablerr(10, "Error when reading input set count");
- X  }
- X
- X  if(n_inp_sets >= MAXSETS) {
- X    tablerr(10, "Too many input shift sequences");
- X  }
- X
- X  /* read input SI/SO sequences */
- X  for (i = 0; i < n_inp_sets; i++) {
- X    /* Allocate space for inp_maps and out_sets for input set i+1 */
- X    allomaps(i+1);
- X
- X    /* read in input SHIFTs seq */
- X    if(getnblkline(tabl, 1) <= 0) {
- X      tablerr(10, "Error when reading input shift sequences");
- X    }
- X    rdinshift(&inp_SO_data[i], 1);
- X    rdinshift(&inp_SO_subs[i], 2);
- X    if((inp_SO_subs[i].typ == 2) && (inp_SO_data[i].typ == 0)) {
- X      tablerr(10,
- X        "Plain string type for matching and substitution expression for output");
- X    }
- X    rdinshift(&inp_nest_open[i], 1);
- X    rdinshift(&inp_nest_close[i], 1);
- X    rdinshift(&inp_SI_data[i], 1);
- X    rdinshift(&inp_SI_subs[i], 2);
- X    if((inp_SI_subs[i].typ == 2) && (inp_SI_data[i].typ == 0)) {
- X      tablerr(10,
- X        "Plain string type for matching and substitution expression for output");
- X    }
- X
- X    if(debug_flg == 1) {
- X      if(inp_SO_data[i].typ == 0) {
- X        fprintf(stderr,"%2d) inp_SO =|%s| ", i, (inp_SO_data[i].ad).seq);
- X      }
- X      else {
- X        fprintf(stderr,"%2d) inp_SO =%d ", i, inp_SO_data[i].typ);
- X      }
- X      if((inp_SO_subs[i].typ == 0) || (inp_SO_subs[i].typ == 2)) {
- X        fprintf(stderr,"%2d) inp_SOsub =|%s| ", i, (inp_SO_subs[i].ad).seq);
- X      }
- X      else {
- X        fprintf(stderr,"%2d) inp_SOsub =%d ", i, inp_SO_subs[i].typ);
- X      }
- X      if(inp_nest_open[i].typ == 0) {
- X        fprintf(stderr,"nest_open =|%s| ", (inp_nest_open[i].ad).seq);
- X      }
- X      else {
- X        fprintf(stderr,"nest_open =%d ", inp_nest_open[i].typ);
- X      }
- X      if(inp_nest_close[i].typ == 0) {
- X        fprintf(stderr,"nest_close =|%s| ", (inp_nest_close[i].ad).seq);
- X      }
- X      else {
- X        fprintf(stderr,"nest_close =%d ", inp_nest_close[i].typ);
- X      }
- X      if(inp_SI_data[i].typ == 0) {
- X        fprintf(stderr,"inp_SI =|%s|\n", (inp_SI_data[i].ad).seq);
- X      }
- X      else {
- X        fprintf(stderr,"inp_SI =%d\n", inp_SI_data[i].typ);
- X      }
- X      if((inp_SI_subs[i].typ == 0) || (inp_SI_subs[i].typ == 2)) {
- X        fprintf(stderr,"%2d) inp_SIsub =|%s| ", i, (inp_SI_subs[i].ad).seq);
- X      }
- X      else {
- X        fprintf(stderr,"%2d) inp_SIsub =%d ", i, inp_SI_subs[i].typ);
- X      }
- X    } /* end debug_flg */
- X  }
- X
- X  if((getnblkline(tabl, 1) <= 0) || ((n_out_sets = getnumber(tabl)) < 0)) {
- X    /* read in out SHIFTs count */
- X    tablerr(10, "Error when reading output set count");
- X  }
- X  if(n_out_sets > MAXSETS) {
- X    tablerr(10, "Too many output SHIFT sequences requested");
- X  }
- X
- X  for (i = 0; i < n_out_sets; i++) {
- X    /* read in out SHIFTs seq */
- X    if((getnblkline(tabl, 1) <= 0) ||
- X       ((scr1pt = getstring(strstart, strend, tabl)) == (char*)NULL) ||
- X       ((scr2pt = getstring(strstart, strend, tabl)) == (char*)NULL)) {
- X      tablerr(10, "Error when reading output shift sequences");
- X    }
- X    convstr(scr1pt, scr1a);
- X    out_SO_len[i]= strlen(scr1a);
- X    out_SO[i] = savestring(scr1a);
- X
- X    convstr(scr2pt, scr2a);
- X    out_SI_len[i] = strlen(scr2a);
- X    out_SI[i] = savestring(scr2a);
- X    if(debug_flg == 1) {
- X      fprintf(stderr,"%2d) out_SO string=|%s| out_SI string=|%s|\n",
- X        i, out_SO[i], out_SI[i]);
- X    }
- X  } /* end for */
- X
- X  /* read the transliteration sequences, one per nonblank line */
- X  i = 0;
- X  while (getnblkline(tabl, 1) > 0) {
- X    if((inp_data[i].set = getnumber(tabl)) < 0) { /* get inp set number */
- X      tablerr(10, "Set number for input sequences is wrong");
- X    }
- X    if(((k = inp_data[i].set) > n_inp_sets) || (k < 0) ) {
- X      tablerr(10,"Input set number for a sequence wrong");
- X    }
- X    curst1 = chknblk(tabl);    /* check what type of string follows */
- X    if(curst1 == strstart) {
- X      mode1 = 1;
- X      curend1 = strend;
- X    }
- X    else if(curst1 == liststart) {
- X      mode1 = 2;
- X      curend1 = listend;
- X    }
- X    else if(curst1 == regexstart) {
- X      mode1 = 3;
- X      curend1 = regexend;
- X    }
- X    else {
- X      tablerr(10, "Delimiter wrong when reading input sequences");
- X    }
- X    /* get input sequence */
- X    if((scr1pt = getstring(curst1, curend1, tabl)) == (char*)NULL) {
- X      tablerr(10, "Error reading input sequence");
- X    }
- X    scr1pt = savestring(scr1pt);  /* Save the string */
- X
- X    if((out_data[i].set = getnumber(tabl)) < -3) { /* get out set number */
- X      tablerr(10, "Wrong code for the output set number");
- X    }
- X
- X    if(out_data[i].set > n_out_sets) {
- X      tablerr(10, "Output set number for a sequence is wrong");
- X    }
- X
- X    curst2 = chknblk(tabl);    /* check what type of string follows */
- X    if(curst2 == strstart) {
- X      mode2 = 1;
- X      curend2 = strend;
- X    }
- X    else if(curst2 == liststart) {
- X      mode2 = 2;
- X      curend2 = listend;
- X    }
- X    else if(curst2 == regexstart) {
- X      mode2 = 3;
- X      curend2 = regexend;
- X    }
- X    else {
- X      tablerr(10, "Delimiter wrong when reading sequences");
- X    }
- X
- X    if((scr2pt = getstring(curst2, curend2, tabl)) == (char*)NULL) {
- X      tablerr(10, "Error reading input sequence");
- X    }
- X    scr2pt = savestring(scr2pt);
- X
- X    /* check if acceptable types for sequences */
- X    if((mode2 == 3) && (mode1 != 3)) { /* no regular expressions for output */
- X      tablerr(10,
- X        "Regular expression as output sequence and input not a regular expression");
- X    }
- X    else if((mode1 == 1) && (mode2 == 2)) { /* inp string, out list */
- X      tablerr(10, "You specified list for output and string for input");
- X    }
- X    else if((mode1 == 3) && (mode2 == 2)) { /* inp regex, out list */
- X      tablerr(10,
- X        "You specified regular expression for input and list for output");
- X    }
- X    else if((mode1 == 2) && (out_data[i].set < 0)) {
- X      tablerr(10,
- X        "Input LIST and output set code -1/-2/-3 is not supported at this moment");
- X    }
- X
- X    if(mode1 == 2) {           /* if list for input expression */
- X      /* split string at - sign */
- X      splitlist(scr1pt, scr1);
- X      if(mode2 == 2) {
- X        splitlist(scr2pt, scr2);
- X        if(strlen(scr1) != strlen(scr2)) {
- X          tablerr(10,
- X            "The number of codes in the input and output list is different");
- X        }
- X      }
- X    } /* end mode 2 */
- X    else {                     /* for all other modes, convert the codes */
- X      convstr(scr1pt, scr1a);  /* convert codes in input string */
- X    }
- X
- X    if((mode1 == 1) && (strlen(scr1a) == 1)) { /* single inp char */
- X      /* it is like list with a single character, so cheat */
- X      if(out_data[i].set >= 0) {
- X        mode1 = 2;
- X        strcpy(scr1, scr1a);   /* make it a list */
- X      }
- X      else {
- X        tablerr(10,
- X          "One-character input strings and output codes -1/-2/-3 are not supported\n");
- X      }
- X    }
- X
- X    if(mode1 == 2) {           /* fill the lists for mode 2 */
- X      if(mode2 == 1) {         /* if normal string as output sequence */
- X        convstr(scr2pt, scr2);
- X        scr2pt = savestring(scr2);
- X      }
- X      else {                   /* if mode2 = 2 */
- X        scr2pt = scr2;
- X      }
- X      /* now fill the maps */
- X      k = inp_data[i].set;
- X      l = out_data[i].set;
- X      scr1pt = scr1;           /* points at input list */
- X      scr2ptr = scr2pt;        /* points at output list or string */
- X      while (*scr1pt != '\0') {
- X        if(mode2 == 2) {       /* prepare string with code */
- X          scr1a[0] = *scr2ptr++;
- X          scr1a[1] = '\0';
- X          scr2pt = savestring(scr1a);
- X        }
- X        ch = intcode(*scr1pt);
- X        if((*inp_maps[k])[ch] != (char *)NULL) {
- X          fprintf(stderr,
- X"You have entered the character |%c| with code \\0d%d for input set %d\n",
- X            charcode(ch), ch, k);
- X          tablerr(10, "Delete previous references if not needed");
- X        }
- X
- X        (*inp_maps[k])[ch] = scr2pt;  /* save output sequence */
- X        (*out_sets[k])[ch] = l;       /* save output set number */
- X        scr1pt++;                     /* next code for output */
- X      }
- X      i--;   /* do not save this line in inp_str and out */
- X    } /* end if mode1 = 2*/
- X    else if(mode1 == 1) {      /* if multicharacter input string */
- X      scr1pt = savestring(scr1a);
- X      convstr(scr2pt, scr2a);
- X      scr2pt = savestring(scr2a);
- X      inp_data[i].typ = 0;
- X      inp_data[i].len = strlen(scr1pt);
- X      (inp_data[i].ad).seq = scr1pt;
- X      out_data[i].typ = 0;
- X      out_data[i].len = strlen(scr2pt);
- X      (out_data[i].ad).seq = scr2pt;
- X    }
- X    else if(mode1 == 3) {      /* if regular expression for input */
- X      inp_data[i].typ = 1;
- X      l = strlen(scr1a);       /* length of converted input expression */
- X      if(scr1a[0] == '^') {
- X        tablerr(10,
- X          "The ^ (beginning anchor) is not supported");
- X      }
- X      /* a trailing $ is an anchor unless a backslash precedes it */
- X      if((l > 0) && (scr1a[l-1] == '$') &&
- X         ((l < 2) || (scr1a[l-2] != '\\'))) {
- X        tablerr(10, "The $ (end anchor) is not supported");
- X      }
- X
- X      strcpy(regerrstr, "Error in input regular expression sequence: ");
- X
- X      if((regauxptr = reg_comp(scr1a)) == NULL) {
- X        tablerr(10, "Error in the input regular expression sequence");
- X      }
- X      (inp_data[i].ad).re = regauxptr;
- X      convstr(scr2pt, scr1a);  /* convert codes in out string */
- X      scr2pt = savestring(scr1a);
- X      if(mode2 == 3) { /* mark type of expression plain(0)/substit string(2) */
- X        out_data[i].typ = 2;
- X      }
- X      else {
- X        out_data[i].typ = 0;
- X      }
- X      out_data[i].len = strlen(scr2pt);
- X      (out_data[i].ad).seq = scr2pt;
- X    }
- X
- X    /* advance pointers */
- X
- X    n_conv_seq = ++i;
- X    if(n_conv_seq >= (MAXPAIRS-1)) {
- X      tablerr(10,
- X        "Too many transliteration sequences. Recompile program with larger MAXPAIRS");
- X    }
- X  } /* end while getnblkline */
- X
- X  if(debug_flg == 1) {
- X    fprintf(stderr,"Multicharacter input sequences \n");
- X    for(i=0; i < n_conv_seq; i++) {
- X      fprintf(stderr,"%2d) inp.type=%2d inp.set=%2d out.type=%2d out.set=%2d\n",
- X        i, inp_data[i].typ, inp_data[i].set, out_data[i].typ, out_data[i].set);
- X      if(inp_data[i].typ == 0) {
- X        fprintf(stderr," Inp.str=|%s| ", (inp_data[i].ad).seq);
- X      }
- X      fprintf(stderr, "Out.str=|%s|\n", (out_data[i].ad).seq);
- X    }
- X    fprintf(stderr,
- X      "input_set charcode input_character --> output_set output_string/\n");
- X    for(i = 0; i <= n_inp_sets; i++) {
- X      for(k = 0; k < 256; k++) {
- X        if((*inp_maps[i])[k] != (char *)NULL) {
- X          fprintf(stderr," %2d \\%04o %c --> %2d %s\n",
- X            i, k, charcode(k), (*out_sets[i])[k], (*inp_maps[i])[k]);
- X        }
- X      }
- X    }
- X  }
- X
- X
- X  fprintf(outf,"%s",begseq);   /* output starting sequence */
- X
- X  /* transliterate input file to output file */
- X
- X  level = 0;
- X  if(n_inp_sets > 0) {
- X    cur_inp_set = 1;
- X  }
- X  else {
- X    cur_inp_set = 0;
- X  }
- X  inp_cur_set[level] = cur_inp_set;  /* 1st input set is a default */
- X  inp_cur_nest[level] = 0;
- X  scr1ptr = scr1a;
- X  scr1ptr[0] = '\0';
- X  scr2ptr = scr2a;
- X  scr2ptr[0] = '\0';
- X  scrcurptr = scr2ptr;
- X  chars_left = 0;
- X  buffer_size = MAXBUFF/2;     /* will be set to 0 if EOF */
- X  out_cur_set = 1;             /* output set 1 is taken as current at start */
- X
- X
- X  while ( buffer_size > 0) {
- X    /* swap input buffer pointers */
- X    scrauxptr = scr2ptr;
- X    scr2ptr = scr1ptr;
- X    scr1ptr = scrauxptr;
- X
- X    scrauxptr = scrcurptr;     /* old buffer last pointer */
- X    scrcurptr = scr1ptr;       /* new buffer start */
- X
- X    /* copy remains of old buffer to new one */
- X    strcpy(scrcurptr, scrauxptr);
- X
- X    l = chars_left;
- X    for(i = 0; i < buffer_size; i++) { /* append input chars to the buffer */
- X      if((k = fgetc(inpf)) == EOF) {
- X        buffer_size = 0;       /* end of file */
- X        break;
- X      }
- X      else if(k == '\0') {     /* skip zero characters */
- X        i--;
- X        continue;
- X      }
- X      else {                   /* if normal character */
- X        *(scrcurptr + l++) = k;
- X      }
- X    }
- X    *(scrcurptr + l) = '\0';   /* terminate buffer with 0 */
- X    chars_left = l;            /* length of combined string */
- X    if(buffer_size == 0) {     /* if EOF */
- X      chars_left += MAXMATCH+1;  /* fool the program that there is more */
- X    }
- X
- X    while (chars_left > MAXMATCH) {
- X      /* check if end of scrcurptr --- it means end of input file, since only
- X         then it can get to the end of the string, otherwise it stops
- X         MAXMATCH before */
- X      if(*scrcurptr == '\0') { /* end of file */
- X        if(n_out_sets > 0) {   /* if multiple output sets */
- X          l = out_SI_len[out_cur_set - 1];
- X          for(i = 0; i < l; i++) {
- X            k = out_SI[out_cur_set-1][i];
- X            fputc(k, outf);
- X          }
- X          break;
- X        }
- X      }
- X
- XBackstep2:
- X      /* check if new set of input chars started */
- X      l = -1;
- X      for (i = 0; i < n_inp_sets; i++) {
- X        if(match_subs(&inp_SO_data[i], &inp_SO_subs[i]) > 0) {
- X          l = i;
- X          break;
- X        }
- X      }
- X      if(l >= 0) {             /* is SO matched */
- X        repl_inp();            /* substitute SO_data with SO_seqs */
- X        if((inp_SI_data[l].len > 0) || (inp_SI_data[l].typ == 1)){
- X          /* increase level only if SHIFT IN present */
- X          level++;             /* increase number of "opened" input sets */
- X          /* check before the per-level arrays are written: they have
- X             MAXLEVEL elements, so the last valid index is MAXLEVEL-1 */
- X          if(level >= MAXLEVEL) {
- X            fprintf(stderr,
- X              "Too many nested input character sets in input file\n");
- X            exit(39);
- X          }
- X          inp_cur_nest[level] = 0;  /* It is new level, zero nesting sequences */
- X        }
- X        l++;   /* sets in arrays are saved starting from 0,
- X                  i.e., set nr 1 corresponds to element [0], 2 --> [1], etc. */
- X        inp_cur_set[level] = l;  /* save set number at current nesting level */
- X        cur_inp_set = l;
- X        continue;
- X      }
- X      /* check if SHIFT IN sequence for current input set */
- X      if(n_inp_sets > 0) {
- X        /* check SI sequence only when nesting count is 0 */
- X        if(inp_cur_nest[level] == 0) {
- X          /* the replacement comes from the SI substitution data */
- X          if(match_subs(&inp_SI_data[cur_inp_set-1],
- X                        &inp_SI_subs[cur_inp_set-1]) > 0) { /* is SI */
- X            repl_inp();
- X            level--;
- X            if(level < 0) {
- X              level = 0;
- X              fprintf(stderr,
- X"More SHIFT_IN sequences than corresponding SHIFT_OUT sequences in text\n");
- X            }
- X            cur_inp_set = inp_cur_set[level];  /* set previous inp set number */
- X            continue;
- X          }
- X        }
- X      }
- X
- X      /* Now check if the input sequence corresponding to cur_inp_set
- X         matches the string */
- X
- XBackstep1:   /* if output set number is -2, start again from here */
- X      flg = -1;
- X      for(i = 0; i < n_conv_seq; i++) {
- X        k = inp_data[i].set;  /* get set number for current transliteration seq */
- X        if((k == cur_inp_set) || (k == 0)) { /* if equal to current or 0 */
- X          if(match_subs(&inp_data[i], &out_data[i]) > 0) {
- X            if(out_set_number < 0) {  /* if backstepping */
- X              repl_inp();             /* replace */
- X              if (out_set_number == -1) {
- X                flg = -1;      /* keep scanning the remaining sequences */
- X              }
- X              else if (out_set_number == -2) {
- X                goto Backstep1;
- X              }
- X              else if (out_set_number == -3) {
- X                goto Backstep2;
- X              }
- X            }
- X            else {             /* if set number >= 0 */
- X              flg = i;
- X              break;
- X            }
- X          }
- X        }
- X      }
- X
- X      if(flg < 0) {  /* if no matching input multichar sequence found */
- X        ch = intcode(*scrcurptr);  /* current input character */
- X        if((out_seq_ptr = (*inp_maps[cur_inp_set])[ch]) != NULL) {
- X          /* if out_seq exists for current input set */
- X          out_set_number = (*out_sets[cur_inp_set])[ch];
- X          flg = 1;
- X        }
- X        else if((out_seq_ptr = (*inp_maps[0])[ch]) != NULL) {
- X          /* if out_seq exists for set number 0 */
- X          flg = 1;
- X          out_set_number = (*out_sets[0])[ch];
- X        }
- X        if(flg >= 0) {         /* set other things */
- X          out_seq_length = strlen(out_seq_ptr);
- X          inp_seq_length = 1;
- X        }
- X      }
- X
- X      if(flg < 0) {  /* if no match found, copy the input char to output */
- X        scr1[0] = *scrcurptr;
- X        scr1[1] = '\0';
- X        if(*scrcurptr != '\0') {
- X          out_seq_length = 1;
- X        }
- X        else {
- X          out_seq_length = 0;
- X        }
- X
- X        inp_seq_length = 1;
- X        out_set_number = 0;
- X        out_seq_ptr = scr1;
- X      }
- X
- X      /* At this point all matches and substitutions have been done */
- X
- X      /* check if nesting sequences found for a given set and increase or
- X         decrease nesting if needed */
- X      if((n_inp_sets > 0) && (out_cur_set > 0)) {
- X        for(i = 0; i < inp_seq_length; i++) {
- X          if(chkseqs(1, &inp_nest_close[cur_inp_set-1], scrcurptr+i) >= 0) {
- X            inp_cur_nest[level]--;
- X          }
- X          if(chkseqs(1, &inp_nest_open[cur_inp_set-1], scrcurptr+i) >= 0) {
- X            inp_cur_nest[level]++;
- X          }
- X        }
- X      }
- X
- X      /* output the SI/SO sequences if output set changed */
- X      if((n_out_sets > 0) &&
- X         (out_set_number > 0)) {  /* check if multiple output sets */
- X        if(out_cur_set != out_set_number) {  /* if new set starts */
- X          if(out_cur_set > 0) {   /* put SHIFT IN for a previous set */
- X            l = out_SI_len[out_cur_set-1];  /* old SHIFT IN seq length */
- X            for(i = 0; i < l; i++) {        /* output old SHIFT IN */
- X              k = out_SI[out_cur_set-1][i];
- X              fputc(k, outf);
- X            }
- X          }
- X          out_cur_set = out_set_number;     /* make it current now */
- X          if(out_cur_set > 0) {
- X            l = out_SO_len[out_cur_set-1];  /* length of SHIFT OUT sequence */
- X            for(i = 0; i < l; i++) {  /* output SHIFT OUT seq for this set */
- X              k = out_SO[out_cur_set-1][i];
- X              fputc(k, outf);
- X            }
- X          }
- X        } /* end out_set changes */
- X      } /* if multiple output sets specified */
- X
- X      /* now output the corresponding sequence */
- X      for(i = 0; i < out_seq_length; i++) {
- X        k = *(out_seq_ptr+i);
- X        fputc(k,outf);
- X      }
- X
- X      /* move past processed input text */
- X      scrcurptr += inp_seq_length;
- X      chars_left -= inp_seq_length;
- X
- X
- X    } /* while scanning input characters */
- X
- X  } /* end while reading input file */
- X  fprintf(outf,"%s",endseq);   /* output ending sequence */
- X  fclose(inpf);
- X  fclose(outf);
- X  exit(0);
- X}
- X
- END_OF_FILE
- if test 54601 -ne `wc -c <'translit.c'`; then
- echo shar: \"'translit.c'\" unpacked with wrong size!
- fi
- # end of 'translit.c'
- fi
- echo shar: End of archive 3 \(of 10\).
- cp /dev/null ark3isdone
- MISSING=""
- for I in 1 2 3 4 5 6 7 8 9 10 ; do
- if test ! -f ark${I}isdone ; then
- MISSING="${MISSING} ${I}"
- fi
- done
- if test "${MISSING}" = "" ; then
- echo You have unpacked all 10 archives.
- rm -f ark[1-9]isdone ark[1-9][0-9]isdone
- else
- echo You still must unpack the following archives:
- echo " " ${MISSING}
- fi
- exit 0
- exit 0 # Just in case...
-