Source Code 1994 March

home *** CD-ROM | disk | FTP | other *** search

/ Source Code 1994 March / Source_Code_CD-ROM_Walnut_Creek_March_1994.iso / compsrcs / misc / volume36 / translit / part08 < prev next >

Wrap

Text File | 1993-03-21 | 62.7 KB | 1,913 lines

Newsgroups: comp.sources.misc From: jkl@osc.edu (Jan Labanowski) Subject: REPOST: v36i030: translit - transliterate foreign alphabets, Part08/10 Message-ID: <1993Mar23.031051.21304@sparky.imd.sterling.com> X-Md4-Signature: 8dbd56262e22e63de4c0c140853fb7e5 Date: Tue, 23 Mar 1993 03:10:51 GMT Approved: kent@sparky.imd.sterling.com Submitted-by: jkl@osc.edu (Jan Labanowski) Posting-number: Volume 36, Issue 30 Archive-name: translit/part08 Environment: UNIX, MS-DOS, VMS [ This is being reposted due to a file containing control characters ] [ didn't make it through news systems... -Kent+ ] #! /bin/sh # This is a shell archive. Remove anything before this line, then feed it # into a shell via "sh file" or similar. To overwrite existing files, # type "sh file -c". # Contents: example.tex.UU koi8-tex.rus readme.doc reg_exp.c # Wrapped by kent@sparky on Fri Mar 19 16:00:14 1993 PATH=/bin:/usr/bin:/usr/ucb:/usr/local/bin:/usr/lbin ; export PATH echo If this archive is complete, you will see the following message: echo ' "shar: End of archive 8 (of 10)."' if test -f 'example.tex.UU' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'example.tex.UU'\" else echo shar: Extracting \"'example.tex.UU'\" $13202 characters$ sed "s/^X//" >'example.tex.UU' <<'END_OF_FILE' begin 664 example.tex M7&1O8W5M96YT<W1Y;&5[87)T:6-L97T*7&EN<'5T(&-Y<F%C8RYD968*7&9O M;G1<=&5N8WER/7=N8WER,3 *7&1E9EQC>7)[7'1E;F-Y<EQC>7)A8V-]"EQB M96=I;GMD;V-U;65N='T*('M<8WER($$@0B!6($<@1"!%(%PB12!::"!:($D@ M>UQU($E]($L@3"!-($X@3R!0(%(@4R!4(%4@1B!+:"!#($-H(%-H(%-H8V@@ M>UQ#9'!R:6UE?2!9('M<0W!R:6UE?2!<8$4@674@66$@4UQ<#0IA(&(@=B!G M(&0@92!<(F4@>F@@>B!I('M<=2!I?2!K(&P@;2!N(&\@<"!R(',@="!U(&8@ M:V@@8R!C:"!S:"!S:&-H('M<8V1P<FEM97T@>2![7&-P<FEM97T@7&!E('EU M('EA(%1<7 T*>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!] 
M>UP@7"!]>UP@7"!]("AI>B!G87IE='D@4TUO<VMO=G-K:64@;F]V;W-T:50L M(#8N,3(N.3(I7%P-"GM<(%P@?7M<(%P@?7M<(%P@?7M<(%P@?7M<(%P@?7M< M(%P@?7M<(%P@?7M<(%P@?7M<(%P@?7M<(%P@?7M<(%P@?7M<(%P@?7M<(%P@ M?7M<(%P@?7M<(%P@?2!665I/5GM<(%P@?2 H8VAA<W1[7&-P<FEM97T@<&5R M=F%Y82E<7 T*>UP@7"!]>UP@7"!]5B!K86MO>UQU(&E]('-U971E(&UY('-E M9V]D;GEA('IH:79<(F5M+GM<(%P@?41A>FAE('!O;&ET:6MA+"!I('1A(&1A M=FYO('5Z:&5<7 T*<&5R97-T86QA(&YA<R!Z86YI;6%T>UQC<')I;65]+GM< M(%P@?4UY(&]B<F%S:&-H865M(&YA(&YE7")E('9N:6UA;FEE('1O;'M<8W!R M:6UE?6MO('1O9V1A+"!K;V=D85Q<#0IN879E<FMH=2!R87IG;W)A971[7&-Y M9&]T?7-Y82!O8VAE<F5D;F%Y82!S=F%R82Y[7"!<('U.92!V;VQN=7EU="!N M87,@:2!M:7)O=GEE('!R;V)L96UY+EQ<#0I#:'1O('IH+"!Z87IE;6Q<(F5N M;F]S='M<8W!R:6UE?2!N87-H96=O(&)Y=&EY82!V<&]L;F4@<&]N>6%T;F$N M>UP@7"!]02!M97IH9'4@=&5M('!R;VES:VAO9'EA<VAC:&5E7%P-"G9O:W)U M9R!D97M<=2!I?7-T=FET96Q[7&-P<FEM97UN;R!P<FEO8G)E=&%E="!G;&]B M86Q[7&-P<FEM97UN>64@:7IM97)E;FEY82Y<7 T*>UP@7"!]>UP@7"!]>UP@ M7"!]>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]+BXN+BXN+BXN+BXN M+BXN+BXN+BXN+BXN+BXN+BXN+BXN+EQ<#0I[7"!<('U[7"!<('U[7"!<('U[ M7"!<('U[7"!<('U[7"!<('U[7"!<('U[7"!<('U[7"!<('U[7"!<('T@4U-3 M4B!53452+GM<(%P@?4M43R!03T)%1$E,/UQ<#0I[7"!<('U[7"!<('TN+BY) M=&%K+"!T;VQ[7&-P<FEM97UK;R!T97!E<GM<8W!R:6UE?2!N86-H:6YA>75T M('9Y<FES;W9Y=F%T>UQC<')I;65]<WEA(&MO;G1U<GD@=&5K:"!G<F%N9&EO M>FYY:VA<7 T*<&]S;&5D<W1V:7M<=2!I?2P@:V]T;W)Y92!S=GEA>F%N>2!S M(&MR86MH;VT@:V]M;75N:7IM82!V(%-O=F5T>UQC>61O='US:V]M(%-O>75Z M92!I(&5G;UQ<#0IR87-P861O;2Y[7"!<('U%<VAC:%PB92!V8VAE<F$@>F%R M=6)E>FAN>64@86YA;&ET:6MI(',@;VQI;7!I>UQU(&E]<VMI;2!B97-P<FES M=')A<W1I96U<7 T*=GII<F%L:2!N82!B97IR87-S=61O8VAN>64@:7IG:6)Y M(&YA<VAE9V\@=GER>79N;V=O(&D@;F5P;VYY871N;V=O(&1V:7IH96YI>6$N M7%P-"E-E9V]D;GEA('5Z:&4@;6YO9VEM('-T86YO=FET>UQC>61O='US>6$@ M>6%S;F\L(&-H=&\@<W5M8G5R;GEE(&MO;G9U;'M<8W!R:6UE?7-I:5Q<#0IP M;W-T:V]M;75N:7-T:6-H97-K;V=O(&UI<F$@<V%M>6T@;F5P;W-R961S='9E M;FYY;2!O8G)A>F]M('IA=')A9VEV87EU=%Q<#0ID86Q[7&-P<FEM97UN97M< M=2!I?7-H=7EU('-U9'M<8W!R:6UE?6)U('9S96=O(&UE>FAD=6YA<F]D;F]G 
M;R!S;V]B<VAC:&5S='9A+GM<(%P@?41A+"!K;VUM=6YI>FT@<G5K:&YU;"Q< M7 T*4U-34B!I<V-H97H@<R!P;VQI=&EC:&5S:V][7'4@:7T@:V%R='DL(&YO M('1E;2!S86UY;2!B>6QA('IA=F5R<VAE;F$@8V5L87EA(&=L879A7%P-"G8@ M:7-T;W)I:2!V<V5[7'4@:7T@8VEV:6QI>F%C:6DN7%P-"GM<(%P@?7M<(%P@ M?4YE('1O;'M<8W!R:6UE?6MO(&YA<VQE9&YI:VD@:V]M;75N:7IM82P@;F\@ M:2!O<W1A;'M<8W!R:6UE?6YO>UQU(&E](&UI<B!O:V%Z86QI<WM<8W!R:6UE M?2!V9')U9UQ<#0IP97)E9"!C:&ES='EM(&QI<W1O;2Y[7"!<('U0;R!M97)E M(&]S;WIN86YI>6$@7&!E=&]G;R!F86MT82!V('IA<G5B97IH;GEK:%Q<#0IP M;VQI=&EC:&5S:VEK:"!K<G5G86MH('!E<G9A>6$@7&!E>UQU(&E]9F]R:7EA M('!O8F5D>2!N860@4VEM<&5R:65[7'4@:7T@>FQA5"!I(&YE:V]E9V]<7 T* M<V%M;W5D;W9L971V;W)E;FEY82!S;65N>6%E='M<8WED;W1]<WEA('9S7")E M(&)O;'M<8W!R:6UE?7-H97M<=2!I?2!O>F%B;V-H96YN;W-T>UQC<')I;65] M>74L(')A<W1E<GEA;FYO<W1[7&-P<FEM97UY=2Q<7 T*82!K;V4M9V1E(&D@ M<&%N:6MO>UQU(&E]+GM<(%P@?5)E8VA[7&-P<FEM97T@:61<(F5T(&YE('1O M;'M<8W!R:6UE?6MO(&\@=')E=F]G92P@=GEZ=F%N;F][7'4@:7U<7 T*;F5P M<F5D<VMA>G5E;6]S='M<8W!R:6UE?7EU('!R;V-E<W-O=B!N82!T97)R:71O M<FEI('9C:&5R87-H;F5G;R!34U-2+GM<(%P@?4MS=&%T:2P@<&]K85Q<#0IO M;FD@;F4@<')I;GEA;&D@<W1O;'M<8W!R:6UE?2!A<&]K86QI<'-I8VAE<VMO M>UQU(&E](&9O<FUY+"!K86L@;WIH:61A;&D@;6YO9VEE+"!I7%P-"G!R;W1E M:V%Y=70@=B!O=&QI8VAI92!O="!9=6=O<VQA=FEI(&)O;&5E(&-I=FEL:7IO M=F%N;F\N7%P-"GM<(%P@?7M<(%P@?59D<G5G(',@;V-H979I9&YO<W1[7&-P M<FEM97UY=2!O=&MR>6QO<WM<8W!R:6UE?2!D<G5G;V4@+2!C:'1O+"!N97-M M;W1R>6$@;F$@=FYE<VAN95Q<#0IA8G-O;'EU=&YU>74@<')O=&EV;W!O;&]Z M:&YO<W1[7&-P<FEM97T@>F%P861N;WM<=2!I?2!I(&MO;6UU;FES=&EC:&5S M:V][7'4@:7T@<VES=&5M+"!O;FE<7 T*=GIA:6UO<W9Y87IA;GDN>UP@7"!] 
M365K:&%N:7IM(')A>G9I=&EY82!T;WM<=2!I?2!I(&1R=6=O>UQU(&E]+"!K M86L@=&5P97)[7&-P<FEM97T@;V)N87)U>FAI=F%E='M<8WED;W1]<WEA+%Q< M#0IB>6P@>F%P<F]G<F%M;6ER;W9A;B!N82!N86QI8VAI92!S=F]E9V\@86YT M86=O;FES=&$N7%P-"GM<(%P@?7M<(%P@?45S:&-H7")E('!R961S=&]I="!R M87IO8G)A='M<8W!R:6UE?7-Y82P@=B!K86MO>UQU(&E]('-T97!E;FD@=&4@ M:6QI(&EN>64@=&5N9&5N8VEI7%P-"F]B<VAC:&5S='9E;FYO>UQU(&E]('IH M:7IN:2!:87!A9&$@>6%V:6QI<WM<8W!R:6UE?2!R97IU;'M<8W!R:6UE?71A M=&]M(&5G;R!V;G5T<F5N;GE[7'4@:7T@7&!E=F]L>75C:6DL(&%<7 T*=B!K M86MO>UQU(&E](&)Y;&D@;V)U<VQO=FQE;GD@<W5S:&-H97-T=F]V86YI96T@ M:V]M;75N:7-T:6-H97-K;V=O(&]B<VAC:&5S='9A+"!I7%P-"FYA;V)O<F]T M+GM<(%P@?4YO('IA<&%D;GE[7'4@:7T@;6ER+"!T86L@9&]L9V\@:2!A:W1I M=FYO(&1O8FEV879S:&E[7'4@:7US>6$@:V]N8V%<7 T*:V]M;75N:7IM82P@ M;VMA>F%L<WEA(&YE('!O9&=O=&]V;&5N;GEM(&L@>FAI>FYI('!O<VQE(&5G M;R!P861E;FEY82Y[7"!<('U/;F]<7 T*;F%R=7-H:6QO(&=L;V)A;'M<8W!R M:6UE?6YU>74@<VES=&5M=2!B97IO<&%S;F]S=&D@:2!O8G-H8VAE>FAI=&EY M82P@:V]T;W)A>6$@<VMR=7!U;%PB97IN;UQ<#0IS;WID879A;&%S>UQC<')I M;65]('!O<VQE('9T;W)O>UQU(&E](&UI<F]V;WM<=2!I?2!V;WM<=2!I?6YY M+"!U<W!E;&$@;V)R87-T:2!S=F]E>UQU(&E](&)Y=7)O:W)A=&EE>UQU(&E] M(&E<7 T*<&]L=6-H:71[7&-P<FEM97T@9&%Z:&4@<V]B<W1V96YN=7EU(&QO M9VEK=2!R87IV:71I>6$N>UP@7"!]02!T=70@=B!O9&YO(&UG;F]V96YI92!< M8&5T;W1<7 T*;6ER;W!O<GEA9&]K(')A>G9A;&EL<WEA+EQ<#0I[7"!<('U[ M7"!<('U6;65S=&4@<R!N:6T@<&]K86-H;G5L87-[7&-P<FEM97T@=G-Y82!R M87IV971V;%PB96YN87EA('-I<W1E;6$@:6YS=&ET=71O=B!I7%P-"F-E;FYO M<W1E>UQU(&E]+"!N82!K;W1O<GEK:"!D;R!S:6MH('!O<B!D97)Z:&%L;W-[ M7&-P<FEM97T@:VAR=7!K;V4@;6ER;W9O92!R879N;W9E<VEE+EQ<#0I3=&%L M;R!Y87-N;RP@8VAT;R!N92!T;VQ[7&-P<FEM97UK;R!B>79S:&EM(&MO;6UU M;FES=&EC:&5S:VEM(&=O<W5D87)S='9A;2P@;F\@:5Q<#0IV<V5M=2!M:7)O M=F]M=2!S;V]B<VAC:&5S='9U('!R961S=&]I="!I<VMA='M<8W!R:6UE?2!N M;W9Y92!F;W)M>2!S=7-H8VAE<W1V;W9A;FEY82Y<7 T*4')I9%PB971[7&-Y M9&]T?7-Y82!Z86YO=F\@;W-M>7-L:79A='M<8W!R:6UE?2!M;F]G:64@=F]P M<F]S>2P@:V%Z879S:&EE<WEA(')A>B!I(&YA=G-E9V1A7%P-"G)E<VA<(F5N M;GEM:2Y[7"!<('U3:V%Z:&5M+"!S=&%B:6Q[7&-P<FEM97UN;W-T>UQC<')I 
M;65](&=R86YI8R!I;&D@;V)E<W!E8VAE;FEE(&-E;&]S=&YO<W1I7%P-"F=O M<W5D87)S='8N>UP@7"!]02!P<F%V82!N86-I>UQU(&E](&YA('-A;6]O<')E M9&5L96YI92P@;F]V>7M<=2!I?2!F961E<F%L:7IM(&E<7 T*<V]C:&5T86YI M92!K<FET97)I978@;F%C:6]N86Q[7&-P<FEM97UN;V=O('9O>G)O>FAD96YI M>6$@:2!D96UO:W)A=&EI/WM<(%P@?5-E9V]D;GEA(%Q@971I7%P-"G9O<')O M<WD@<&]D;GEA='D@<&]S=&MO;6UU;FES=&EC:&5S:VEM:2!O8G-H8VAE<W1V M86UI+GM<(%P@?4YO('9O="UV;W0@:R!N:6T@=FYO=GM<8W!R:6UE?5Q<#0IV M;WIV<F%T>6%T>UQC>61O='US>6$@*'5Z:&4@=F]Z=G)A<VAC:&%Y=71[7&-Y M9&]T?7-Y82D@9V]S=61A<G-T=F$@07II:2!I($%F<FEK:2P@9V1E7%P-"G-O M;W1V971[7&-Y9&]T?7-T=G5Y=7-H8VAI92!P<F]C97-S>2!B>6QI('8@<W9O M7")E('9R96UY82!I<VMU<W-T=F5N;F\@>F%M;W)O>FAE;GDN7%P-"GM<(%P@ M?7M<(%P@?5!R:6MH;V1I='M<8WED;W1]<WEA('!R96]D;VQE=F%T>UQC<')I M;65](&D@;F%S:&D@;F5D879N:64@:6QL>75Z:6DN>UP@7"!]4VMO;'M<8W!R M:6UE?6MO(&)Y;&]<7 T*=F]S=&]R9V]V('!O('!O=F]D=2!R87-P861A(&)I M<&]L>6%N;WM<=2!I?2!S:7-T96UY(&UE>FAD=2!N87)O9&YY:V@@;W1N;W-H M96YI>UQU(&E]+%Q<#0IP;VMO:79S:&5[7'4@:7US>6$@;F$@<V]P97)N:6-H M97-T=F4@:2!V>F%I;6YO;2!S9&5R>FAI=F%N:6D@9'9U:V@@>6%D97)N>6MH M7%P-"G-V97)K:&1E<GIH878@+2!34VA!(&D@4U-34BY[7"!<('U$=6UA;&]S M>UQC<')I;65]+"!V;W0@;VYO+"!N87-T=7!L96YI92!D879N;UQ<#0II<VMO M;6]G;R!B97IO8FQA8VAN;V=O(&UI<F]P;W)Y861K82Y[7"!<('U.92!T=70M M=&\@8GEL;RY[7"!<('U59W)O>F$@9VQO8F%L>UQC<')I;65];F][7'4@:7U< M7 T*>6%D97)N;WM<=2!I?2!K;VYF<F]N=&%C:6D@9&5[7'4@:7US='9I=&5L M>UQC<')I;65];F\@<VYI>FEL87-[7&-P<FEM97TN>UP@7"!]3F\@=GIA;65N M(&)Y=G-H96=O7%P-"FMO;6UU;FES=&EC:&5S:V]G;R!L86=E<GEA(&UI<B!P M;VQU8VAI;"!C96QY>UQU(&E](%-B=6ME=%0@<')O8FQE;2 M(&D@<F]Z:&1E M;FEE7%P-"FYO=GEK:"!G;W-U9&%R<W1V+"!I('!E<F5S;6]T<B!G<F%N:6,L M(&D@:V]N9FQI:W1Y('!O('!O=F]D=2!P<F%V7%P-"FYA8VUE;GM<8W!R:6UE M?7-H:6YS='8L(&D@;F%K;VYE8RP@<W)A>G4@9'9E('9O>UQU(&E];GD@+2!N M82!"86QK86YA:V@@:2!+879K87IE+GM<(%P@?51A:V]V;UQ<#0IP<GEA;6]E M('-L961S='9I92!P861E;FEY82!34U-2+"!Z:&5L97IN;WM<=2!I?2!K:'9A M=&MO>UQU(&E]('!O9&%V;'EA=G-H96=O+"!Z86=O;GEA=G-H96=O7%P-"G9O M=FYU=')[7&-P<FEM97T@=G-E('!R;W1I=F]R96-H:7EA('9N=71R:2!S96)Y 
M82!I('8@<V9E<F4@<W9O96=O('9L:7EA;FEY82P@8VAT;R!T96U<7 T*<V%M M>6T@=7-I;&EV86QO(&UO<VAC:'M<8W!R:6UE?2!I:V@@<&]T96YC:6%L>UQC M<')I;65];F]G;R!V>G)Y=F$@<&]S;&4@=GEK:&]D82!N82!P;W9E<FMH;F]S M='M<8W!R:6UE?2Y<7 T*>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!] M>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]7&!%2VA/(%-/5D54>UQC M>61O='U32T]'3R!205-0041!7%P-"GM<(%P@?7M<(%P@?41A;FYY>UQU(&E] M(&MH;V0@<V]B>71I>UQU(&E](&UO>FAN;R!B>6QO(&QE9VMO('!R;V=N;WII M<F]V871[7&-P<FEM97TN>UP@7"!]3F\@;6%L;R!K=&\@9'5M86PL7%P-"FMA M:V]V;R!B=61E="!V;&EY86YI92!S;W9E='M<8WED;W1]<VMO9V\@:W)U<VAE M;FEY82!N82!V>F%I;6]O=&YO<VAE;FEY82!V;G5T<FD@>F%P861N;V=O7%P- M"G-O;V)S:&-H97-T=F$N>UP@7"!]4V5G;V1N>6$@=7IH92!O8VAE=FED;F\L M(&-H=&\@4V]Y=7H@8GEL(&YE;6%L;W9A>FAN>6T@9F%K=&]R;VU<7 T*<W!L M;V-H96YI>6$@7&!E=&]G;R!S;V]B<VAC:&5S='9A(&D@<F%S<&%D('-O=F5T M>UQC>61O='US:V]G;R!G;W-U9&%R<W1V82!Y879I;'-Y82!T;VQC:&MO;2Q< M7 T*:V]T;W)Y>UQU(&E]('5S:6QI;"!N82!:87!A9&4@8V5N=')O8F5Z:&YY M92!T96YD96YC:6DL('9Y>6%V:6P@;F5S;W9P861E;FEE7%P-"FEN=&5R97-O M=B!I;F1U<W1R:6%L>UQC<')I;65];GEK:"!S=')A;BY[7"!<('U.82!F;VYE M('1E;F1E;F-I>UQU(&E]+"!N86UE=&EV<VAI:VAS>6$@=FYU=')I($53+%Q< M#0ID86Q[7&-P<FEM97UN97M<=2!I?7-H87EA('-U9'M<8W!R:6UE?6)A(&5V M<F]P97M<=2!I?7-K;WM<=2!I?2!I;G1E9W)A8VEI('5Z:&4@;F4@=GEG;'EA M9&ET(&)E>F]B;&%C:&YO>UQU(&E]+EQ<#0I+;VYE8VAN;RP@;V)[7&-D<')I M;65]>6%S;GEA971[7&-Y9&]T?7-Y82!<8&5T;R!V;G5T<F5N;FEM:2!P<F]C M97-S86UI('8@>F%P861N;V5V<F]P97M<=2!I?7-K:6MH('-T<F%N86MH+EQ< M#0I4;W0@9F%K="P@8VAT;R!P<F%K=&EC:&5S:VD@<&]V<WEU9'4@;F%C:&%L M:2!V9')U9R!G;W9O<FET>UQC<')I;65](&YE(&]B(&EN=&5G<F%C:6DL7%P- M"F$@;R!N86-I;VYA;'M<8W!R:6UE?6YY:V@@:6YT97)E<V%K:"P@=')E8F]V M871[7&-P<FEM97T@;F4@<F%S<&%K:&YU='M<8W!R:6UE?2!O:VYO('8@;6ER M+"!A(&]P=7-T:71[7&-P<FEM97U<7 T*<VAT;W)Y+"!V;R!M;F]G;VT@>6%V M;'EA971[7&-Y9&]T?7-Y82!S;&5D<W1V:65M('!O<')A=FQE;FEY82!O8G-H M8VAE<W1V96YN;V=O(&UN96YI>6$L7%P-"F%K=&EV:6EZ86-I:2!N86-I;VYA M;&ES=&EC:&5S:VEK:"!G<G5P<&ER;W9O:RP@<&]V<V5M97-T;F]G;R!U:VAU M9'-H96YI>6%<7 T*7&!E:V]N;VUI8VAE<VMO>UQU(&E]('-I='5A8VEI+EQ< 
M#0I[7"!<('U[7"!<('U.;RP@=B!S=F]Y=2!O8VAE<F5D>UQC<')I;65]+"!V M<V4@7&!E=&D@<')O8V5S<WD@<')Y86UO('-V>6%Z86YY(',@:7IM96YE;FEE M;5Q<#0IP<FEV>6-H;F]G;R!O:W)U>FAE;FEY82!:87!A9&YO>UQU(&E]($5V M87)O<'DN>UP@7"!]3VYA(&]K87IA;&%S>UQC<')I;65](&QI8V]M(&L@;&EC M=2!S7%P-"FYE<W1A8FEL>UQC<')I;65];GEM:2!I(&YE<')E9'-K87IU96UY M;6D@<&]S=&MO;6UU;FES=&EC:&5S:VEM:2!O8G-H8VAE<W1V86UI+GM<(%P@ M?4-H96U<7 T*9&%L>UQC<')I;65]<VAE+"!T96T@8F]L>UQC<')I;65]<VAI M;2!P;V1O>G)E;FEE;2!Z87!A9&YY>UQU(&E](&]B>79A=&5L>UQC<')I;65] M('-M;W1R:70@=B!I:V@@<W1O<F]N=2Q<7 T*;F5S;'5S:&%V<VAI<WM<8W!R M:6UE?2!P<F]G;F]Z;W8@;R!G;W1O=GEA<VAC:&EK:'-Y82!Z86MH;&5S=&YU M='M<8W!R:6UE?2!%=G)O<'4@;6%S<V]V>6MH(&YA<VAE<W1V:7EA:VA<7 T* M;6EG<F%N=&]V(&D@<')O8VAI:V@@;F5P<FEY871N;W-T>6%K:"Y<7 T*>UP@ M7"!]>UP@7"!]4V%M>64@:W)U<&YY92P@<F%Z=6UE971[7&-Y9&]T?7-Y82P@ M;WIH:61A>75T>UQC>61O='US>6$@<V\@<W1O<F]N>2!B>79S:&5G;R!3;W9E M='M<8WED;W1]<VMO9V]<7 T*4V]Y=7IA+"!K;W1O<GE[7'4@:7T@8VAA<VAC M:&4@=G-E9V\@=F]S<')I;FEM865T>UQC>61O='US>6$@:V%K('!O=&5N8VEA M;'M<8W!R:6UE?6YY>UQU(&E](&ES=&]C:&YI:R!Y861E<FYY:VA<7 T*:V%T M87-T<F]F(&D@:W)O=F%V>6MH(&UE>FAN86-I;VYA;'M<8W!R:6UE?6YY:V@@ M:V]N9FQI:W1O=BY[7"!<('U6;W0@=F%M(&D@<&]C:'9A(&1L>6$@;F]V>6MH M7%P-"G-T<F%K:&]V.R!V;W0@:2!I<W1O:VD@=FYE>F%P;F]G;R!K;VYS97)V M871I=FYO9V\@:W)E;F$@>F%P861N;V=O(&]B<VAC:&5S='9A+%Q<#0IE9V\@ M='EA=&]T96YI>6$@:R!B;VQE92!Z:&5S=&MO;74@<F5Z:&EM=2P@82!Z86]D M;F\@:2!K(&YO=F]M=2!R87ID96QI=&5L>UQC<')I;65];F]M=5Q<#0IZ86YA M=F5S=2P@:V]T;W)Y>UQU(&E](&)Y(&]G<F%D:6P@96=O(&]T(&YA<VAE>UQU M(&E](&-H87-T:2!S=F5T82Y<7 T*>UP@7"!]>UP@7"!]4')I;65R($=E<FUA M;FEI+"!R97-H:79S:&5[7'4@:7T@=B!O='9E="!N82!P;V=R;VUY('9Y9'9O M<FET>UQC<')I;65]('IA('-V;VD@<')E9&5L>5Q<#0IT>7-Y86-H:2!R=6UY M;G-K:6MH(&-Y9V%N(&D@9&%Z:&4@:7IM96YI='M<8W!R:6UE?2!K;VYS=&ET M=6-I>74@<R!T96TL(&-H=&]B>2!O9W)A;FEC:&ET>UQC<')I;65]7%P-"F-H M:7-L;R!P<F5T96YD96YT;W8@;F$@<&]L:71I8VAE<VMO92!U8F5Z:&ES:&-H M92P@<W9I9&5T96Q[7&-P<FEM97US='9U970@;R!T;VTL(&-H=&]<7 T*<')A M=GEA<VAC:&EE('IA<&%D;GEE(&MR=6=I('9Y;G5Z:&1E;GD@<F5A9VER;W9A 
M='M<8W!R:6UE?2!N82!N;W9Y92!S=')A:VAI+GM<(%P@?59O>FYI:V%E=%Q< M#0IV;W!R;W,Z(&$@;F4@>F%S=&%V>6%T(&QI(%Q@971I('-T<F%K:&D@=B!U M<VQO=FEY86MH('9O>FUO>FAN;V=O('5K:'5D<VAE;FEY85Q<#0I<8&5K;VYO M;6EC:&5S:V]G;R!P;VQO>FAE;FEY82!B;VQ[7&-P<FEM97US:&EN<W1V82!E M=G)O<&5[7'4@:7US:VEK:"!G;W-U9&%R<W1V('!R;WM<=2!I?71I(&EK:%Q< M#0IC:&5R97H@8F]L97IN96YN>64@:7-P>71A;FEY82!N82!P<FEV97)Z:&5N M;F]S='M<8W!R:6UE?2!I9&5Y86T@9&5M;VMR871I:3]<7 T*>UP@7"!]>UP@ M7"!]5B!P<FEV97)Z:&5N;F]S=&D@97M<=2!I?2!U(&YY;F5S:&YI:V@@<')A M=GEA<VAC:&EK:"!<8&5L:70@6F%P861A('-O;6YE;FE[7'4@:7T@;F5T+EQ< M#0I.;R!K86MI;2!B=61E="!N;W9O92!P;VMO;&5N:64@<&]L:71I8VAE<VMI M:V@@9&5Y871E;&5[7'4@:7TL(&YE('!O>UQU(&E]9%PB970@;&D@;VYO7%P- M"FYA('!O=F]D=2!U('!O<')A=F5V<VAE>UQU(&E](&-H87-T:2!O8G-H8VAE M<W1V83][7"!<('U0;VYY871N;RP@8VAT;R!<8&5T;W0@=F]P<F]S(&-H87-H M8VAE7%P-"G9S96=O('-O<')O=F]Z:&1A971[7&-Y9&]T?7-Y82!T<F5V;WIH M;GEM('9Z9VQY861O;2!V('-T;W)O;G4@1V5R;6%N:6DL('!R979R87-H8VAA M>75S:&-H97M<=2!I?7-Y85Q<#0IS96=O9&YY82!V(&1O;6EN:7)U>75S:&-H M:7M<=2!I?2!F86MT;W(@979R;W!E>UQU(&E]<VMO>UQU(&E]('-C96YY+EQ< M#0I[7"!<('U[7"!<('U[7"!<('U[7"!<('U[7"!<('U[7"!<('U[7"!<('U[ M7"!<('U[7"!<('U[7"!<('U[7"!<('TH;VMO;F-H86YI92!S;&5D=65T*5Q< 4#0H@?0I<96YD>V1O8W5M96YT?0I< end END_OF_FILE if test 13202 -ne `wc -c <'example.tex.UU'`; then echo shar: \"'example.tex.UU'\" unpacked with wrong size! else echo shar: Uudecoding \"'example.tex'\" $9560 characters$ cat example.tex.UU | uudecode if test 9560 -ne `wc -c <'example.tex'`; then echo shar: \"'example.tex'\" uudecoded with wrong size! else rm example.tex.UU fi fi # end of 'example.tex.UU' fi if test -f 'koi8-tex.rus' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'koi8-tex.rus'\" else echo shar: Extracting \"'koi8-tex.rus'\" $9352 characters$ sed "s/^X//" >'koi8-tex.rus' <<'END_OF_FILE' X# Jan Labanowski, jkl@osc.edu, Jan. 
10, 1992 X# File koi8_tex.dat X X# This is a transliteration data file for converting from KOI-8 as used X# by RELCOM (GOST 19768-74) to LaTeX X# The TeX tranliteration sequences follow AMS cyrillic convention for X# WNCYR fonts with cyracc.def file X# To be used with translit.c program by Jan Labanowski. For a format of X# this file consult translit documentation X X 1 file version number X X " " # string delimiters X [ ] # list delimites X { } # regular expression delimiters X X X X#starting sequence for LaTeX X"\documentstyle{article} X\input cyracc.def X\font\tencyr=wncyr10 X\def\cyr{\tencyr\cyracc} X\begin{document} X" X X#ending sequence X" X\end{document} X" X X 0 # number of input SHIFT sequences, only one set of input characters X X 2 # number of output SHIFT sequences, two sets of input characters X X# SHIFT-OUT SHIFT-IN X "" "" #shift sequences for set 1 (Latin) X "{\cyr " "}" #cyrillic enclosed in {\cyr ... } X X# conversion table X# inp_set inp_seq out_set out_seq X X X# characters which are not in ASCII (and DEL) and not in KOI8 to * X 0 [\0x7F-\0xA2\0xA4-\0xB2\0xB4-\0xBF] 0 "$\star$" X X# dehyphenate words, e.g. con- (NL)cert is changed to concert(NL) X# Below is a complicated (?) regular expression. It joins a hyphenated X# word. It looks for one of more letters (saves them as substring 1) X# followed by a hyphen (which may be followed by zero or more spaces X# or tabs). The hyphen must be followed by a NewLine (characters 0A-0D hex X# are various new line sequences) and saves NewLine sequence. Then it looks X# for zero or more tabs and spaces (at the beginning of the line). Then it X# looks for the rest of the hyphenated word and saves it as substring 3. X# The word may have punctuation attached. Then it looks again for some spaces X# or tabs. The substitute string junks all sequences which were not withn (), X# i.e., hyphen and spaces/tabs and inserts only substrings but in a different X# order. 
The 1 (word beginning) is followed by 3 (word end) and followed by X# the NewLine. The {\2\1\3} would be equally good. The string is then returned X# back for processing (output code is -1). Note that since input regular X# expression is very long, I chopped it into several lines by using \NL. X# If \ is followed by a white space, the \ and all white space which follow it X# is removed by the program. Be carefull not to use "\white_space" in strings, X# lists or regular expressions. If you must, enter \ as a code (i.e., \0x5C). X X# uncomment lines below if you want to dehyphenate X X# 0 {([A-Za-z\0xA3\0xB3\0xC0-\0xFF]+)-[ \0x09]*([\0x0A-\0x0D]+)[ \0x09]*(\ X# [A-Za-z\0xA3\0xB3\0xC0-\0xFF,.?;:")'`!]+)[ \0x09]} X# -1 {\1\3\2} X X# All latin letters are converted to the same letters but with the output X# set 1 X 0 [A-Za-z] 1 [A-Za-z] #Latin letters A-Z and a-z X X# Add \\ before all NewLine sequences X 0 {([\0x0B-\0x0D]*)\0x0A([\0x0B-\0x0D]*)} 0 {\\\\\1\0x0A\2} X X# Convert all double spaces to protected LaTeX spaces. 
Note that the X# backslash is followed by a space here, and had to be entered as its code X 0 " " 0 "{\0x5C \0x5C }" X X# Quote some special TeX characters X X# these do not require going out of {\cyr ....} X 0 "[" 0 "$[$" X 0 "]" 0 "$]$" X 0 "^" 0 "$\wedge$" X 0 "{" 0 "$\lbrace$" X 0 "}" 0 "$\rbrace$" X 0 "~" 0 "$\sim$" X 0 "\" 0 "$\backslash$" X 0 "|" 0 "$\mid$" X 0 "*" 0 "$\star$" X 0 "<" 0 "$<$" X 0 ">" 0 "$>$" X 0 "$" 0 "\$" X 0 "%" 0 "\%" X X# these can be represented correctly only in Latin charset X 0 "_" 1 "\_" X 0 "&" 1 "\&" X 0 "#" 1 "\#" X 0 "@" 1 "@" X X# Cyrillic letters X 0 "\0xF4\0xFD" 2 "T{\cydot}Shch" # to prevent C X 0 "\0xF4\0xDD" 2 "T{\cydot}shch" # to prevent C X 0 "\0xD4\0xFD" 2 "t{\cydot}Shch" # to prevent C X 0 "\0xD4\0xDD" 2 "t{\cydot}shch" # to prevent C X X 0 "\0xF4\0xFB" 2 "T{\cydot}Sh" # to prevent C X 0 "\0xF4\0xDB" 2 "T{\cydot}sh" # to prevent C X 0 "\0xD4\0xFB" 2 "t{\cydot}Sh" # to prevent C X 0 "\0xD4\0xDB" 2 "t{\cydot}sh" # to prevent C X X 0 "\0xF4\0xF3" 2 "T{\cydot}S" # to prevent C X 0 "\0xF4\0xD3" 2 "T{\cydot}s" # to prevent C X 0 "\0xD4\0xF3" 2 "t{\cydot}S" # to prevent c X 0 "\0xD4\0xD3" 2 "t{\cydot}s" # to prevent c X X 0 "\0xA3" 2 "\\0o42e" # small \"e (yo) X 0 "\0xB3" 2 "\\0o42E" # capital \"E (Yo) X 0 "\0xE1" 2 "A" X 0 "\0xE2" 2 "B" X 0 "\0xF7" 2 "V" X 0 "\0xE7" 2 "G" X 0 "\0xE4" 2 "D" X 0 "\0xE5" 2 "E" X 0 "\0xF6" 2 "Zh" X 0 "\0xFA" 2 "Z" X 0 "\0xE9" 2 "I" X 0 "\0xEA" 2 "{\u I}" # I kratkoje X 0 "\0xEB" 2 "K" X 0 "\0xEC" 2 "L" X 0 "\0xED" 2 "M" X 0 "\0xEE" 2 "N" X 0 "\0xEF" 2 "O" X 0 "\0xF0" 2 "P" X 0 "\0xF2" 2 "R" X 0 "\0xF3" 2 "S" X 0 "\0xF4" 2 "T" X 0 "\0xF5" 2 "U" X 0 "\0xE6" 2 "F" X 0 "\0xE8" 2 "Kh" X 0 "\0xE3" 2 "C" X 0 "\0xFE" 2 "Ch" X 0 "\0xFB" 2 "Sh" X 0 "\0xFD" 2 "Shch" X 0 "\0xFF" 2 "{\Cdprime}" # Tverdyj znak X 0 "\0xF9" 2 "Y" X 0 "\0xF8" 2 "{\Cprime}" # Myagkij znak X 0 "\0xFC" 2 "\`E" X 0 "\0xE0" 2 "Yu" X 0 "\0xF1" 2 "Ya" X 0 "\0xC1" 2 "a" X 0 "\0xC2" 2 "b" X 0 "\0xD7" 2 "v" X 0 "\0xC7" 2 "g" X 0 "\0xC4" 2 
"d" X 0 "\0xC5" 2 "e" X 0 "\0xD6" 2 "zh" X 0 "\0xDA" 2 "z" X 0 "\0xC9" 2 "i" X 0 "\0xCA" 2 "{\u i}" X 0 "\0xCB" 2 "k" X 0 "\0xCC" 2 "l" X 0 "\0xCD" 2 "m" X 0 "\0xCE" 2 "n" X 0 "\0xCF" 2 "o" X 0 "\0xD0" 2 "p" X 0 "\0xD2" 2 "r" X 0 "\0xD3" 2 "s" X 0 "\0xD4" 2 "t" X 0 "\0xD5" 2 "u" X 0 "\0xC6" 2 "f" X 0 "\0xC8" 2 "kh" X 0 "\0xC3" 2 "c" X 0 "\0xDE" 2 "ch" X 0 "\0xDB" 2 "sh" X 0 "\0xDD" 2 "shch" X 0 "\0xDF" 2 "{\cdprime}" X 0 "\0xD9" 2 "y" X 0 "\0xD8" 2 "{\cprime}" X 0 "\0xDC" 2 "\`e" X 0 "\0xC0" 2 "yu" X 0 "\0xD1" 2 "ya" END_OF_FILE if test 9352 -ne `wc -c <'koi8-tex.rus'`; then echo shar: \"'koi8-tex.rus'\" unpacked with wrong size! fi # end of 'koi8-tex.rus' fi if test -f 'readme.doc' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'readme.doc'\" else echo shar: Extracting \"'readme.doc'\" $8389 characters$ sed "s/^X//" >'readme.doc' <<'END_OF_FILE' XWHAT IS TRANSLIT PROGRAM X======================== XThe TRANSLIT program is used to transliterate character codes. XThe ASCII table of characters (containing characters with codes 0 to 127) Xis a table for English language. For other languages many different schemes Xare used to represent their respective alphabets. Some use codes larger Xthan 127, some use multicharacter sequences to represent a single letter Xin foreign alphabets. There is also UNICODE and other proposed standards Xto use units larger than 8-bits(1 byte) to represent foreign alphabets. XFor example, UNICODE will use 16-bit(2 byte) codes. At this moment, the XTRANSLIT program supports only 8-bit codes, but will be expanded to XUNICODE if there is enough interest. X XIt is frequently necessary to convert from one representation to another Xrepresentation of the foreign alphabet. 
E.g., in the Library of Congress Xtransliteration, the Russian letter sha is transliterated as two Latin Xletters "sh" while the popular word processors use a code 232 (decimal), Xthe RELCOM network uses a code 221, and the KOI7 set uses character "[" Xfor the same letter. So if your screen driver, printer, word processor, Xetc. uses different codes than your text, you need to transliterate. X XThe TRANSLIT program is a powerful tool for such tasks. It converts an input Xfile in one representation to the output file in another representation using Xan appropriate, user defined, transliteration table. Transliteration table Xallows for very elaborate transliteration tasks and includes provisions for Xplain character sequences, character lists, regular expressions (flexible Xmatches), SHIFT-OUT/IN sequences and more. The program comes with documentation Xand examples of popular transliteration schemes for Russian language. Other Xfiles will be added with your collaboration. X XFILES IN THE PROGRAM DISTRIBUTION X================================ XThe following files are currently in the distribution. They are all ASCII X(text) files (with the exception on translit.tar.Z and translit.zip). XPlease note that the copyright notice requires that, if you distribute this Xprogram, you have to distribute the complete set of files. XTRANSLIT is copyrighted: Copyright (c) Jan Labanowski and JKL Enterprises, Inc. X X Name Description X DOCUMENTATION Xreadme.doc This file Xtranslit.ps PostScript version of program documentation and X installation procedure Xtranslit.1 [nt]roff version of the above in the format X of UN*X man page (use -man option with [nt]roff) Xtranslit.txt Plain text version of the above. 
Xorder.txt Order form for ordering the executable program (compiled X with installation script and instructions) X X TRANSLITERATION TABLES FOR RUSSIAN (read comments in the files) Xalt-gos.rus ALT to GOSTCII table Xalt-koi8.rus ALT to KOI8 table Xgos-alt.rus GOSTCII to ALT table Xgos-koi8.rus GOSTCII to KOI8 table Xkoi7-8.rus KOI7 to KOI8 table Xkoi7nl-8.rus KOI7 (no Latin) to KOI8 table Xkoi8-7.rus KOI8 to KOI7 table Xkoi8-alt.rus KOI8 to ALT table Xkoi8-gos.rus KOI8 to GOSTCII table Xkoi8-lc.rus KOI8 to Library of Congress table Xkoi8-phg.rus KOI8 to GOST transliteration Xkoi8-php.rus KOI8 to Pokrovsky transliteration Xkoi8-tex.rus KOI8 to LaTeX conversion Xphg-koi8.rus GOST transliteration to KOI8 Xpho-8sim.rus Simple phonetic to KOI8 Xpho-koi8.rus Various phonetic to KOI8 Xphp-koi8.rus Pokrovsky transliteration to KOI8 Xtex-koi8.rus LaTeX to KOI8 X X EXAMPLES Xexample.alt.uu uuencoded example in ALT Xexample.ko8.uu uuencoded example in KOI8 Xexample.pho phonetic transliteration example Xexample.tex LaTeX example X X TRANSLIT PROGRAM SOURCE in C. Xtranslit.c Main program Xpaths.h Include file Xreg_exp.h Include file Xreg_exp.c Modified regular expression package by H. Spencer Xreg_sub.c Modified regular expression package by H. Spencer X X X PACKED FILES CONTAINING THE WHOLE DISTRIBUTION FROM ABOVE Xtranslit.tar.Z --- Compressed tar file with the whole distribution. X ON UN*X use: X zcat translit.tar.Z | tar xvof - X to get all individual files. This file is BINARY, and X you should not attempt to obtain it via email. X This is a best way to get the whole ditribution via X ftp if you are on the UN*X machine. Xtranslit.tar.z.uu --- uuencoded file from the above. It can be transmitted X via e-mail, but it is a large file, and if your mailer X sets limits on your messages, it may not be correctly X transmitted. To recover individual files from the X email message, do: X uudecode message_file X where the mesage_file is a saved email message. 
X You will obtain translit.tar.Z file which you can X unpack as described above. Xtranslit.zip --- This is a "zipped" file (i.e., compressed with a ZIP X program. It is binary (i.e., you cannot get it via X e-mail, but you can get it via ftp with binary switch X set) To get individual file do: X unzip translit.zip (in UNIX) X or X PKUNZIP translit.zip (under MS-DOS) X and you will obtain a full distribution. Xtranslit.zip.uu --- Uuencoded file from above. Can be sent via e-mail but X it is big. To recover all files do: X uudecode message_file X where message_file is your saved message and then X "unzip" it as shown above. X X X HOW TO OBTAIN THE FILES: X ======================= X XVia FTP (if you are on Internet): X--------------------------------- X ftp kekule.osc.edu (or ftp 128.146.36.48) X Login: anonymous X Password: Your_email_address (Please...) X ftp> ascii (or binary if you retrieve binary files) X ftp> cd pub/russian/translit X ftp> get file_name X ..... (for each file) X ftp> quit X XVia E-mail: X----------- X Send message: X send translit/file_name from russian X to OSCPOST@osc.edu or OSCPOST@OHSTPY.BITNET. You can retrieve more files X with a single message by placing several lines of the above format. X The file will be forwarded to your mailbox automatically. X XThe "file_name" in the instructions above means any file from the list Xgiven above. If you do not know or have programs like uudecode, unzip, tar, Xzcat or uncompress, get all individual files one by one. If you know how Xto use the above programs it may be faster for you to get a tar or zip Xarchive and unpack it. X XProgram installation and compilation is described in the translit docs. XSince the program requires that you make small changes to paths.h before Xcompilation (depending on your system and environment), I cannot realy Xdistribute generic executables (i.e., compiled programs). 
You have to modify Xpaths.h to suit your needs and operationg system and compile the program using Xyour favorite C compiler. X XGETTING THE READY TO RUN PROGRAM X================================ XIf you do not have time, do not have resources, or for whatever reason Xyou wish a ready to run executable of TRANSLIT, you can order it for Xa very modest fee from JKL ENTERPRISES, INC. as described in the file: Xorder.txt. It will come with an easy installation script which will ask Xyou a few simple questions and install the program. X XI invite, and will try to answer, bug reports, comments and suggestions. XIf there is an interest I will work on optimizing the program, on supporting Xthe UNICODE, and other enhancements which you suggest. If you use the Xprogram for commercial purposes, and on many computers in your organization, Xyou might want to buy the program from JKL ENTERPRISES, INC., to aid further Xdevelopment, though you are not required to do so. X X XEnjoy, X XAuthor coordinates: XJan Labanowski XP.O. Box 21821 XColumbus, OH 43221-0821, USA Xjkl@osc.edu, JKL@OHSTPY.BITNET X------------------------------ X X X X X END_OF_FILE if test 8389 -ne `wc -c <'readme.doc'`; then echo shar: \"'readme.doc'\" unpacked with wrong size! fi # end of 'readme.doc' fi if test -f 'reg_exp.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'reg_exp.c'\" else echo shar: Extracting \"'reg_exp.c'\" $28384 characters$ sed "s/^X//" >'reg_exp.c' <<'END_OF_FILE' X/* X * reg_comp and reg_exec -- reg_sub and reg_error are elsewhere X * X * Copyright (c) 1986 by University of Toronto. X * Written by Henry Spencer. Not derived from licensed software. X * X * Permission is granted to anyone to use this software for any X * purpose on any computer system, and to redistribute it freely, X * subject to the following restrictions: X * X * 1. 
The author is not responsible for the consequences of use of X * this software, no matter how awful, even if they arise X * from defects in it. X * X * 2. The origin of this software must not be misrepresented, either X * by explicit claim or by omission. X * X * 3. Altered versions must be plainly marked as such, and must not X * be misrepresented as being the original software. X * X * Beware that some of this code is subtly aware of the way operator X * precedence is structured in regular expressions. Serious changes in X * regular-expression syntax might require a total rethink. X */ X X /* Jan Labanowski made some small modifications which are marked in the X text (jkl) X */ X X#include "paths.h" /* jkl */ X/* #include <regexp.h> ---- originally */ X#include "reg_exp.h" /* modified by jkl */ X/* #include "reg_magic.h" regmagic included in reg_exp.h, jkl */ X X/* X * The "internal use only" fields in reg_exp.h are present to pass info from X * compile to execute that permits the execute phase to run lots faster on X * simple cases. They are: X * X * regstart char that must begin a match; '\0' if none obvious X * reganch is the match anchored (at beginning-of-line only)? X * regmust string (pointer into program) that match must include, or NULL X * regmlen length of regmust string X * X * Regstart and reganch permit very fast decisions on suitable starting points X * for a match, cutting down the work a lot. Regmust permits fast rejection X * of lines that cannot possibly match. The regmust tests are costly enough X * that reg_comp() supplies a regmust only if the r.e. contains something X * potentially expensive (at present, the only such thing detected is * or + X * at the start of the r.e., which can involve a lot of backup). Regmlen is X * supplied because the test in reg_exec() needs it and reg_comp() is computing X * it anyway. X */ X X/* X * Structure for reg_exp "program". 
This is essentially a linear encoding X * of a nondeterministic finite-state machine (aka syntax charts or X * "railroad normal form" in parsing technology). Each node is an opcode X * plus a "next" pointer, possibly plus an operand. "Next" pointers of X * all nodes except BRANCH implement concatenation; a "next" pointer with X * a BRANCH on both ends of it is connecting two alternatives. (Here we X * have one of the subtle syntax dependencies: an individual BRANCH (as X * opposed to a collection of them) is never concatenated with anything X * because of operator precedence.) The operand of some types of node is X * a literal string; for others, it is a node leading into a sub-FSM. In X * particular, the operand of a BRANCH node is the first node of the branch. X * (NB this is *not* a tree structure: the tail of the branch connects X * to the thing following the set of BRANCHes.) The opcodes are: X */ X X/* definition number opnd? meaning */ X#define END 0 /* no End of program. */ X#define BOL 1 /* no Match "" at beginning of line. */ X#define EOL 2 /* no Match "" at end of line. */ X#define ANY 3 /* no Match any one character. */ X#define ANYOF 4 /* str Match any character in this string. */ X#define ANYBUT 5 /* str Match any character not in this string. */ X#define BRANCH 6 /* node Match this alternative, or the next... */ X#define BACK 7 /* no Match "", "next" ptr points backward. */ X#define EXACTLY 8 /* str Match this string. */ X#define NOTHING 9 /* no Match empty string. */ X#define STAR 10 /* node Match this (simple) thing 0 or more times. */ X#define PLUS 11 /* node Match this (simple) thing 1 or more times. */ X#define OPEN 20 /* no Mark this point in input as start of #n. */ X /* OPEN+1 is number 1, etc. */ X#define CLOSE 30 /* no Analogous to OPEN. 
*/ X X/* X * Opcode notes: X * X * BRANCH The set of branches constituting a single choice are hooked X * together with their "next" pointers, since precedence prevents X * anything being concatenated to any individual branch. The X * "next" pointer of the last BRANCH in a choice points to the X * thing following the whole choice. This is also where the X * final "next" pointer of each individual branch points; each X * branch starts with the operand node of a BRANCH node. X * X * BACK Normal "next" pointers all implicitly point forward; BACK X * exists to make loop structures possible. X * X * STAR,PLUS '?', and complex '*' and '+', are implemented as circular X * BRANCH structures using BACK. Simple cases (one character X * per match) are implemented with STAR and PLUS for speed X * and to minimize recursive plunges. X * X * OPEN,CLOSE ...are numbered at compile time. X */ X X/* X * A node is one char of opcode followed by two chars of "next" pointer. X * "Next" pointers are stored as two 8-bit pieces, high order first. The X * value is a positive offset from the opcode of the node containing it. X * An operand, if any, simply follows the node. (Note that much of the X * code generation knows about this implicit relationship.) X * X * Using two bytes for the "next" pointer is vast overkill for most things, X * but allows patterns to get big without disasters. X */ X#define OP(p) (*(p)) X#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) X#define OPERAND(p) ((p) + 3) X X/* X * See reg_magic.h for one further detail of program structure. X */ X X X/* X * Utility definitions. 
X */ X/* replaced this with my intcode() routine in translit.c , jkl X #ifndef CHARBITS X #define UCHARAT(p) ((int)*(unsigned char *)(p)) X #else X #define UCHARAT(p) ((int)*(p)&CHARBITS) X #endif X*/ X#define UCHARAT(p) (intcode(*(p))) Xextern int intcode(); X X#if STRCHR X#else X#define strchr indexfun X extern int indexfun(); X#endif X X#define FAIL(m) { reg_error(m); return(NULL); } X#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') X/* #define META "^$.[()|?+*\\" */ X#define META ".[()|?+*\\" /* disabled ^$ jkl */ X X/* X * Flags to be passed up and down. X */ X#define HASWIDTH 01 /* Known never to match null string. */ X#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ X#define SPSTART 04 /* Starts with * or +. */ X#define WORST 0 /* Worst case. */ X X/* X * Global work variables for reg_comp(). X */ Xstatic char *regparse; /* Input-scan pointer. */ Xstatic int regnpar; /* () count. */ Xstatic char regdummy; Xstatic char *regcode; /* Code-emit pointer; &regdummy = don't. */ Xstatic long regsize; /* Code size. */ X X/* X * Forward declarations for reg_comp()'s friends. X */ X#if STATICFUN X#define STATIC static X#else X#define STATIC X#endif X XSTATIC char *reg(); XSTATIC char *regbranch(); XSTATIC char *regpiece(); XSTATIC char *regatom(); XSTATIC char *regnode(); XSTATIC char *regnext(); XSTATIC void regc(); XSTATIC void reginsert(); XSTATIC void regtail(); XSTATIC void regoptail(); X X X/* X - reg_comp - compile a regular expression into internal code X * X * We can't allocate space until we know how big the compiled form will be, X * but we can't compile it (and thus know how big it is) until we've got a X * place to put the code. So we cheat: we compile it twice, once with code X * generation turned off and size counting turned on, and once "for real".
X * This also means that we don't allocate space until we are sure that the X * thing really will compile successfully, and we never have to move the X * code and thus invalidate pointers into it. (Note that it has to be in X * one piece because free() must be able to free it all.) X * X * Beware that the optimization-preparation code in here knows about some X * of the structure of the compiled reg_exp. X */ Xreg_exp * Xreg_comp(exp) Xchar *exp; X{ X register reg_exp *r; X register char *scan; X register char *longest; X register int len; X int flags; X/* extern char *malloc(); --- */ X X if (exp == NULL) X FAIL("NULL argument"); X X /* First pass: determine size, legality. */ X regparse = exp; X regnpar = 1; X regsize = 0L; X regcode = &regdummy; X regc(MAGIC); X if (reg(0, &flags) == NULL) X return(NULL); X X /* Small enough for pointer-storage convention? */ X if (regsize >= 32767L) /* Probably could be 65535L. */ X FAIL("regexp too big"); X X /* Allocate space. */ X r = (reg_exp *)malloc(sizeof(reg_exp) + (unsigned)regsize); X if (r == NULL) X FAIL("out of space"); X X /* Second pass: emit code. */ X regparse = exp; X regnpar = 1; X regcode = r->program; X regc(MAGIC); X if (reg(0, &flags) == NULL) X return(NULL); X X /* Dig out information for optimizations. */ X r->regstart = '\0'; /* Worst-case defaults. */ X r->reganch = 0; X r->regmust = NULL; X r->regmlen = 0; X scan = r->program+1; /* First BRANCH. */ X if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ X scan = OPERAND(scan); X X /* Starting-point info. */ X if (OP(scan) == EXACTLY) X r->regstart = *OPERAND(scan); X else if (OP(scan) == BOL) X r->reganch++; X X /* X * If there's something expensive in the r.e., find the X * longest literal string that must appear and make it the X * regmust. Resolve ties in favor of later strings, since X * the regstart check works with the beginning of the r.e. X * and avoiding duplication strengthens checking.
Not a X * strong reason, but sufficient in the absence of others. X */ X if (flags&SPSTART) { X longest = NULL; X len = 0; X for (; scan != NULL; scan = regnext(scan)) X if ((OP(scan) == EXACTLY) && X (strlen(OPERAND(scan)) >= len)) { X longest = OPERAND(scan); X len = strlen(OPERAND(scan)); X } X r->regmust = longest; X r->regmlen = len; X } X } X X return(r); X} X X/* X - reg - regular expression, i.e. main body or parenthesized thing X * X * Caller must absorb opening parenthesis. X * X * Combining parenthesis handling with the base level of regular expression X * is a trifle forced, but the need to tie the tails of the branches to what X * follows makes it hard to avoid. X */ XSTATIC char * Xreg(paren, flagp) Xint paren; /* Parenthesized? */ Xint *flagp; X{ X register char *ret; X register char *br; X register char *ender; X register int parno; X int flags; X X *flagp = HASWIDTH; /* Tentatively. */ X X /* Make an OPEN node, if parenthesized. */ X if (paren) { X if (regnpar >= NSUBEXP) X FAIL("too many ()"); X parno = regnpar; X regnpar++; X ret = regnode(OPEN+parno); X } else X ret = NULL; X X /* Pick up the branches, linking them together. */ X br = regbranch(&flags); X if (br == NULL) X return(NULL); X if (ret != NULL) X regtail(ret, br); /* OPEN -> first. */ X else X ret = br; X if (!(flags&HASWIDTH)) X *flagp &= ~HASWIDTH; X *flagp |= flags&SPSTART; X while (*regparse == '|') { X regparse++; X br = regbranch(&flags); X if (br == NULL) X return(NULL); X regtail(ret, br); /* BRANCH -> BRANCH. */ X if (!(flags&HASWIDTH)) X *flagp &= ~HASWIDTH; X *flagp |= flags&SPSTART; X } X X /* Make a closing node, and hook it on the end. */ X ender = regnode((paren) ? CLOSE+parno : END); X regtail(ret, ender); X X /* Hook the tails of the branches to the closing node. */ X for (br = ret; br != NULL; br = regnext(br)) X regoptail(br, ender); X X /* Check for proper termination. 
*/ X if (paren && *regparse++ != ')') { X FAIL("unmatched ()"); X } else if (!paren && *regparse != '\0') { X if (*regparse == ')') { X FAIL("unmatched ()"); X } else X FAIL("junk on end"); /* "Can't happen". */ X /* NOTREACHED */ X } X X return(ret); X} X X/* X - regbranch - one alternative of an | operator X * X * Implements the concatenation operator. X */ XSTATIC char * Xregbranch(flagp) Xint *flagp; X{ X register char *ret; X register char *chain; X register char *latest; X int flags; X X *flagp = WORST; /* Tentatively. */ X X ret = regnode(BRANCH); X chain = NULL; X while (*regparse != '\0' && *regparse != '|' && *regparse != ')') { X latest = regpiece(&flags); X if (latest == NULL) X return(NULL); X *flagp |= flags&HASWIDTH; X if (chain == NULL) /* First piece. */ X *flagp |= flags&SPSTART; X else X regtail(chain, latest); X chain = latest; X } X if (chain == NULL) /* Loop ran zero times. */ X (void) regnode(NOTHING); X X return(ret); X} X X/* X - regpiece - something followed by possible [*+?] X * X * Note that the branching code sequences used for ? and the general cases X * of * and + are somewhat optimized: they use the same NOTHING node as X * both the endmarker for their branch list and the body of the last branch. X * It might seem that this node could be dispensed with entirely, but the X * endmarker role is not redundant. X */ XSTATIC char * Xregpiece(flagp) Xint *flagp; X{ X register char *ret; X register char op; X register char *next; X int flags; X X ret = regatom(&flags); X if (ret == NULL) X return(NULL); X X op = *regparse; X if (!ISMULT(op)) { X *flagp = flags; X return(ret); X } X X if (!(flags&HASWIDTH) && op != '?') X FAIL("*+ operand could be empty"); X *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); X X if (op == '*' && (flags&SIMPLE)) X reginsert(STAR, ret); X else if (op == '*') { X /* Emit x* as (x&|), where & means "self". 
*/ X reginsert(BRANCH, ret); /* Either x */ X regoptail(ret, regnode(BACK)); /* and loop */ X regoptail(ret, ret); /* back */ X regtail(ret, regnode(BRANCH)); /* or */ X regtail(ret, regnode(NOTHING)); /* null. */ X } else if (op == '+' && (flags&SIMPLE)) X reginsert(PLUS, ret); X else if (op == '+') { X /* Emit x+ as x(&|), where & means "self". */ X next = regnode(BRANCH); /* Either */ X regtail(ret, next); X regtail(regnode(BACK), ret); /* loop back */ X regtail(next, regnode(BRANCH)); /* or */ X regtail(ret, regnode(NOTHING)); /* null. */ X } else if (op == '?') { X /* Emit x? as (x|) */ X reginsert(BRANCH, ret); /* Either x */ X regtail(ret, regnode(BRANCH)); /* or */ X next = regnode(NOTHING); /* null. */ X regtail(ret, next); X regoptail(ret, next); X } X regparse++; X if (ISMULT(*regparse)) X FAIL("nested *?+"); X X return(ret); X} X X/* X - regatom - the lowest level X * X * Optimization: gobbles an entire sequence of ordinary characters so that X * it can turn them into a single node, which is smaller to store and X * faster to run. Backslashed characters are exceptions, each becoming a X * separate node; the code is simpler that way and it's not worth fixing. X */ XSTATIC char * Xregatom(flagp) Xint *flagp; X{ X register char *ret; X int flags; X X *flagp = WORST; /* Tentatively. */ X X switch (*regparse++) { X/* the ^ and $ hooks are disabled by jkl */ X/* case '^': X ret = regnode(BOL); X break; X case '$': X ret = regnode(EOL); X break; */ X case '.': X ret = regnode(ANY); X *flagp |= HASWIDTH|SIMPLE; X break; X case '[': { X register int class; X register int classend; X X if (*regparse == '^') { /* Complement of range. 
*/ X ret = regnode(ANYBUT); X regparse++; X } else X ret = regnode(ANYOF); X if (*regparse == ']' || *regparse == '-') X regc(*regparse++); X while (*regparse != '\0' && *regparse != ']') { X if (*regparse == '-') { X regparse++; X if (*regparse == ']' || *regparse == '\0') X regc('-'); X else { X class = UCHARAT(regparse-2)+1; X classend = UCHARAT(regparse); X if (class > classend+1) X FAIL("invalid [] range"); X for (; class <= classend; class++) X regc(class); X regparse++; X } X } else X regc(*regparse++); X } X regc('\0'); X if (*regparse != ']') X FAIL("unmatched []"); X regparse++; X *flagp |= HASWIDTH|SIMPLE; X } X break; X case '(': X ret = reg(1, &flags); X if (ret == NULL) X return(NULL); X *flagp |= flags&(HASWIDTH|SPSTART); X break; X case '\0': X case '|': X case ')': X FAIL("internal urp"); /* Supposed to be caught earlier. */ X break; X case '?': X case '+': X case '*': X FAIL("?+* follows nothing"); X break; X case '\\': X if (*regparse == '\0') X FAIL("trailing \\"); X ret = regnode(EXACTLY); X regc(*regparse++); X regc('\0'); X *flagp |= HASWIDTH|SIMPLE; X break; X default: { X register int len; X register char ender; X X regparse--; X len = strcspn(regparse, META); X if (len <= 0) X FAIL("internal disaster"); X ender = *(regparse+len); X if (len > 1 && ISMULT(ender)) X len--; /* Back off clear of ?+* operand. */ X *flagp |= HASWIDTH; X if (len == 1) X *flagp |= SIMPLE; X ret = regnode(EXACTLY); X while (len > 0) { X regc(*regparse++); X len--; X } X regc('\0'); X } X break; X } X X return(ret); X} X X/* X - regnode - emit a node X */ XSTATIC char * /* Location. */ Xregnode(op) Xchar op; X{ X register char *ret; X register char *ptr; X X ret = regcode; X if (ret == &regdummy) { X regsize += 3; X return(ret); X } X X ptr = ret; X *ptr++ = op; X *ptr++ = '\0'; /* Null "next" pointer.
*/ X *ptr++ = '\0'; X regcode = ptr; X X return(ret); X} X X/* X - regc - emit (if appropriate) a byte of code X */ XSTATIC void Xregc(b) Xchar b; X{ X if (regcode != &regdummy) X *regcode++ = b; X else X regsize++; X} X X/* X - reginsert - insert an operator in front of already-emitted operand X * X * Means relocating the operand. X */ XSTATIC void Xreginsert(op, opnd) Xchar op; Xchar *opnd; X{ X register char *src; X register char *dst; X register char *place; X X if (regcode == &regdummy) { X regsize += 3; X return; X } X X src = regcode; X regcode += 3; X dst = regcode; X while (src > opnd) X *--dst = *--src; X X place = opnd; /* Op node, where operand used to be. */ X *place++ = op; X *place++ = '\0'; X *place++ = '\0'; X} X X/* X - regtail - set the next-pointer at the end of a node chain X */ XSTATIC void Xregtail(p, val) Xchar *p; Xchar *val; X{ X register char *scan; X register char *temp; X register int offset; X X if (p == &regdummy) X return; X X /* Find last node. */ X scan = p; X for (;;) { X temp = regnext(scan); X if (temp == NULL) X break; X scan = temp; X } X X if (OP(scan) == BACK) X offset = scan - val; X else X offset = val - scan; X *(scan+1) = (offset>>8)&0377; X *(scan+2) = offset&0377; X} X X/* X - regoptail - regtail on operand of first argument; nop if operandless X */ XSTATIC void Xregoptail(p, val) Xchar *p; Xchar *val; X{ X /* "Operandless" and "op != BRANCH" are synonymous in practice. */ X if (p == NULL || p == &regdummy || OP(p) != BRANCH) X return; X regtail(OPERAND(p), val); X} X X/* X * regexec and friends X */ X X/* X * Global work variables for reg_exec(). X */ Xstatic char *reginput; /* String-input pointer. */ Xstatic char *regbol; /* Beginning of input, for ^ check. */ Xstatic char **regstartp; /* Pointer to startp array. */ Xstatic char **regendp; /* Ditto for endp. */ X X/* X * Forwards.
X */ Xint reg_try(); /* jkl, took "static" out, so it is known to the linker */ XSTATIC int regmatch(); XSTATIC int regrepeat(); X X#ifdef DEBUG Xint regnarrate = 0; Xvoid regdump(); XSTATIC char *regprop(); X#endif X X/* X - reg_exec - match a regexp against a string X */ Xint Xreg_exec(prog, string) Xregister reg_exp *prog; Xregister char *string; X{ X register char *s; X/* extern char *strchr(); jkl */ X X /* Be paranoid... */ X if (prog == NULL || string == NULL) { X reg_error("NULL parameter"); X return(0); X } X X /* Check validity of program. */ X if (UCHARAT(prog->program) != MAGIC) { X reg_error("corrupted program"); X return(0); X } X X /* If there is a "must appear" string, look for it. */ X if (prog->regmust != NULL) { X s = string; X while ((s = strchr(s, prog->regmust[0])) != NULL) { X if (strncmp(s, prog->regmust, prog->regmlen) == 0) X break; /* Found it. */ X s++; X } X if (s == NULL) /* Not present. */ X return(0); X } X X /* Mark beginning of line for ^ . */ X regbol = string; X X /* Simplest case: anchored match need be tried only once. */ X if (prog->reganch) X return(reg_try(prog, string)); X X /* Messy cases: unanchored match. */ X s = string; X if (prog->regstart != '\0') X /* We know what char it must start with. */ X while ((s = strchr(s, prog->regstart)) != NULL) { X if (reg_try(prog, s)) X return(1); X s++; X } X else X /* We don't -- general case. */ X do { X if (reg_try(prog, s)) X return(1); X } while (*s++ != '\0'); X X /* Failure. 
*/ X return(0); X} X X/* X - reg_try - try match at specific point X */ Xint /* 0 failure, 1 success */ Xreg_try(prog, string) Xreg_exp *prog; Xchar *string; X{ X register int i; X register char **sp; X register char **ep; X X reginput = string; X regstartp = prog->startp; X regendp = prog->endp; X X sp = prog->startp; X ep = prog->endp; X for (i = NSUBEXP; i > 0; i--) { X *sp++ = NULL; X *ep++ = NULL; X } X if (regmatch(prog->program + 1)) { X prog->startp[0] = string; X prog->endp[0] = reginput; X return(1); X } else X return(0); X} X X/* X - regmatch - main matching routine X * X * Conceptually the strategy is simple: check to see whether the current X * node matches, call self recursively to see whether the rest matches, X * and then act accordingly. In practice we make some effort to avoid X * recursion, in particular by going through "ordinary" nodes (that don't X * need to know whether the rest of the match failed) by a loop instead of X * by recursion. X */ XSTATIC int /* 0 failure, 1 success */ Xregmatch(prog) Xchar *prog; X{ X register char *scan; /* Current node. */ X char *next; /* Next node. */ X/* extern char *strchr(); --- jkl */ X X scan = prog; X#ifdef DEBUG X if (scan != NULL && regnarrate) X fprintf(stderr, "%s(\n", regprop(scan)); X#endif X while (scan != NULL) { X#ifdef DEBUG X if (regnarrate) X fprintf(stderr, "%s...\n", regprop(scan)); X#endif X next = regnext(scan); X X switch (OP(scan)) { X case BOL: X if (reginput != regbol) X return(0); X break; X case EOL: X if (*reginput != '\0') X return(0); X break; X case ANY: X if (*reginput == '\0') X return(0); X reginput++; X break; X case EXACTLY: { X register int len; X register char *opnd; X X opnd = OPERAND(scan); X /* Inline the first character, for speed. 
*/ X if (*opnd != *reginput) X return(0); X len = strlen(opnd); X if (len > 1 && strncmp(opnd, reginput, len) != 0) X return(0); X reginput += len; X } X break; X case ANYOF: X if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL) X return(0); X reginput++; X break; X case ANYBUT: X if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL) X return(0); X reginput++; X break; X case NOTHING: X break; X case BACK: X break; X case OPEN+1: X case OPEN+2: X case OPEN+3: X case OPEN+4: X case OPEN+5: X case OPEN+6: X case OPEN+7: X case OPEN+8: X case OPEN+9: { X register int no; X register char *save; X X no = OP(scan) - OPEN; X save = reginput; X X if (regmatch(next)) { X /* X * Don't set startp if some later X * invocation of the same parentheses X * already has. X */ X if (regstartp[no] == NULL) X regstartp[no] = save; X return(1); X } else X return(0); X } X break; X case CLOSE+1: X case CLOSE+2: X case CLOSE+3: X case CLOSE+4: X case CLOSE+5: X case CLOSE+6: X case CLOSE+7: X case CLOSE+8: X case CLOSE+9: { X register int no; X register char *save; X X no = OP(scan) - CLOSE; X save = reginput; X X if (regmatch(next)) { X /* X * Don't set endp if some later X * invocation of the same parentheses X * already has. X */ X if (regendp[no] == NULL) X regendp[no] = save; X return(1); X } else X return(0); X } X break; X case BRANCH: { X register char *save; X X if (OP(next) != BRANCH) /* No choice. */ X next = OPERAND(scan); /* Avoid recursion. */ X else { X do { X save = reginput; X if (regmatch(OPERAND(scan))) X return(1); X reginput = save; X scan = regnext(scan); X } while (scan != NULL && OP(scan) == BRANCH); X return(0); X /* NOTREACHED */ X } X } X break; X case STAR: X case PLUS: { X register char nextch; X register int no; X register char *save; X register int min; X X /* X * Lookahead to avoid useless match attempts X * when we know what character comes next. 
X */ X nextch = '\0'; X if (OP(next) == EXACTLY) X nextch = *OPERAND(next); X min = (OP(scan) == STAR) ? 0 : 1; X save = reginput; X no = regrepeat(OPERAND(scan)); X while (no >= min) { X /* If it could work, try it. */ X if (nextch == '\0' || *reginput == nextch) X if (regmatch(next)) X return(1); X /* Couldn't or didn't -- back up. */ X no--; X reginput = save + no; X } X return(0); X } X break; X case END: X return(1); /* Success! */ X break; X default: X reg_error("memory corruption"); X return(0); X break; X } X X scan = next; X } X X /* X * We get here only if there's trouble -- normally "case END" is X * the terminating point. X */ X reg_error("corrupted pointers"); X return(0); X} X X/* X - regrepeat - repeatedly match something simple, report how many X */ XSTATIC int Xregrepeat(p) Xchar *p; X{ X register int count = 0; X register char *scan; X register char *opnd; X X scan = reginput; X opnd = OPERAND(p); X switch (OP(p)) { X case ANY: X count = strlen(scan); X scan += count; X break; X case EXACTLY: X while (*opnd == *scan) { X count++; X scan++; X } X break; X case ANYOF: X while ((*scan != '\0') && /* parenthesized by jkl */ X (strchr(opnd, *scan) != (char *)NULL)) { X count++; X scan++; X } X break; X case ANYBUT: X while (*scan != '\0' && strchr(opnd, *scan) == NULL) { X count++; X scan++; X } X break; X default: /* Oh dear. Called inappropriately. */ X reg_error("internal foulup"); X count = 0; /* Best compromise.
*/ X break; X } X reginput = scan; X X return(count); X} X X/* X - regnext - dig the "next" pointer out of a node X */ XSTATIC char * Xregnext(p) Xregister char *p; X{ X register int offset; X X if (p == &regdummy) X return(NULL); X X offset = NEXT(p); X if (offset == 0) X return(NULL); X X if (OP(p) == BACK) X return(p-offset); X else X return(p+offset); X} X X#ifdef DEBUG X XSTATIC char *regprop(); X X/* X - regdump - dump a regexp onto stdout in vaguely comprehensible form X */ Xvoid Xregdump(r) Xreg_exp *r; X{ X register char *s; X register char op = EXACTLY; /* Arbitrary non-END op. */ X register char *next; X/* extern char *strchr(); ---jkl */ X X X s = r->program + 1; X while (op != END) { /* While that wasn't END last time... */ X op = OP(s); X printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ X next = regnext(s); X if (next == NULL) /* Next ptr. */ X printf("(0)"); X else X printf("(%d)", (s-r->program)+(next-s)); X s += 3; X if (op == ANYOF || op == ANYBUT || op == EXACTLY) { X /* Literal string, where present. */ X while (*s != '\0') { X putchar(*s); X s++; X } X s++; X } X putchar('\n'); X } X X /* Header fields of interest.
*/ X if (r->regstart != '\0') X printf("start `%c' ", r->regstart); X if (r->reganch) X printf("anchored "); X if (r->regmust != NULL) X printf("must have \"%s\"", r->regmust); X printf("\n"); X} X X/* X - regprop - printable representation of opcode X */ XSTATIC char * Xregprop(op) Xchar *op; X{ X register char *p; X static char buf[50]; X X (void) strcpy(buf, ":"); X X switch (OP(op)) { X case BOL: X p = "BOL"; X break; X case EOL: X p = "EOL"; X break; X case ANY: X p = "ANY"; X break; X case ANYOF: X p = "ANYOF"; X break; X case ANYBUT: X p = "ANYBUT"; X break; X case BRANCH: X p = "BRANCH"; X break; X case EXACTLY: X p = "EXACTLY"; X break; X case NOTHING: X p = "NOTHING"; X break; X case BACK: X p = "BACK"; X break; X case END: X p = "END"; X break; X case OPEN+1: X case OPEN+2: X case OPEN+3: X case OPEN+4: X case OPEN+5: X case OPEN+6: X case OPEN+7: X case OPEN+8: X case OPEN+9: X sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN); X p = NULL; X break; X case CLOSE+1: X case CLOSE+2: X case CLOSE+3: X case CLOSE+4: X case CLOSE+5: X case CLOSE+6: X case CLOSE+7: X case CLOSE+8: X case CLOSE+9: X sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE); X p = NULL; X break; X case STAR: X p = "STAR"; X break; X case PLUS: X p = "PLUS"; X break; X default: X reg_error("corrupted opcode"); X break; X } X if (p != NULL) X (void) strcat(buf, p); X return(buf); X} X#endif X X/* X * The following is provided for those people who do not have strcspn() in X * their C libraries. They should get off their butts and do something X * about it; at least one public-domain implementation of those (highly X * useful) string routines has been published on Usenet. 
X */ X#if STRCSPN X#else X X/* X * strcspn - find length of initial segment of s1 consisting entirely X * of characters not from s2 X */ X XSTATIC int Xstrcspn(s1, s2) Xchar *s1; Xchar *s2; X{ X register char *scan1; X register char *scan2; X register int count; X X count = 0; X for (scan1 = s1; *scan1 != '\0'; scan1++) { X for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */ X if (*scan1 == *scan2++) X return(count); X count++; X } X return(count); X} X#endif END_OF_FILE if test 28384 -ne `wc -c <'reg_exp.c'`; then echo shar: \"'reg_exp.c'\" unpacked with wrong size! fi # end of 'reg_exp.c' fi echo shar: End of archive 8 \(of 10\). cp /dev/null ark8isdone MISSING="" for I in 1 2 3 4 5 6 7 8 9 10 ; do if test ! -f ark${I}isdone ; then MISSING="${MISSING} ${I}" fi done if test "${MISSING}" = "" ; then echo You have unpacked all 10 archives. rm -f ark[1-9]isdone ark[1-9][0-9]isdone else echo You still must unpack the following archives: echo " " ${MISSING} fi exit 0 exit 0 # Just in case...