home *** CD-ROM | disk | FTP | other *** search
Text File | 1993-03-21 | 62.7 KB | 1,913 lines |
- Newsgroups: comp.sources.misc
- From: jkl@osc.edu (Jan Labanowski)
- Subject: REPOST: v36i030: translit - transliterate foreign alphabets, Part08/10
- Message-ID: <1993Mar23.031051.21304@sparky.imd.sterling.com>
- X-Md4-Signature: 8dbd56262e22e63de4c0c140853fb7e5
- Date: Tue, 23 Mar 1993 03:10:51 GMT
- Approved: kent@sparky.imd.sterling.com
-
- Submitted-by: jkl@osc.edu (Jan Labanowski)
- Posting-number: Volume 36, Issue 30
- Archive-name: translit/part08
- Environment: UNIX, MS-DOS, VMS
-
- [ This is being reposted due to a file containing control characters ]
- [ didn't make it through news systems... -Kent+ ]
-
- #! /bin/sh
- # This is a shell archive. Remove anything before this line, then feed it
- # into a shell via "sh file" or similar. To overwrite existing files,
- # type "sh file -c".
- # Contents: example.tex.UU koi8-tex.rus readme.doc reg_exp.c
- # Wrapped by kent@sparky on Fri Mar 19 16:00:14 1993
- PATH=/bin:/usr/bin:/usr/ucb:/usr/local/bin:/usr/lbin ; export PATH
- echo If this archive is complete, you will see the following message:
- echo ' "shar: End of archive 8 (of 10)."'
- if test -f 'example.tex.UU' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'example.tex.UU'\"
- else
- echo shar: Extracting \"'example.tex.UU'\" \(13202 characters\)
- sed "s/^X//" >'example.tex.UU' <<'END_OF_FILE'
- begin 664 example.tex
- M7&1O8W5M96YT<W1Y;&5[87)T:6-L97T*7&EN<'5T(&-Y<F%C8RYD968*7&9O
- M;G1<=&5N8WER/7=N8WER,3 *7&1E9EQC>7)[7'1E;F-Y<EQC>7)A8V-]"EQB
- M96=I;GMD;V-U;65N='T*('M<8WER($$@0B!6($<@1"!%(%PB12!::"!:($D@
- M>UQU($E]($L@3"!-($X@3R!0(%(@4R!4(%4@1B!+:"!#($-H(%-H(%-H8V@@
- M>UQ#9'!R:6UE?2!9('M<0W!R:6UE?2!<8$4@674@66$@4UQ<#0IA(&(@=B!G
- M(&0@92!<(F4@>F@@>B!I('M<=2!I?2!K(&P@;2!N(&\@<"!R(',@="!U(&8@
- M:V@@8R!C:"!S:"!S:&-H('M<8V1P<FEM97T@>2![7&-P<FEM97T@7&!E('EU
- M('EA(%1<7 T*>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]
- M>UP@7"!]>UP@7"!]("AI>B!G87IE='D@4TUO<VMO=G-K:64@;F]V;W-T:50L
- M(#8N,3(N.3(I7%P-"GM<(%P@?7M<(%P@?7M<(%P@?7M<(%P@?7M<(%P@?7M<
- M(%P@?7M<(%P@?7M<(%P@?7M<(%P@?7M<(%P@?7M<(%P@?7M<(%P@?7M<(%P@
- M?7M<(%P@?7M<(%P@?2!665I/5GM<(%P@?2 H8VAA<W1[7&-P<FEM97T@<&5R
- M=F%Y82E<7 T*>UP@7"!]>UP@7"!]5B!K86MO>UQU(&E]('-U971E(&UY('-E
- M9V]D;GEA('IH:79<(F5M+GM<(%P@?41A>FAE('!O;&ET:6MA+"!I('1A(&1A
- M=FYO('5Z:&5<7 T*<&5R97-T86QA(&YA<R!Z86YI;6%T>UQC<')I;65]+GM<
- M(%P@?4UY(&]B<F%S:&-H865M(&YA(&YE7")E('9N:6UA;FEE('1O;'M<8W!R
- M:6UE?6MO('1O9V1A+"!K;V=D85Q<#0IN879E<FMH=2!R87IG;W)A971[7&-Y
- M9&]T?7-Y82!O8VAE<F5D;F%Y82!S=F%R82Y[7"!<('U.92!V;VQN=7EU="!N
- M87,@:2!M:7)O=GEE('!R;V)L96UY+EQ<#0I#:'1O('IH+"!Z87IE;6Q<(F5N
- M;F]S='M<8W!R:6UE?2!N87-H96=O(&)Y=&EY82!V<&]L;F4@<&]N>6%T;F$N
- M>UP@7"!]02!M97IH9'4@=&5M('!R;VES:VAO9'EA<VAC:&5E7%P-"G9O:W)U
- M9R!D97M<=2!I?7-T=FET96Q[7&-P<FEM97UN;R!P<FEO8G)E=&%E="!G;&]B
- M86Q[7&-P<FEM97UN>64@:7IM97)E;FEY82Y<7 T*>UP@7"!]>UP@7"!]>UP@
- M7"!]>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]+BXN+BXN+BXN+BXN
- M+BXN+BXN+BXN+BXN+BXN+BXN+BXN+EQ<#0I[7"!<('U[7"!<('U[7"!<('U[
- M7"!<('U[7"!<('U[7"!<('U[7"!<('U[7"!<('U[7"!<('U[7"!<('T@4U-3
- M4B!53452+GM<(%P@?4M43R!03T)%1$E,/UQ<#0I[7"!<('U[7"!<('TN+BY)
- M=&%K+"!T;VQ[7&-P<FEM97UK;R!T97!E<GM<8W!R:6UE?2!N86-H:6YA>75T
- M('9Y<FES;W9Y=F%T>UQC<')I;65]<WEA(&MO;G1U<GD@=&5K:"!G<F%N9&EO
- M>FYY:VA<7 T*<&]S;&5D<W1V:7M<=2!I?2P@:V]T;W)Y92!S=GEA>F%N>2!S
- M(&MR86MH;VT@:V]M;75N:7IM82!V(%-O=F5T>UQC>61O='US:V]M(%-O>75Z
- M92!I(&5G;UQ<#0IR87-P861O;2Y[7"!<('U%<VAC:%PB92!V8VAE<F$@>F%R
- M=6)E>FAN>64@86YA;&ET:6MI(',@;VQI;7!I>UQU(&E]<VMI;2!B97-P<FES
- M=')A<W1I96U<7 T*=GII<F%L:2!N82!B97IR87-S=61O8VAN>64@:7IG:6)Y
- M(&YA<VAE9V\@=GER>79N;V=O(&D@;F5P;VYY871N;V=O(&1V:7IH96YI>6$N
- M7%P-"E-E9V]D;GEA('5Z:&4@;6YO9VEM('-T86YO=FET>UQC>61O='US>6$@
- M>6%S;F\L(&-H=&\@<W5M8G5R;GEE(&MO;G9U;'M<8W!R:6UE?7-I:5Q<#0IP
- M;W-T:V]M;75N:7-T:6-H97-K;V=O(&UI<F$@<V%M>6T@;F5P;W-R961S='9E
- M;FYY;2!O8G)A>F]M('IA=')A9VEV87EU=%Q<#0ID86Q[7&-P<FEM97UN97M<
- M=2!I?7-H=7EU('-U9'M<8W!R:6UE?6)U('9S96=O(&UE>FAD=6YA<F]D;F]G
- M;R!S;V]B<VAC:&5S='9A+GM<(%P@?41A+"!K;VUM=6YI>FT@<G5K:&YU;"Q<
- M7 T*4U-34B!I<V-H97H@<R!P;VQI=&EC:&5S:V][7'4@:7T@:V%R='DL(&YO
- M('1E;2!S86UY;2!B>6QA('IA=F5R<VAE;F$@8V5L87EA(&=L879A7%P-"G8@
- M:7-T;W)I:2!V<V5[7'4@:7T@8VEV:6QI>F%C:6DN7%P-"GM<(%P@?7M<(%P@
- M?4YE('1O;'M<8W!R:6UE?6MO(&YA<VQE9&YI:VD@:V]M;75N:7IM82P@;F\@
- M:2!O<W1A;'M<8W!R:6UE?6YO>UQU(&E](&UI<B!O:V%Z86QI<WM<8W!R:6UE
- M?2!V9')U9UQ<#0IP97)E9"!C:&ES='EM(&QI<W1O;2Y[7"!<('U0;R!M97)E
- M(&]S;WIN86YI>6$@7&!E=&]G;R!F86MT82!V('IA<G5B97IH;GEK:%Q<#0IP
- M;VQI=&EC:&5S:VEK:"!K<G5G86MH('!E<G9A>6$@7&!E>UQU(&E]9F]R:7EA
- M('!O8F5D>2!N860@4VEM<&5R:65[7'4@:7T@>FQA5"!I(&YE:V]E9V]<7 T*
- M<V%M;W5D;W9L971V;W)E;FEY82!S;65N>6%E='M<8WED;W1]<WEA('9S7")E
- M(&)O;'M<8W!R:6UE?7-H97M<=2!I?2!O>F%B;V-H96YN;W-T>UQC<')I;65]
- M>74L(')A<W1E<GEA;FYO<W1[7&-P<FEM97UY=2Q<7 T*82!K;V4M9V1E(&D@
- M<&%N:6MO>UQU(&E]+GM<(%P@?5)E8VA[7&-P<FEM97T@:61<(F5T(&YE('1O
- M;'M<8W!R:6UE?6MO(&\@=')E=F]G92P@=GEZ=F%N;F][7'4@:7U<7 T*;F5P
- M<F5D<VMA>G5E;6]S='M<8W!R:6UE?7EU('!R;V-E<W-O=B!N82!T97)R:71O
- M<FEI('9C:&5R87-H;F5G;R!34U-2+GM<(%P@?4MS=&%T:2P@<&]K85Q<#0IO
- M;FD@;F4@<')I;GEA;&D@<W1O;'M<8W!R:6UE?2!A<&]K86QI<'-I8VAE<VMO
- M>UQU(&E](&9O<FUY+"!K86L@;WIH:61A;&D@;6YO9VEE+"!I7%P-"G!R;W1E
- M:V%Y=70@=B!O=&QI8VAI92!O="!9=6=O<VQA=FEI(&)O;&5E(&-I=FEL:7IO
- M=F%N;F\N7%P-"GM<(%P@?7M<(%P@?59D<G5G(',@;V-H979I9&YO<W1[7&-P
- M<FEM97UY=2!O=&MR>6QO<WM<8W!R:6UE?2!D<G5G;V4@+2!C:'1O+"!N97-M
- M;W1R>6$@;F$@=FYE<VAN95Q<#0IA8G-O;'EU=&YU>74@<')O=&EV;W!O;&]Z
- M:&YO<W1[7&-P<FEM97T@>F%P861N;WM<=2!I?2!I(&MO;6UU;FES=&EC:&5S
- M:V][7'4@:7T@<VES=&5M+"!O;FE<7 T*=GIA:6UO<W9Y87IA;GDN>UP@7"!]
- M365K:&%N:7IM(')A>G9I=&EY82!T;WM<=2!I?2!I(&1R=6=O>UQU(&E]+"!K
- M86L@=&5P97)[7&-P<FEM97T@;V)N87)U>FAI=F%E='M<8WED;W1]<WEA+%Q<
- M#0IB>6P@>F%P<F]G<F%M;6ER;W9A;B!N82!N86QI8VAI92!S=F]E9V\@86YT
- M86=O;FES=&$N7%P-"GM<(%P@?7M<(%P@?45S:&-H7")E('!R961S=&]I="!R
- M87IO8G)A='M<8W!R:6UE?7-Y82P@=B!K86MO>UQU(&E]('-T97!E;FD@=&4@
- M:6QI(&EN>64@=&5N9&5N8VEI7%P-"F]B<VAC:&5S='9E;FYO>UQU(&E]('IH
- M:7IN:2!:87!A9&$@>6%V:6QI<WM<8W!R:6UE?2!R97IU;'M<8W!R:6UE?71A
- M=&]M(&5G;R!V;G5T<F5N;GE[7'4@:7T@7&!E=F]L>75C:6DL(&%<7 T*=B!K
- M86MO>UQU(&E](&)Y;&D@;V)U<VQO=FQE;GD@<W5S:&-H97-T=F]V86YI96T@
- M:V]M;75N:7-T:6-H97-K;V=O(&]B<VAC:&5S='9A+"!I7%P-"FYA;V)O<F]T
- M+GM<(%P@?4YO('IA<&%D;GE[7'4@:7T@;6ER+"!T86L@9&]L9V\@:2!A:W1I
- M=FYO(&1O8FEV879S:&E[7'4@:7US>6$@:V]N8V%<7 T*:V]M;75N:7IM82P@
- M;VMA>F%L<WEA(&YE('!O9&=O=&]V;&5N;GEM(&L@>FAI>FYI('!O<VQE(&5G
- M;R!P861E;FEY82Y[7"!<('U/;F]<7 T*;F%R=7-H:6QO(&=L;V)A;'M<8W!R
- M:6UE?6YU>74@<VES=&5M=2!B97IO<&%S;F]S=&D@:2!O8G-H8VAE>FAI=&EY
- M82P@:V]T;W)A>6$@<VMR=7!U;%PB97IN;UQ<#0IS;WID879A;&%S>UQC<')I
- M;65]('!O<VQE('9T;W)O>UQU(&E](&UI<F]V;WM<=2!I?2!V;WM<=2!I?6YY
- M+"!U<W!E;&$@;V)R87-T:2!S=F]E>UQU(&E](&)Y=7)O:W)A=&EE>UQU(&E]
- M(&E<7 T*<&]L=6-H:71[7&-P<FEM97T@9&%Z:&4@<V]B<W1V96YN=7EU(&QO
- M9VEK=2!R87IV:71I>6$N>UP@7"!]02!T=70@=B!O9&YO(&UG;F]V96YI92!<
- M8&5T;W1<7 T*;6ER;W!O<GEA9&]K(')A>G9A;&EL<WEA+EQ<#0I[7"!<('U[
- M7"!<('U6;65S=&4@<R!N:6T@<&]K86-H;G5L87-[7&-P<FEM97T@=G-Y82!R
- M87IV971V;%PB96YN87EA('-I<W1E;6$@:6YS=&ET=71O=B!I7%P-"F-E;FYO
- M<W1E>UQU(&E]+"!N82!K;W1O<GEK:"!D;R!S:6MH('!O<B!D97)Z:&%L;W-[
- M7&-P<FEM97T@:VAR=7!K;V4@;6ER;W9O92!R879N;W9E<VEE+EQ<#0I3=&%L
- M;R!Y87-N;RP@8VAT;R!N92!T;VQ[7&-P<FEM97UK;R!B>79S:&EM(&MO;6UU
- M;FES=&EC:&5S:VEM(&=O<W5D87)S='9A;2P@;F\@:5Q<#0IV<V5M=2!M:7)O
- M=F]M=2!S;V]B<VAC:&5S='9U('!R961S=&]I="!I<VMA='M<8W!R:6UE?2!N
- M;W9Y92!F;W)M>2!S=7-H8VAE<W1V;W9A;FEY82Y<7 T*4')I9%PB971[7&-Y
- M9&]T?7-Y82!Z86YO=F\@;W-M>7-L:79A='M<8W!R:6UE?2!M;F]G:64@=F]P
- M<F]S>2P@:V%Z879S:&EE<WEA(')A>B!I(&YA=G-E9V1A7%P-"G)E<VA<(F5N
- M;GEM:2Y[7"!<('U3:V%Z:&5M+"!S=&%B:6Q[7&-P<FEM97UN;W-T>UQC<')I
- M;65](&=R86YI8R!I;&D@;V)E<W!E8VAE;FEE(&-E;&]S=&YO<W1I7%P-"F=O
- M<W5D87)S='8N>UP@7"!]02!P<F%V82!N86-I>UQU(&E](&YA('-A;6]O<')E
- M9&5L96YI92P@;F]V>7M<=2!I?2!F961E<F%L:7IM(&E<7 T*<V]C:&5T86YI
- M92!K<FET97)I978@;F%C:6]N86Q[7&-P<FEM97UN;V=O('9O>G)O>FAD96YI
- M>6$@:2!D96UO:W)A=&EI/WM<(%P@?5-E9V]D;GEA(%Q@971I7%P-"G9O<')O
- M<WD@<&]D;GEA='D@<&]S=&MO;6UU;FES=&EC:&5S:VEM:2!O8G-H8VAE<W1V
- M86UI+GM<(%P@?4YO('9O="UV;W0@:R!N:6T@=FYO=GM<8W!R:6UE?5Q<#0IV
- M;WIV<F%T>6%T>UQC>61O='US>6$@*'5Z:&4@=F]Z=G)A<VAC:&%Y=71[7&-Y
- M9&]T?7-Y82D@9V]S=61A<G-T=F$@07II:2!I($%F<FEK:2P@9V1E7%P-"G-O
- M;W1V971[7&-Y9&]T?7-T=G5Y=7-H8VAI92!P<F]C97-S>2!B>6QI('8@<W9O
- M7")E('9R96UY82!I<VMU<W-T=F5N;F\@>F%M;W)O>FAE;GDN7%P-"GM<(%P@
- M?7M<(%P@?5!R:6MH;V1I='M<8WED;W1]<WEA('!R96]D;VQE=F%T>UQC<')I
- M;65](&D@;F%S:&D@;F5D879N:64@:6QL>75Z:6DN>UP@7"!]4VMO;'M<8W!R
- M:6UE?6MO(&)Y;&]<7 T*=F]S=&]R9V]V('!O('!O=F]D=2!R87-P861A(&)I
- M<&]L>6%N;WM<=2!I?2!S:7-T96UY(&UE>FAD=2!N87)O9&YY:V@@;W1N;W-H
- M96YI>UQU(&E]+%Q<#0IP;VMO:79S:&5[7'4@:7US>6$@;F$@<V]P97)N:6-H
- M97-T=F4@:2!V>F%I;6YO;2!S9&5R>FAI=F%N:6D@9'9U:V@@>6%D97)N>6MH
- M7%P-"G-V97)K:&1E<GIH878@+2!34VA!(&D@4U-34BY[7"!<('U$=6UA;&]S
- M>UQC<')I;65]+"!V;W0@;VYO+"!N87-T=7!L96YI92!D879N;UQ<#0II<VMO
- M;6]G;R!B97IO8FQA8VAN;V=O(&UI<F]P;W)Y861K82Y[7"!<('U.92!T=70M
- M=&\@8GEL;RY[7"!<('U59W)O>F$@9VQO8F%L>UQC<')I;65];F][7'4@:7U<
- M7 T*>6%D97)N;WM<=2!I?2!K;VYF<F]N=&%C:6D@9&5[7'4@:7US='9I=&5L
- M>UQC<')I;65];F\@<VYI>FEL87-[7&-P<FEM97TN>UP@7"!]3F\@=GIA;65N
- M(&)Y=G-H96=O7%P-"FMO;6UU;FES=&EC:&5S:V]G;R!L86=E<GEA(&UI<B!P
- M;VQU8VAI;"!C96QY>UQU(&E](%-B=6ME=%0@<')O8FQE;2 M(&D@<F]Z:&1E
- M;FEE7%P-"FYO=GEK:"!G;W-U9&%R<W1V+"!I('!E<F5S;6]T<B!G<F%N:6,L
- M(&D@:V]N9FQI:W1Y('!O('!O=F]D=2!P<F%V7%P-"FYA8VUE;GM<8W!R:6UE
- M?7-H:6YS='8L(&D@;F%K;VYE8RP@<W)A>G4@9'9E('9O>UQU(&E];GD@+2!N
- M82!"86QK86YA:V@@:2!+879K87IE+GM<(%P@?51A:V]V;UQ<#0IP<GEA;6]E
- M('-L961S='9I92!P861E;FEY82!34U-2+"!Z:&5L97IN;WM<=2!I?2!K:'9A
- M=&MO>UQU(&E]('!O9&%V;'EA=G-H96=O+"!Z86=O;GEA=G-H96=O7%P-"G9O
- M=FYU=')[7&-P<FEM97T@=G-E('!R;W1I=F]R96-H:7EA('9N=71R:2!S96)Y
- M82!I('8@<V9E<F4@<W9O96=O('9L:7EA;FEY82P@8VAT;R!T96U<7 T*<V%M
- M>6T@=7-I;&EV86QO(&UO<VAC:'M<8W!R:6UE?2!I:V@@<&]T96YC:6%L>UQC
- M<')I;65];F]G;R!V>G)Y=F$@<&]S;&4@=GEK:&]D82!N82!P;W9E<FMH;F]S
- M='M<8W!R:6UE?2Y<7 T*>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]
- M>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]>UP@7"!]7&!%2VA/(%-/5D54>UQC
- M>61O='U32T]'3R!205-0041!7%P-"GM<(%P@?7M<(%P@?41A;FYY>UQU(&E]
- M(&MH;V0@<V]B>71I>UQU(&E](&UO>FAN;R!B>6QO(&QE9VMO('!R;V=N;WII
- M<F]V871[7&-P<FEM97TN>UP@7"!]3F\@;6%L;R!K=&\@9'5M86PL7%P-"FMA
- M:V]V;R!B=61E="!V;&EY86YI92!S;W9E='M<8WED;W1]<VMO9V\@:W)U<VAE
- M;FEY82!N82!V>F%I;6]O=&YO<VAE;FEY82!V;G5T<FD@>F%P861N;V=O7%P-
- M"G-O;V)S:&-H97-T=F$N>UP@7"!]4V5G;V1N>6$@=7IH92!O8VAE=FED;F\L
- M(&-H=&\@4V]Y=7H@8GEL(&YE;6%L;W9A>FAN>6T@9F%K=&]R;VU<7 T*<W!L
- M;V-H96YI>6$@7&!E=&]G;R!S;V]B<VAC:&5S='9A(&D@<F%S<&%D('-O=F5T
- M>UQC>61O='US:V]G;R!G;W-U9&%R<W1V82!Y879I;'-Y82!T;VQC:&MO;2Q<
- M7 T*:V]T;W)Y>UQU(&E]('5S:6QI;"!N82!:87!A9&4@8V5N=')O8F5Z:&YY
- M92!T96YD96YC:6DL('9Y>6%V:6P@;F5S;W9P861E;FEE7%P-"FEN=&5R97-O
- M=B!I;F1U<W1R:6%L>UQC<')I;65];GEK:"!S=')A;BY[7"!<('U.82!F;VYE
- M('1E;F1E;F-I>UQU(&E]+"!N86UE=&EV<VAI:VAS>6$@=FYU=')I($53+%Q<
- M#0ID86Q[7&-P<FEM97UN97M<=2!I?7-H87EA('-U9'M<8W!R:6UE?6)A(&5V
- M<F]P97M<=2!I?7-K;WM<=2!I?2!I;G1E9W)A8VEI('5Z:&4@;F4@=GEG;'EA
- M9&ET(&)E>F]B;&%C:&YO>UQU(&E]+EQ<#0I+;VYE8VAN;RP@;V)[7&-D<')I
- M;65]>6%S;GEA971[7&-Y9&]T?7-Y82!<8&5T;R!V;G5T<F5N;FEM:2!P<F]C
- M97-S86UI('8@>F%P861N;V5V<F]P97M<=2!I?7-K:6MH('-T<F%N86MH+EQ<
- M#0I4;W0@9F%K="P@8VAT;R!P<F%K=&EC:&5S:VD@<&]V<WEU9'4@;F%C:&%L
- M:2!V9')U9R!G;W9O<FET>UQC<')I;65](&YE(&]B(&EN=&5G<F%C:6DL7%P-
- M"F$@;R!N86-I;VYA;'M<8W!R:6UE?6YY:V@@:6YT97)E<V%K:"P@=')E8F]V
- M871[7&-P<FEM97T@;F4@<F%S<&%K:&YU='M<8W!R:6UE?2!O:VYO('8@;6ER
- M+"!A(&]P=7-T:71[7&-P<FEM97U<7 T*<VAT;W)Y+"!V;R!M;F]G;VT@>6%V
- M;'EA971[7&-Y9&]T?7-Y82!S;&5D<W1V:65M('!O<')A=FQE;FEY82!O8G-H
- M8VAE<W1V96YN;V=O(&UN96YI>6$L7%P-"F%K=&EV:6EZ86-I:2!N86-I;VYA
- M;&ES=&EC:&5S:VEK:"!G<G5P<&ER;W9O:RP@<&]V<V5M97-T;F]G;R!U:VAU
- M9'-H96YI>6%<7 T*7&!E:V]N;VUI8VAE<VMO>UQU(&E]('-I='5A8VEI+EQ<
- M#0I[7"!<('U[7"!<('U.;RP@=B!S=F]Y=2!O8VAE<F5D>UQC<')I;65]+"!V
- M<V4@7&!E=&D@<')O8V5S<WD@<')Y86UO('-V>6%Z86YY(',@:7IM96YE;FEE
- M;5Q<#0IP<FEV>6-H;F]G;R!O:W)U>FAE;FEY82!:87!A9&YO>UQU(&E]($5V
- M87)O<'DN>UP@7"!]3VYA(&]K87IA;&%S>UQC<')I;65](&QI8V]M(&L@;&EC
- M=2!S7%P-"FYE<W1A8FEL>UQC<')I;65];GEM:2!I(&YE<')E9'-K87IU96UY
- M;6D@<&]S=&MO;6UU;FES=&EC:&5S:VEM:2!O8G-H8VAE<W1V86UI+GM<(%P@
- M?4-H96U<7 T*9&%L>UQC<')I;65]<VAE+"!T96T@8F]L>UQC<')I;65]<VAI
- M;2!P;V1O>G)E;FEE;2!Z87!A9&YY>UQU(&E](&]B>79A=&5L>UQC<')I;65]
- M('-M;W1R:70@=B!I:V@@<W1O<F]N=2Q<7 T*;F5S;'5S:&%V<VAI<WM<8W!R
- M:6UE?2!P<F]G;F]Z;W8@;R!G;W1O=GEA<VAC:&EK:'-Y82!Z86MH;&5S=&YU
- M='M<8W!R:6UE?2!%=G)O<'4@;6%S<V]V>6MH(&YA<VAE<W1V:7EA:VA<7 T*
- M;6EG<F%N=&]V(&D@<')O8VAI:V@@;F5P<FEY871N;W-T>6%K:"Y<7 T*>UP@
- M7"!]>UP@7"!]4V%M>64@:W)U<&YY92P@<F%Z=6UE971[7&-Y9&]T?7-Y82P@
- M;WIH:61A>75T>UQC>61O='US>6$@<V\@<W1O<F]N>2!B>79S:&5G;R!3;W9E
- M='M<8WED;W1]<VMO9V]<7 T*4V]Y=7IA+"!K;W1O<GE[7'4@:7T@8VAA<VAC
- M:&4@=G-E9V\@=F]S<')I;FEM865T>UQC>61O='US>6$@:V%K('!O=&5N8VEA
- M;'M<8W!R:6UE?6YY>UQU(&E](&ES=&]C:&YI:R!Y861E<FYY:VA<7 T*:V%T
- M87-T<F]F(&D@:W)O=F%V>6MH(&UE>FAN86-I;VYA;'M<8W!R:6UE?6YY:V@@
- M:V]N9FQI:W1O=BY[7"!<('U6;W0@=F%M(&D@<&]C:'9A(&1L>6$@;F]V>6MH
- M7%P-"G-T<F%K:&]V.R!V;W0@:2!I<W1O:VD@=FYE>F%P;F]G;R!K;VYS97)V
- M871I=FYO9V\@:W)E;F$@>F%P861N;V=O(&]B<VAC:&5S='9A+%Q<#0IE9V\@
- M='EA=&]T96YI>6$@:R!B;VQE92!Z:&5S=&MO;74@<F5Z:&EM=2P@82!Z86]D
- M;F\@:2!K(&YO=F]M=2!R87ID96QI=&5L>UQC<')I;65];F]M=5Q<#0IZ86YA
- M=F5S=2P@:V]T;W)Y>UQU(&E](&)Y(&]G<F%D:6P@96=O(&]T(&YA<VAE>UQU
- M(&E](&-H87-T:2!S=F5T82Y<7 T*>UP@7"!]>UP@7"!]4')I;65R($=E<FUA
- M;FEI+"!R97-H:79S:&5[7'4@:7T@=B!O='9E="!N82!P;V=R;VUY('9Y9'9O
- M<FET>UQC<')I;65]('IA('-V;VD@<')E9&5L>5Q<#0IT>7-Y86-H:2!R=6UY
- M;G-K:6MH(&-Y9V%N(&D@9&%Z:&4@:7IM96YI='M<8W!R:6UE?2!K;VYS=&ET
- M=6-I>74@<R!T96TL(&-H=&]B>2!O9W)A;FEC:&ET>UQC<')I;65]7%P-"F-H
- M:7-L;R!P<F5T96YD96YT;W8@;F$@<&]L:71I8VAE<VMO92!U8F5Z:&ES:&-H
- M92P@<W9I9&5T96Q[7&-P<FEM97US='9U970@;R!T;VTL(&-H=&]<7 T*<')A
- M=GEA<VAC:&EE('IA<&%D;GEE(&MR=6=I('9Y;G5Z:&1E;GD@<F5A9VER;W9A
- M='M<8W!R:6UE?2!N82!N;W9Y92!S=')A:VAI+GM<(%P@?59O>FYI:V%E=%Q<
- M#0IV;W!R;W,Z(&$@;F4@>F%S=&%V>6%T(&QI(%Q@971I('-T<F%K:&D@=B!U
- M<VQO=FEY86MH('9O>FUO>FAN;V=O('5K:'5D<VAE;FEY85Q<#0I<8&5K;VYO
- M;6EC:&5S:V]G;R!P;VQO>FAE;FEY82!B;VQ[7&-P<FEM97US:&EN<W1V82!E
- M=G)O<&5[7'4@:7US:VEK:"!G;W-U9&%R<W1V('!R;WM<=2!I?71I(&EK:%Q<
- M#0IC:&5R97H@8F]L97IN96YN>64@:7-P>71A;FEY82!N82!P<FEV97)Z:&5N
- M;F]S='M<8W!R:6UE?2!I9&5Y86T@9&5M;VMR871I:3]<7 T*>UP@7"!]>UP@
- M7"!]5B!P<FEV97)Z:&5N;F]S=&D@97M<=2!I?2!U(&YY;F5S:&YI:V@@<')A
- M=GEA<VAC:&EK:"!<8&5L:70@6F%P861A('-O;6YE;FE[7'4@:7T@;F5T+EQ<
- M#0I.;R!K86MI;2!B=61E="!N;W9O92!P;VMO;&5N:64@<&]L:71I8VAE<VMI
- M:V@@9&5Y871E;&5[7'4@:7TL(&YE('!O>UQU(&E]9%PB970@;&D@;VYO7%P-
- M"FYA('!O=F]D=2!U('!O<')A=F5V<VAE>UQU(&E](&-H87-T:2!O8G-H8VAE
- M<W1V83][7"!<('U0;VYY871N;RP@8VAT;R!<8&5T;W0@=F]P<F]S(&-H87-H
- M8VAE7%P-"G9S96=O('-O<')O=F]Z:&1A971[7&-Y9&]T?7-Y82!T<F5V;WIH
- M;GEM('9Z9VQY861O;2!V('-T;W)O;G4@1V5R;6%N:6DL('!R979R87-H8VAA
- M>75S:&-H97M<=2!I?7-Y85Q<#0IS96=O9&YY82!V(&1O;6EN:7)U>75S:&-H
- M:7M<=2!I?2!F86MT;W(@979R;W!E>UQU(&E]<VMO>UQU(&E]('-C96YY+EQ<
- M#0I[7"!<('U[7"!<('U[7"!<('U[7"!<('U[7"!<('U[7"!<('U[7"!<('U[
- M7"!<('U[7"!<('U[7"!<('U[7"!<('TH;VMO;F-H86YI92!S;&5D=65T*5Q<
- 4#0H@?0I<96YD>V1O8W5M96YT?0I<
-
- end
- END_OF_FILE
- if test 13202 -ne `wc -c <'example.tex.UU'`; then
- echo shar: \"'example.tex.UU'\" unpacked with wrong size!
- else
- echo shar: Uudecoding \"'example.tex'\" \(9560 characters\)
- cat example.tex.UU | uudecode
- if test 9560 -ne `wc -c <'example.tex'`; then
- echo shar: \"'example.tex'\" uudecoded with wrong size!
- else
- rm example.tex.UU
- fi
- fi
- # end of 'example.tex.UU'
- fi
- if test -f 'koi8-tex.rus' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'koi8-tex.rus'\"
- else
- echo shar: Extracting \"'koi8-tex.rus'\" \(9352 characters\)
- sed "s/^X//" >'koi8-tex.rus' <<'END_OF_FILE'
- X# Jan Labanowski, jkl@osc.edu, Jan. 10, 1992
- X# File koi8_tex.dat
- X
- X# This is a transliteration data file for converting from KOI-8 as used
- X# by RELCOM (GOST 19768-74) to LaTeX
- X# The TeX transliteration sequences follow AMS cyrillic convention for
- X# WNCYR fonts with cyracc.def file
- X# To be used with translit.c program by Jan Labanowski. For a format of
- X# this file consult translit documentation
- X
- X 1 file version number
- X
- X " " # string delimiters
- X [ ] # list delimiters
- X { } # regular expression delimiters
- X
- X
- X
- X#starting sequence for LaTeX
- X"\documentstyle{article}
- X\input cyracc.def
- X\font\tencyr=wncyr10
- X\def\cyr{\tencyr\cyracc}
- X\begin{document}
- X"
- X
- X#ending sequence
- X"
- X\end{document}
- X"
- X
- X 0 # number of input SHIFT sequences, only one set of input characters
- X
- X 2 # number of output SHIFT sequences, two sets of output characters
- X
- X# SHIFT-OUT SHIFT-IN
- X "" "" #shift sequences for set 1 (Latin)
- X "{\cyr " "}" #cyrillic enclosed in {\cyr ... }
- X
- X# conversion table
- X# inp_set inp_seq out_set out_seq
- X
- X
- X# characters which are not in ASCII (and DEL) and not in KOI8 to *
- X 0 [\0x7F-\0xA2\0xA4-\0xB2\0xB4-\0xBF] 0 "$\star$"
- X
- X# dehyphenate words, e.g. con- (NL)cert is changed to concert(NL)
- X# Below is a complicated (?) regular expression. It joins a hyphenated
- X# word. It looks for one or more letters (saves them as substring 1)
- X# followed by a hyphen (which may be followed by zero or more spaces
- X# or tabs). The hyphen must be followed by a NewLine (characters 0A-0D hex
- X# are various new line sequences) and saves the NewLine sequence as substring 2. Then it looks
- X# for zero or more tabs and spaces (at the beginning of the line). Then it
- X# looks for the rest of the hyphenated word and saves it as substring 3.
- X# The word may have punctuation attached. Then it looks again for some spaces
- X# or tabs. The substitute string junks all sequences which were not within (),
- X# i.e., hyphen and spaces/tabs and inserts only substrings but in a different
- X# order. The 1 (word beginning) is followed by 3 (word end) and followed by
- X# the NewLine. The {\2\1\3} would be equally good. The string is then returned
- X# back for processing (output code is -1). Note that since input regular
- X# expression is very long, I chopped it into several lines by using \NL.
- X# If \ is followed by a white space, the \ and all white space which follows it
- X# are removed by the program. Be careful not to use "\white_space" in strings,
- X# lists or regular expressions. If you must, enter \ as a code (i.e., \0x5C).
- X
- X# uncomment lines below if you want to dehyphenate
- X
- X# 0 {([A-Za-z\0xA3\0xB3\0xC0-\0xFF]+)-[ \0x09]*([\0x0A-\0x0D]+)[ \0x09]*(\
- X# [A-Za-z\0xA3\0xB3\0xC0-\0xFF,.?;:")'`!]+)[ \0x09]}
- X# -1 {\1\3\2}
- X
- X# All latin letters are converted to the same letters but with the output
- X# set 1
- X 0 [A-Za-z] 1 [A-Za-z] #Latin letters A-Z and a-z
- X
- X# Add \\ before all NewLine sequences
- X 0 {([\0x0B-\0x0D]*)\0x0A([\0x0B-\0x0D]*)} 0 {\\\\\1\0x0A\2}
- X
- X# Convert all double spaces to protected LaTeX spaces. Note that the
- X# backslash is followed by a space here, and had to be entered as its code
- X 0 " " 0 "{\0x5C \0x5C }"
- X
- X# Quote some special TeX characters
- X
- X# these do not require going out of {\cyr ....}
- X 0 "[" 0 "$[$"
- X 0 "]" 0 "$]$"
- X 0 "^" 0 "$\wedge$"
- X 0 "{" 0 "$\lbrace$"
- X 0 "}" 0 "$\rbrace$"
- X 0 "~" 0 "$\sim$"
- X 0 "\" 0 "$\backslash$"
- X 0 "|" 0 "$\mid$"
- X 0 "*" 0 "$\star$"
- X 0 "<" 0 "$<$"
- X 0 ">" 0 "$>$"
- X 0 "$" 0 "\$"
- X 0 "%" 0 "\%"
- X
- X# these can be represented correctly only in Latin charset
- X 0 "_" 1 "\_"
- X 0 "&" 1 "\&"
- X 0 "#" 1 "\#"
- X 0 "@" 1 "@"
- X
- X# Cyrillic letters
- X 0 "\0xF4\0xFD" 2 "T{\cydot}Shch" # to prevent C
- X 0 "\0xF4\0xDD" 2 "T{\cydot}shch" # to prevent C
- X 0 "\0xD4\0xFD" 2 "t{\cydot}Shch" # to prevent C
- X 0 "\0xD4\0xDD" 2 "t{\cydot}shch" # to prevent C
- X
- X 0 "\0xF4\0xFB" 2 "T{\cydot}Sh" # to prevent C
- X 0 "\0xF4\0xDB" 2 "T{\cydot}sh" # to prevent C
- X 0 "\0xD4\0xFB" 2 "t{\cydot}Sh" # to prevent C
- X 0 "\0xD4\0xDB" 2 "t{\cydot}sh" # to prevent C
- X
- X 0 "\0xF4\0xF3" 2 "T{\cydot}S" # to prevent C
- X 0 "\0xF4\0xD3" 2 "T{\cydot}s" # to prevent C
- X 0 "\0xD4\0xF3" 2 "t{\cydot}S" # to prevent c
- X 0 "\0xD4\0xD3" 2 "t{\cydot}s" # to prevent c
- X
- X 0 "\0xA3" 2 "\\0o42e" # small \"e (yo)
- X 0 "\0xB3" 2 "\\0o42E" # capital \"E (Yo)
- X 0 "\0xE1" 2 "A"
- X 0 "\0xE2" 2 "B"
- X 0 "\0xF7" 2 "V"
- X 0 "\0xE7" 2 "G"
- X 0 "\0xE4" 2 "D"
- X 0 "\0xE5" 2 "E"
- X 0 "\0xF6" 2 "Zh"
- X 0 "\0xFA" 2 "Z"
- X 0 "\0xE9" 2 "I"
- X 0 "\0xEA" 2 "{\u I}" # I kratkoje
- X 0 "\0xEB" 2 "K"
- X 0 "\0xEC" 2 "L"
- X 0 "\0xED" 2 "M"
- X 0 "\0xEE" 2 "N"
- X 0 "\0xEF" 2 "O"
- X 0 "\0xF0" 2 "P"
- X 0 "\0xF2" 2 "R"
- X 0 "\0xF3" 2 "S"
- X 0 "\0xF4" 2 "T"
- X 0 "\0xF5" 2 "U"
- X 0 "\0xE6" 2 "F"
- X 0 "\0xE8" 2 "Kh"
- X 0 "\0xE3" 2 "C"
- X 0 "\0xFE" 2 "Ch"
- X 0 "\0xFB" 2 "Sh"
- X 0 "\0xFD" 2 "Shch"
- X 0 "\0xFF" 2 "{\Cdprime}" # Tverdyj znak
- X 0 "\0xF9" 2 "Y"
- X 0 "\0xF8" 2 "{\Cprime}" # Myagkij znak
- X 0 "\0xFC" 2 "\`E"
- X 0 "\0xE0" 2 "Yu"
- X 0 "\0xF1" 2 "Ya"
- X 0 "\0xC1" 2 "a"
- X 0 "\0xC2" 2 "b"
- X 0 "\0xD7" 2 "v"
- X 0 "\0xC7" 2 "g"
- X 0 "\0xC4" 2 "d"
- X 0 "\0xC5" 2 "e"
- X 0 "\0xD6" 2 "zh"
- X 0 "\0xDA" 2 "z"
- X 0 "\0xC9" 2 "i"
- X 0 "\0xCA" 2 "{\u i}"
- X 0 "\0xCB" 2 "k"
- X 0 "\0xCC" 2 "l"
- X 0 "\0xCD" 2 "m"
- X 0 "\0xCE" 2 "n"
- X 0 "\0xCF" 2 "o"
- X 0 "\0xD0" 2 "p"
- X 0 "\0xD2" 2 "r"
- X 0 "\0xD3" 2 "s"
- X 0 "\0xD4" 2 "t"
- X 0 "\0xD5" 2 "u"
- X 0 "\0xC6" 2 "f"
- X 0 "\0xC8" 2 "kh"
- X 0 "\0xC3" 2 "c"
- X 0 "\0xDE" 2 "ch"
- X 0 "\0xDB" 2 "sh"
- X 0 "\0xDD" 2 "shch"
- X 0 "\0xDF" 2 "{\cdprime}"
- X 0 "\0xD9" 2 "y"
- X 0 "\0xD8" 2 "{\cprime}"
- X 0 "\0xDC" 2 "\`e"
- X 0 "\0xC0" 2 "yu"
- X 0 "\0xD1" 2 "ya"
- END_OF_FILE
- if test 9352 -ne `wc -c <'koi8-tex.rus'`; then
- echo shar: \"'koi8-tex.rus'\" unpacked with wrong size!
- fi
- # end of 'koi8-tex.rus'
- fi
- if test -f 'readme.doc' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'readme.doc'\"
- else
- echo shar: Extracting \"'readme.doc'\" \(8389 characters\)
- sed "s/^X//" >'readme.doc' <<'END_OF_FILE'
- XWHAT IS TRANSLIT PROGRAM
- X========================
- XThe TRANSLIT program is used to transliterate character codes.
- XThe ASCII table of characters (containing characters with codes 0 to 127)
- Xis a table for English language. For other languages many different schemes
- Xare used to represent their respective alphabets. Some use codes larger
- Xthan 127, some use multicharacter sequences to represent a single letter
- Xin foreign alphabets. There is also UNICODE and other proposed standards
- Xto use units larger than 8-bits(1 byte) to represent foreign alphabets.
- XFor example, UNICODE will use 16-bit(2 byte) codes. At this moment, the
- XTRANSLIT program supports only 8-bit codes, but will be expanded to
- XUNICODE if there is enough interest.
- X
- XIt is frequently necessary to convert from one representation to another
- Xrepresentation of the foreign alphabet. E.g., in the Library of Congress
- Xtransliteration, the Russian letter sha is transliterated as two Latin
- Xletters "sh" while the popular word processors use a code 232 (decimal),
- Xthe RELCOM network uses a code 221, and the KOI7 set uses character "["
- Xfor the same letter. So if your screen driver, printer, word processor,
- Xetc. uses different codes than your text, you need to transliterate.
- X
- XThe TRANSLIT program is a powerful tool for such tasks. It converts an input
- Xfile in one representation to the output file in another representation using
- Xan appropriate, user defined, transliteration table. Transliteration table
- Xallows for very elaborate transliteration tasks and includes provisions for
- Xplain character sequences, character lists, regular expressions (flexible
- Xmatches), SHIFT-OUT/IN sequences and more. The program comes with documentation
- Xand examples of popular transliteration schemes for Russian language. Other
- Xfiles will be added with your collaboration.
- X
- XFILES IN THE PROGRAM DISTRIBUTION
- X================================
- XThe following files are currently in the distribution. They are all ASCII
- X(text) files (with the exception of translit.tar.Z and translit.zip).
- XPlease note that the copyright notice requires that, if you distribute this
- Xprogram, you have to distribute the complete set of files.
- XTRANSLIT is copyrighted: Copyright (c) Jan Labanowski and JKL Enterprises, Inc.
- X
- X Name Description
- X DOCUMENTATION
- Xreadme.doc This file
- Xtranslit.ps PostScript version of program documentation and
- X installation procedure
- Xtranslit.1 [nt]roff version of the above in the format
- X of UN*X man page (use -man option with [nt]roff)
- Xtranslit.txt Plain text version of the above.
- Xorder.txt Order form for ordering the executable program (compiled
- X with installation script and instructions)
- X
- X TRANSLITERATION TABLES FOR RUSSIAN (read comments in the files)
- Xalt-gos.rus ALT to GOSTCII table
- Xalt-koi8.rus ALT to KOI8 table
- Xgos-alt.rus GOSTCII to ALT table
- Xgos-koi8.rus GOSTCII to KOI8 table
- Xkoi7-8.rus KOI7 to KOI8 table
- Xkoi7nl-8.rus KOI7 (no Latin) to KOI8 table
- Xkoi8-7.rus KOI8 to KOI7 table
- Xkoi8-alt.rus KOI8 to ALT table
- Xkoi8-gos.rus KOI8 to GOSTCII table
- Xkoi8-lc.rus KOI8 to Library of Congress table
- Xkoi8-phg.rus KOI8 to GOST transliteration
- Xkoi8-php.rus KOI8 to Pokrovsky transliteration
- Xkoi8-tex.rus KOI8 to LaTeX conversion
- Xphg-koi8.rus GOST transliteration to KOI8
- Xpho-8sim.rus Simple phonetic to KOI8
- Xpho-koi8.rus Various phonetic to KOI8
- Xphp-koi8.rus Pokrovsky transliteration to KOI8
- Xtex-koi8.rus LaTeX to KOI8
- X
- X EXAMPLES
- Xexample.alt.uu uuencoded example in ALT
- Xexample.ko8.uu uuencoded example in KOI8
- Xexample.pho phonetic transliteration example
- Xexample.tex LaTeX example
- X
- X TRANSLIT PROGRAM SOURCE in C.
- Xtranslit.c Main program
- Xpaths.h Include file
- Xreg_exp.h Include file
- Xreg_exp.c Modified regular expression package by H. Spencer
- Xreg_sub.c Modified regular expression package by H. Spencer
- X
- X
- X PACKED FILES CONTAINING THE WHOLE DISTRIBUTION FROM ABOVE
- Xtranslit.tar.Z --- Compressed tar file with the whole distribution.
- X ON UN*X use:
- X zcat translit.tar.Z | tar xvof -
- X to get all individual files. This file is BINARY, and
- X you should not attempt to obtain it via email.
- X This is the best way to get the whole distribution via
- X ftp if you are on the UN*X machine.
- Xtranslit.tar.z.uu --- uuencoded file from the above. It can be transmitted
- X via e-mail, but it is a large file, and if your mailer
- X sets limits on your messages, it may not be correctly
- X transmitted. To recover individual files from the
- X email message, do:
- X uudecode message_file
- X where the message_file is a saved email message.
- X You will obtain translit.tar.Z file which you can
- X unpack as described above.
- Xtranslit.zip --- This is a "zipped" file (i.e., compressed with a ZIP
- X program). It is binary (i.e., you cannot get it via
- X e-mail, but you can get it via ftp with binary switch
- X set). To get individual files do:
- X unzip translit.zip (in UNIX)
- X or
- X PKUNZIP translit.zip (under MS-DOS)
- X and you will obtain a full distribution.
- Xtranslit.zip.uu --- Uuencoded file from above. Can be sent via e-mail but
- X it is big. To recover all files do:
- X uudecode message_file
- X where message_file is your saved message and then
- X "unzip" it as shown above.
- X
- X
- X HOW TO OBTAIN THE FILES:
- X =======================
- X
- XVia FTP (if you are on Internet):
- X---------------------------------
- X ftp kekule.osc.edu (or ftp 128.146.36.48)
- X Login: anonymous
- X Password: Your_email_address (Please...)
- X ftp> ascii (or binary if you retrieve binary files)
- X ftp> cd pub/russian/translit
- X ftp> get file_name
- X ..... (for each file)
- X ftp> quit
- X
- XVia E-mail:
- X-----------
- X Send message:
- X send translit/file_name from russian
- X to OSCPOST@osc.edu or OSCPOST@OHSTPY.BITNET. You can retrieve more files
- X with a single message by placing several lines of the above format.
- X The file will be forwarded to your mailbox automatically.
- X
- XThe "file_name" in the instructions above means any file from the list
- Xgiven above. If you do not know or have programs like uudecode, unzip, tar,
- Xzcat or uncompress, get all individual files one by one. If you know how
- Xto use the above programs it may be faster for you to get a tar or zip
- Xarchive and unpack it.
- X
- XProgram installation and compilation is described in the translit docs.
- XSince the program requires that you make small changes to paths.h before
- Xcompilation (depending on your system and environment), I cannot really
- Xdistribute generic executables (i.e., compiled programs). You have to modify
- Xpaths.h to suit your needs and operating system and compile the program using
- Xyour favorite C compiler.
- X
- XGETTING THE READY TO RUN PROGRAM
- X================================
- XIf you do not have time, do not have resources, or for whatever reason
- Xyou wish a ready to run executable of TRANSLIT, you can order it for
- Xa very modest fee from JKL ENTERPRISES, INC. as described in the file:
- Xorder.txt. It will come with an easy installation script which will ask
- Xyou a few simple questions and install the program.
- X
- XI invite, and will try to answer, bug reports, comments and suggestions.
- XIf there is an interest I will work on optimizing the program, on supporting
- Xthe UNICODE, and other enhancements which you suggest. If you use the
- Xprogram for commercial purposes, and on many computers in your organization,
- Xyou might want to buy the program from JKL ENTERPRISES, INC., to aid further
- Xdevelopment, though you are not required to do so.
- X
- X
- XEnjoy,
- X
- XAuthor coordinates:
- XJan Labanowski
- XP.O. Box 21821
- XColumbus, OH 43221-0821, USA
- Xjkl@osc.edu, JKL@OHSTPY.BITNET
- X------------------------------
- X
- X
- X
- X
- X
- END_OF_FILE
- if test 8389 -ne `wc -c <'readme.doc'`; then
- echo shar: \"'readme.doc'\" unpacked with wrong size!
- fi
- # end of 'readme.doc'
- fi
- if test -f 'reg_exp.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'reg_exp.c'\"
- else
- echo shar: Extracting \"'reg_exp.c'\" \(28384 characters\)
- sed "s/^X//" >'reg_exp.c' <<'END_OF_FILE'
- X/*
- X * reg_comp and reg_exec -- reg_sub and reg_error are elsewhere
- X *
- X * Copyright (c) 1986 by University of Toronto.
- X * Written by Henry Spencer. Not derived from licensed software.
- X *
- X * Permission is granted to anyone to use this software for any
- X * purpose on any computer system, and to redistribute it freely,
- X * subject to the following restrictions:
- X *
- X * 1. The author is not responsible for the consequences of use of
- X * this software, no matter how awful, even if they arise
- X * from defects in it.
- X *
- X * 2. The origin of this software must not be misrepresented, either
- X * by explicit claim or by omission.
- X *
- X * 3. Altered versions must be plainly marked as such, and must not
- X * be misrepresented as being the original software.
- X *
- X * Beware that some of this code is subtly aware of the way operator
- X * precedence is structured in regular expressions. Serious changes in
- X * regular-expression syntax might require a total rethink.
- X */
- X
- X /* Jan Labanowski made some small modifications which are marked in the
- X text (jkl)
- X */
- X
- X#include "paths.h" /* jkl */
- X/* #include <regexp.h> ---- originally */
- X#include "reg_exp.h" /* modified by jkl */
- X/* #include "reg_magic.h" regmagic included in reg_exp.h, jkl */
- X
- X/*
- X * The "internal use only" fields in reg_exp.h are present to pass info from
- X * compile to execute that permits the execute phase to run lots faster on
- X * simple cases. They are:
- X *
- X * regstart char that must begin a match; '\0' if none obvious
- X * reganch is the match anchored (at beginning-of-line only)?
- X * regmust string (pointer into program) that match must include, or NULL
- X * regmlen length of regmust string
- X *
- X * Regstart and reganch permit very fast decisions on suitable starting points
- X * for a match, cutting down the work a lot. Regmust permits fast rejection
- X * of lines that cannot possibly match. The regmust tests are costly enough
- X * that reg_comp() supplies a regmust only if the r.e. contains something
- X * potentially expensive (at present, the only such thing detected is * or +
- X * at the start of the r.e., which can involve a lot of backup). Regmlen is
- X * supplied because the test in reg_exec() needs it and reg_comp() is computing
- X * it anyway.
- X */
- X
- X/*
- X * Structure for reg_exp "program". This is essentially a linear encoding
- X * of a nondeterministic finite-state machine (aka syntax charts or
- X * "railroad normal form" in parsing technology). Each node is an opcode
- X * plus a "next" pointer, possibly plus an operand. "Next" pointers of
- X * all nodes except BRANCH implement concatenation; a "next" pointer with
- X * a BRANCH on both ends of it is connecting two alternatives. (Here we
- X * have one of the subtle syntax dependencies: an individual BRANCH (as
- X * opposed to a collection of them) is never concatenated with anything
- X * because of operator precedence.) The operand of some types of node is
- X * a literal string; for others, it is a node leading into a sub-FSM. In
- X * particular, the operand of a BRANCH node is the first node of the branch.
- X * (NB this is *not* a tree structure: the tail of the branch connects
- X * to the thing following the set of BRANCHes.) The opcodes are:
- X */
- X
- X/* definition number opnd? meaning */
- X#define END 0 /* no End of program. */
- X#define BOL 1 /* no Match "" at beginning of line. */
- X#define EOL 2 /* no Match "" at end of line. */
- X#define ANY 3 /* no Match any one character. */
- X#define ANYOF 4 /* str Match any character in this string. */
- X#define ANYBUT 5 /* str Match any character not in this string. */
- X#define BRANCH 6 /* node Match this alternative, or the next... */
- X#define BACK 7 /* no Match "", "next" ptr points backward. */
- X#define EXACTLY 8 /* str Match this string. */
- X#define NOTHING 9 /* no Match empty string. */
- X#define STAR 10 /* node Match this (simple) thing 0 or more times. */
- X#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
- X#define OPEN 20 /* no Mark this point in input as start of #n. */
- X /* OPEN+1 is number 1, etc. */
- X#define CLOSE 30 /* no Analogous to OPEN. */
- X
- X/*
- X * Opcode notes:
- X *
- X * BRANCH The set of branches constituting a single choice are hooked
- X * together with their "next" pointers, since precedence prevents
- X * anything being concatenated to any individual branch. The
- X * "next" pointer of the last BRANCH in a choice points to the
- X * thing following the whole choice. This is also where the
- X * final "next" pointer of each individual branch points; each
- X * branch starts with the operand node of a BRANCH node.
- X *
- X * BACK Normal "next" pointers all implicitly point forward; BACK
- X * exists to make loop structures possible.
- X *
- X * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
- X * BRANCH structures using BACK. Simple cases (one character
- X * per match) are implemented with STAR and PLUS for speed
- X * and to minimize recursive plunges.
- X *
- X * OPEN,CLOSE ...are numbered at compile time.
- X */
- X
- X/*
- X * A node is one char of opcode followed by two chars of "next" pointer.
- X * "Next" pointers are stored as two 8-bit pieces, high order first. The
- X * value is a positive offset from the opcode of the node containing it.
- X * An operand, if any, simply follows the node. (Note that much of the
- X * code generation knows about this implicit relationship.)
- X *
- X * Using two bytes for the "next" pointer is vast overkill for most things,
- X * but allows patterns to get big without disasters.
- X */
- X#define OP(p) (*(p))
- X#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
- X#define OPERAND(p) ((p) + 3)
- X
- X/*
- X * See reg_magic.h for one further detail of program structure.
- X */
- X
- X
- X/*
- X * Utility definitions.
- X */
- X/* replaced this with my intcode() routine in translit.c , jkl
- X #ifndef CHARBITS
- X #define UCHARAT(p) ((int)*(unsigned char *)(p))
- X #else
- X #define UCHARAT(p) ((int)*(p)&CHARBITS)
- X #endif
- X*/
- X#define UCHARAT(p) (intcode(*(p)))
- Xextern int intcode();
- X
- X#if STRCHR
- X#else
- X#define strchr indexfun
- X extern int indexfun();
- X#endif
- X
- X#define FAIL(m) { reg_error(m); return(NULL); }
- X#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
- X/* #define META "^$.[()|?+*\\" */
- X#define META ".[()|?+*\\" /* disabled ^$ jkl */
- X
- X/*
- X * Flags to be passed up and down.
- X */
- X#define HASWIDTH 01 /* Known never to match null string. */
- X#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
- X#define SPSTART 04 /* Starts with * or +. */
- X#define WORST 0 /* Worst case. */
- X
- X/*
- X * Global work variables for reg_comp().
- X */
- Xstatic char *regparse; /* Input-scan pointer. */
- Xstatic int regnpar; /* () count. */
- Xstatic char regdummy;
- Xstatic char *regcode; /* Code-emit pointer; &regdummy = don't. */
- Xstatic long regsize; /* Code size. */
- X
- X/*
- X * Forward declarations for reg_comp()'s friends.
- X */
- X#if STATICFUN
- X#define STATIC static
- X#else
- X#define STATIC
- X#endif
- X
- XSTATIC char *reg();
- XSTATIC char *regbranch();
- XSTATIC char *regpiece();
- XSTATIC char *regatom();
- XSTATIC char *regnode();
- XSTATIC char *regnext();
- XSTATIC void regc();
- XSTATIC void reginsert();
- XSTATIC void regtail();
- XSTATIC void regoptail();
- X
- X
- X/*
- X - reg_comp - compile a regular expression into internal code
- X *
- X * We can't allocate space until we know how big the compiled form will be,
- X * but we can't compile it (and thus know how big it is) until we've got a
- X * place to put the code. So we cheat: we compile it twice, once with code
- X * generation turned off and size counting turned on, and once "for real".
- X * This also means that we don't allocate space until we are sure that the
- X * thing really will compile successfully, and we never have to move the
- X * code and thus invalidate pointers into it. (Note that it has to be in
- X * one piece because free() must be able to free it all.)
- X *
- X * Beware that the optimization-preparation code in here knows about some
- X * of the structure of the compiled reg_exp.
- X *
- X * exp is the pattern text (a NUL-terminated string).
- X *
- X * Returns the malloc()ed reg_exp on success. On failure NULL is returned;
- X * failures detected here are reported through reg_error() by the FAIL macro.
- X */
- Xreg_exp *
- Xreg_comp(exp)
- Xchar *exp;
- X{
- X    register reg_exp *r;
- X    register char *scan;
- X    register char *longest;
- X    register int len;
- X    int flags;
- X/* extern char *malloc(); --- */
- X
- X    if (exp == NULL)
- X        FAIL("NULL argument");
- X
- X    /*
- X     * First pass: determine size, legality.  Pointing regcode at
- X     * regdummy is the signal to the emit routines to count bytes in
- X     * regsize instead of storing them.
- X     */
- X    regparse = exp;
- X    regnpar = 1;
- X    regsize = 0L;
- X    regcode = &regdummy;
- X    regc(MAGIC);
- X    if (reg(0, &flags) == NULL)
- X        return(NULL);
- X
- X    /* Small enough for pointer-storage convention? */
- X    if (regsize >= 32767L)  /* Probably could be 65535L. */
- X        FAIL("regexp too big");
- X
- X    /* Allocate space. */
- X    r = (reg_exp *)malloc(sizeof(reg_exp) + (unsigned)regsize);
- X    if (r == NULL)
- X        FAIL("out of space");
- X
- X    /* Second pass: emit code. */
- X    regparse = exp;
- X    regnpar = 1;
- X    regcode = r->program;
- X    regc(MAGIC);
- X    /*
- X     * NOTE(review): r is not released on this path.  The first pass
- X     * already accepted the same text, so this is not expected to
- X     * trigger -- confirm before adding a free() here.
- X     */
- X    if (reg(0, &flags) == NULL)
- X        return(NULL);
- X
- X    /* Dig out information for optimizations. */
- X    r->regstart = '\0';     /* Worst-case defaults. */
- X    r->reganch = 0;
- X    r->regmust = NULL;
- X    r->regmlen = 0;
- X    scan = r->program+1;    /* First BRANCH. */
- X    if (OP(regnext(scan)) == END) { /* Only one top-level choice. */
- X        scan = OPERAND(scan);
- X
- X        /* Starting-point info. */
- X        if (OP(scan) == EXACTLY)
- X            r->regstart = *OPERAND(scan);
- X        else if (OP(scan) == BOL)
- X            r->reganch++;
- X
- X        /*
- X         * If there's something expensive in the r.e., find the
- X         * longest literal string that must appear and make it the
- X         * regmust. Resolve ties in favor of later strings, since
- X         * the regstart check works with the beginning of the r.e.
- X         * and avoiding duplication strengthens checking. Not a
- X         * strong reason, but sufficient in the absence of others.
- X         */
- X        if (flags&SPSTART) {
- X            longest = NULL;
- X            len = 0;
- X            for (; scan != NULL; scan = regnext(scan))
- X                if ((OP(scan) == EXACTLY) &&
- X                    (strlen(OPERAND(scan)) >= len)) {
- X                    longest = OPERAND(scan);
- X                    len = strlen(OPERAND(scan));
- X                }
- X            r->regmust = longest;
- X            r->regmlen = len;
- X        }
- X    }
- X
- X    return(r);
- X}
- X
- X/*
- X - reg - regular expression, i.e. main body or parenthesized thing
- X *
- X * Caller must absorb opening parenthesis.
- X *
- X * Combining parenthesis handling with the base level of regular expression
- X * is a trifle forced, but the need to tie the tails of the branches to what
- X * follows makes it hard to avoid.
- X *
- X * paren is nonzero when compiling a parenthesized group, zero for the
- X * top level. *flagp receives HASWIDTH (cleared if any branch lacks it)
- X * and SPSTART (set if any branch has it).
- X *
- X * Returns the first node emitted (the OPEN node when paren is nonzero,
- X * otherwise the first BRANCH), or NULL on error.
- X */
- XSTATIC char *
- Xreg(paren, flagp)
- Xint paren; /* Parenthesized? */
- Xint *flagp;
- X{
- X register char *ret;
- X register char *br;
- X register char *ender;
- X /* parno is assigned and read only when paren is nonzero. */
- X register int parno;
- X int flags;
- X
- X *flagp = HASWIDTH; /* Tentatively. */
- X
- X /* Make an OPEN node, if parenthesized. */
- X if (paren) {
- X if (regnpar >= NSUBEXP)
- X FAIL("too many ()");
- X parno = regnpar;
- X regnpar++;
- X ret = regnode(OPEN+parno);
- X } else
- X ret = NULL;
- X
- X /* Pick up the branches, linking them together. */
- X br = regbranch(&flags);
- X if (br == NULL)
- X return(NULL);
- X if (ret != NULL)
- X regtail(ret, br); /* OPEN -> first. */
- X else
- X ret = br;
- X /* One branch without width is enough to drop HASWIDTH for the whole. */
- X if (!(flags&HASWIDTH))
- X *flagp &= ~HASWIDTH;
- X *flagp |= flags&SPSTART;
- X while (*regparse == '|') {
- X regparse++;
- X br = regbranch(&flags);
- X if (br == NULL)
- X return(NULL);
- X regtail(ret, br); /* BRANCH -> BRANCH. */
- X if (!(flags&HASWIDTH))
- X *flagp &= ~HASWIDTH;
- X *flagp |= flags&SPSTART;
- X }
- X
- X /* Make a closing node, and hook it on the end. */
- X ender = regnode((paren) ? CLOSE+parno : END);
- X regtail(ret, ender);
- X
- X /* Hook the tails of the branches to the closing node. */
- X for (br = ret; br != NULL; br = regnext(br))
- X regoptail(br, ender);
- X
- X /* Check for proper termination. */
- X /* In the parenthesized case the test also steps past the ')'. */
- X if (paren && *regparse++ != ')') {
- X FAIL("unmatched ()");
- X } else if (!paren && *regparse != '\0') {
- X if (*regparse == ')') {
- X FAIL("unmatched ()");
- X } else
- X FAIL("junk on end"); /* "Can't happen". */
- X /* NOTREACHED */
- X }
- X
- X return(ret);
- X}
- X
- X/*
- X - regbranch - one alternative of an | operator
- X *
- X * Implements the concatenation operator: emits a BRANCH node and then
- X * the pieces of this alternative, chaining each piece to the one before.
- X * *flagp gets HASWIDTH if any piece has it, and SPSTART if the first
- X * piece has it. Returns the BRANCH node, or NULL if a piece failed.
- X */
- XSTATIC char *
- Xregbranch(flagp)
- Xint *flagp;
- X{
- X    register char *head;
- X    register char *prev;
- X    register char *piece;
- X    int pieceflags;
- X
- X    *flagp = WORST;     /* Until a piece says otherwise. */
- X
- X    head = regnode(BRANCH);
- X    prev = NULL;
- X    for (;;) {
- X        /* End of pattern, '|' or ')' closes this alternative. */
- X        if (*regparse == '\0' || *regparse == '|' || *regparse == ')')
- X            break;
- X
- X        piece = regpiece(&pieceflags);
- X        if (piece == NULL)
- X            return(NULL);
- X
- X        *flagp |= pieceflags&HASWIDTH;
- X        if (prev != NULL)
- X            regtail(prev, piece);
- X        else                    /* Only the first piece counts here. */
- X            *flagp |= pieceflags&SPSTART;
- X        prev = piece;
- X    }
- X
- X    /* An empty alternative still needs a body. */
- X    if (prev == NULL)
- X        (void) regnode(NOTHING);
- X
- X    return(head);
- X}
- X
- X/*
- X - regpiece - something followed by possible [*+?]
- X *
- X * Note that the branching code sequences used for ? and the general cases
- X * of * and + are somewhat optimized: they use the same NOTHING node as
- X * both the endmarker for their branch list and the body of the last branch.
- X * It might seem that this node could be dispensed with entirely, but the
- X * endmarker role is not redundant.
- X *
- X * *flagp receives the atom's flags unchanged when no operator follows;
- X * otherwise WORST|SPSTART for '*' and '?', WORST|HASWIDTH for '+'.
- X *
- X * Returns the first node of the piece, or NULL on error.
- X */
- XSTATIC char *
- Xregpiece(flagp)
- Xint *flagp;
- X{
- X register char *ret;
- X register char op;
- X register char *next;
- X int flags;
- X
- X ret = regatom(&flags);
- X if (ret == NULL)
- X return(NULL);
- X
- X /* No repetition operator: the piece is just the atom. */
- X op = *regparse;
- X if (!ISMULT(op)) {
- X *flagp = flags;
- X return(ret);
- X }
- X
- X /* Only '?' is allowed on an operand that may match the empty string. */
- X if (!(flags&HASWIDTH) && op != '?')
- X FAIL("*+ operand could be empty");
- X *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
- X
- X /* SIMPLE operands get the compact STAR/PLUS form, others a BRANCH loop. */
- X if (op == '*' && (flags&SIMPLE))
- X reginsert(STAR, ret);
- X else if (op == '*') {
- X /* Emit x* as (x&|), where & means "self". */
- X reginsert(BRANCH, ret); /* Either x */
- X regoptail(ret, regnode(BACK)); /* and loop */
- X regoptail(ret, ret); /* back */
- X regtail(ret, regnode(BRANCH)); /* or */
- X regtail(ret, regnode(NOTHING)); /* null. */
- X } else if (op == '+' && (flags&SIMPLE))
- X reginsert(PLUS, ret);
- X else if (op == '+') {
- X /* Emit x+ as x(&|), where & means "self". */
- X next = regnode(BRANCH); /* Either */
- X regtail(ret, next);
- X regtail(regnode(BACK), ret); /* loop back */
- X regtail(next, regnode(BRANCH)); /* or */
- X regtail(ret, regnode(NOTHING)); /* null. */
- X } else if (op == '?') {
- X /* Emit x? as (x|) */
- X reginsert(BRANCH, ret); /* Either x */
- X regtail(ret, regnode(BRANCH)); /* or */
- X next = regnode(NOTHING); /* null. */
- X regtail(ret, next);
- X regoptail(ret, next);
- X }
- X /* Step past the operator; a second operator right after it is an error. */
- X regparse++;
- X if (ISMULT(*regparse))
- X FAIL("nested *?+");
- X
- X return(ret);
- X}
- X
- X/*
- X - regatom - the lowest level
- X *
- X * Optimization: gobbles an entire sequence of ordinary characters so that
- X * it can turn them into a single node, which is smaller to store and
- X * faster to run. Backslashed characters are exceptions, each becoming a
- X * separate node; the code is simpler that way and it's not worth fixing.
- X *
- X * *flagp receives HASWIDTH/SIMPLE/SPSTART as appropriate for the atom.
- X * Returns the node emitted for the atom, or NULL on error.
- X */
- XSTATIC char *
- Xregatom(flagp)
- Xint *flagp;
- X{
- X register char *ret;
- X int flags;
- X
- X *flagp = WORST; /* Tentatively. */
- X
- X switch (*regparse++) {
- X/* the ^ and $ hooks are disabled by jkl */
- X/* case '^':
- X ret = regnode(BOL);
- X break;
- X case '$':
- X ret = regnode(EOL);
- X break; */
- X case '.':
- X ret = regnode(ANY);
- X *flagp |= HASWIDTH|SIMPLE;
- X break;
- X case '[': {
- X register int class;
- X register int classend;
- X
- X if (*regparse == '^') { /* Complement of range. */
- X ret = regnode(ANYBUT);
- X regparse++;
- X } else
- X ret = regnode(ANYOF);
- X /* A ']' or '-' in first position is an ordinary set member. */
- X if (*regparse == ']' || *regparse == '-')
- X regc(*regparse++);
- X while (*regparse != '\0' && *regparse != ']') {
- X if (*regparse == '-') {
- X regparse++;
- X /* A '-' just before the closing ']' (or end of text) is literal. */
- X if (*regparse == ']' || *regparse == '\0')
- X regc('-');
- X else {
- X /*
- X * The character before the '-' has already been emitted,
- X * so the range proper starts one code above it.
- X */
- X class = UCHARAT(regparse-2)+1;
- X classend = UCHARAT(regparse);
- X /* True exactly when the low end is above the high end. */
- X if (class > classend+1)
- X FAIL("invalid [] range");
- X for (; class <= classend; class++)
- X regc(class);
- X regparse++;
- X }
- X } else
- X regc(*regparse++);
- X }
- X /* The operand string is NUL-terminated. */
- X regc('\0');
- X if (*regparse != ']')
- X FAIL("unmatched []");
- X regparse++;
- X *flagp |= HASWIDTH|SIMPLE;
- X }
- X break;
- X case '(':
- X ret = reg(1, &flags);
- X if (ret == NULL)
- X return(NULL);
- X *flagp |= flags&(HASWIDTH|SPSTART);
- X break;
- X case '\0':
- X case '|':
- X case ')':
- X FAIL("internal urp"); /* Supposed to be caught earlier. */
- X break;
- X case '?':
- X case '+':
- X case '*':
- X FAIL("?+* follows nothing");
- X break;
- X case '\\':
- X if (*regparse == '\0')
- X FAIL("trailing \\");
- X /* The escaped character becomes a one-character EXACTLY node. */
- X ret = regnode(EXACTLY);
- X regc(*regparse++);
- X regc('\0');
- X *flagp |= HASWIDTH|SIMPLE;
- X break;
- X default: {
- X register int len;
- X register char ender;
- X
- X /* Undo the switch's increment: the literal run starts here. */
- X regparse--;
- X len = strcspn(regparse, META);
- X if (len <= 0)
- X FAIL("internal disaster");
- X ender = *(regparse+len);
- X if (len > 1 && ISMULT(ender))
- X len--; /* Back off clear of ?+* operand. */
- X *flagp |= HASWIDTH;
- X if (len == 1)
- X *flagp |= SIMPLE;
- X ret = regnode(EXACTLY);
- X while (len > 0) {
- X regc(*regparse++);
- X len--;
- X }
- X regc('\0');
- X }
- X break;
- X }
- X
- X return(ret);
- X}
- X
- X/*
- X - regnode - emit a node
- X *
- X * op is the opcode of the new node.
- X *
- X * While regcode points at regdummy (the sizing pass) nothing is stored:
- X * regsize grows by the three bytes a node occupies and the address of
- X * regdummy is returned. Otherwise the opcode and a zeroed two-byte
- X * "next" pointer are written at regcode, regcode is advanced past them,
- X * and the location of the new node is returned.
- X */
- XSTATIC char * /* Location. */
- Xregnode(op)
- Xchar op;
- X{
- X    register char *ret;
- X    register char *ptr;
- X
- X    ret = regcode;
- X    if (ret == &regdummy) {
- X        regsize += 3;
- X        return(ret);
- X    }
- X
- X    ptr = ret;
- X    *ptr++ = op;
- X    *ptr++ = '\0';      /* Null "next" pointer. */
- X    *ptr++ = '\0';
- X    regcode = ptr;
- X
- X    return(ret);
- X}
- X
- X/*
- X - regc - emit (if appropriate) a byte of code
- X *
- X * b is the byte to append. While regcode points at regdummy (the sizing
- X * pass) the byte is only counted in regsize; otherwise it is stored at
- X * regcode and regcode is advanced.
- X */
- XSTATIC void
- Xregc(b)
- Xchar b;
- X{
- X    if (regcode != &regdummy)
- X        *regcode++ = b;
- X    else
- X        regsize++;
- X}
- X
- X/*
- X - reginsert - insert an operator in front of already-emitted operand
- X *
- X * Means relocating the operand.
- X *
- X * op is the opcode to insert and opnd the start of the operand that has
- X * already been emitted. While regcode points at regdummy (the sizing pass)
- X * only regsize is bumped by three. Otherwise everything from opnd up to
- X * regcode is moved up by three bytes and a node with opcode op and a null
- X * "next" pointer is written where the operand used to start.
- X */
- XSTATIC void
- Xreginsert(op, opnd)
- Xchar op;
- Xchar *opnd;
- X{
- X    register char *src;
- X    register char *dst;
- X    register char *place;
- X
- X    if (regcode == &regdummy) {
- X        regsize += 3;
- X        return;
- X    }
- X
- X    /* Copy from the top down, since source and destination overlap. */
- X    src = regcode;
- X    regcode += 3;
- X    dst = regcode;
- X    while (src > opnd)
- X        *--dst = *--src;
- X
- X    place = opnd;       /* Op node, where operand used to be. */
- X    *place++ = op;
- X    *place++ = '\0';
- X    *place++ = '\0';
- X}
- X
- X/*
- X - regtail - set the next-pointer at the end of a node chain
- X *
- X * p is the first node of the chain and val the node the chain should lead
- X * to. Does nothing when p is the address of regdummy (the sizing pass).
- X * Otherwise the chain is followed with regnext() to its last node and the
- X * distance to val is stored there as two bytes, high order first. For a
- X * BACK node the distance is measured backward (node minus val).
- X */
- XSTATIC void
- Xregtail(p, val)
- Xchar *p;
- Xchar *val;
- X{
- X    register char *scan;
- X    register char *temp;
- X    register int offset;
- X
- X    if (p == &regdummy)
- X        return;
- X
- X    /* Find last node. */
- X    scan = p;
- X    for (;;) {
- X        temp = regnext(scan);
- X        if (temp == NULL)
- X            break;
- X        scan = temp;
- X    }
- X
- X    if (OP(scan) == BACK)
- X        offset = scan - val;
- X    else
- X        offset = val - scan;
- X    *(scan+1) = (offset>>8)&0377;
- X    *(scan+2) = offset&0377;
- X}
- X
- X/*
- X - regoptail - regtail on operand of first argument; nop if operandless
- X *
- X * p is a node and val the target for the tail of p's operand chain.
- X * Nothing happens when p is NULL, is the address of regdummy (the sizing
- X * pass), or is not a BRANCH node.
- X */
- XSTATIC void
- Xregoptail(p, val)
- Xchar *p;
- Xchar *val;
- X{
- X    /* "Operandless" and "op != BRANCH" are synonymous in practice. */
- X    if (p == NULL || p == &regdummy || OP(p) != BRANCH)
- X        return;
- X    regtail(OPERAND(p), val);
- X}
- X
- X/*
- X * regexec and friends
- X */
- X
- X/*
- X * Global work variables for reg_exec().
- X */
- Xstatic char *reginput; /* String-input pointer. */
- Xstatic char *regbol; /* Beginning of input, for ^ check. */
- Xstatic char **regstartp; /* Pointer to startp array. */
- Xstatic char **regendp; /* Ditto for endp. */
- X
- X/*
- X * Forwards.
- X */
- Xint reg_try(); /* jkl, took "static" out, so it is known to the linker */
- XSTATIC int regmatch();
- XSTATIC int regrepeat();
- X
- X#ifdef DEBUG
- Xint regnarrate = 0;
- Xvoid regdump();
- XSTATIC char *regprop();
- X#endif
- X
- X/*
- X - reg_exec - match a regexp against a string
- X *
- X * prog is a compiled program and string the text to search. Returns 1
- X * when a match is found and 0 otherwise; a NULL argument or a program
- X * whose first byte is not MAGIC is reported through reg_error() and also
- X * yields 0.
- X */
- Xint
- Xreg_exec(prog, string)
- Xregister reg_exp *prog;
- Xregister char *string;
- X{
- X    register char *s;
- X
- X    /* Refuse NULL arguments. */
- X    if (prog == NULL || string == NULL) {
- X        reg_error("NULL parameter");
- X        return(0);
- X    }
- X
- X    /* The program must start with the magic byte. */
- X    if (UCHARAT(prog->program) != MAGIC) {
- X        reg_error("corrupted program");
- X        return(0);
- X    }
- X
- X    /* Quick rejection: the "must appear" string has to occur somewhere. */
- X    if (prog->regmust != NULL) {
- X        for (s = strchr(string, prog->regmust[0]);
- X             s != NULL;
- X             s = strchr(s + 1, prog->regmust[0]))
- X            if (strncmp(s, prog->regmust, prog->regmlen) == 0)
- X                break;
- X        if (s == NULL)
- X            return(0);
- X    }
- X
- X    /* Remember where the line begins, for the ^ check. */
- X    regbol = string;
- X
- X    /* An anchored program is tried at the beginning only. */
- X    if (prog->reganch)
- X        return(reg_try(prog, string));
- X
- X    if (prog->regstart != '\0') {
- X        /* Known first character: try only where it occurs. */
- X        for (s = strchr(string, prog->regstart);
- X             s != NULL;
- X             s = strchr(s + 1, prog->regstart))
- X            if (reg_try(prog, s))
- X                return(1);
- X    } else {
- X        /* Otherwise try every position, the terminating NUL included. */
- X        s = string;
- X        for (;;) {
- X            if (reg_try(prog, s))
- X                return(1);
- X            if (*s++ == '\0')
- X                break;
- X        }
- X    }
- X
- X    /* Nothing matched. */
- X    return(0);
- X}
- X
- X/*
- X - reg_try - try match at specific point
- X *
- X * Clears the NSUBEXP start/end slots of prog, then runs the matcher on
- X * the program (skipping its first byte) with string as the input. On
- X * success slot 0 is set to the span from string to the final input
- X * position.
- X */
- Xint /* 0 failure, 1 success */
- Xreg_try(prog, string)
- Xreg_exp *prog;
- Xchar *string;
- X{
- X    register int slot;
- X
- X    reginput = string;
- X    regstartp = prog->startp;
- X    regendp = prog->endp;
- X
- X    for (slot = 0; slot < NSUBEXP; slot++) {
- X        prog->startp[slot] = NULL;
- X        prog->endp[slot] = NULL;
- X    }
- X
- X    if (!regmatch(prog->program + 1))
- X        return(0);
- X
- X    prog->startp[0] = string;
- X    prog->endp[0] = reginput;
- X    return(1);
- X}
- X
- X/*
- X - regmatch - main matching routine
- X *
- X * Conceptually the strategy is simple: check to see whether the current
- X * node matches, call self recursively to see whether the rest matches,
- X * and then act accordingly. In practice we make some effort to avoid
- X * recursion, in particular by going through "ordinary" nodes (that don't
- X * need to know whether the rest of the match failed) by a loop instead of
- X * by recursion.
- X *
- X * prog is the node to start from. The input position is the file-level
- X * reginput, which is advanced as characters are consumed. Returns 1 when
- X * END is reached and 0 otherwise.
- X */
- XSTATIC int /* 0 failure, 1 success */
- Xregmatch(prog)
- Xchar *prog;
- X{
- X register char *scan; /* Current node. */
- X char *next; /* Next node. */
- X/* extern char *strchr(); --- jkl */
- X
- X scan = prog;
- X#ifdef DEBUG
- X if (scan != NULL && regnarrate)
- X fprintf(stderr, "%s(\n", regprop(scan));
- X#endif
- X while (scan != NULL) {
- X#ifdef DEBUG
- X if (regnarrate)
- X fprintf(stderr, "%s...\n", regprop(scan));
- X#endif
- X next = regnext(scan);
- X
- X switch (OP(scan)) {
- X case BOL:
- X if (reginput != regbol)
- X return(0);
- X break;
- X case EOL:
- X if (*reginput != '\0')
- X return(0);
- X break;
- X case ANY:
- X if (*reginput == '\0')
- X return(0);
- X reginput++;
- X break;
- X case EXACTLY: {
- X register int len;
- X register char *opnd;
- X
- X opnd = OPERAND(scan);
- X /* Inline the first character, for speed. */
- X if (*opnd != *reginput)
- X return(0);
- X len = strlen(opnd);
- X if (len > 1 && strncmp(opnd, reginput, len) != 0)
- X return(0);
- X reginput += len;
- X }
- X break;
- X case ANYOF:
- X /* The NUL test comes first, so strchr() is never asked to find '\0'. */
- X if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
- X return(0);
- X reginput++;
- X break;
- X case ANYBUT:
- X if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
- X return(0);
- X reginput++;
- X break;
- X case NOTHING:
- X break;
- X case BACK:
- X break;
- X case OPEN+1:
- X case OPEN+2:
- X case OPEN+3:
- X case OPEN+4:
- X case OPEN+5:
- X case OPEN+6:
- X case OPEN+7:
- X case OPEN+8:
- X case OPEN+9: {
- X register int no;
- X register char *save;
- X
- X no = OP(scan) - OPEN;
- X save = reginput;
- X
- X /* The start is recorded only once the rest has matched. */
- X if (regmatch(next)) {
- X /*
- X * Don't set startp if some later
- X * invocation of the same parentheses
- X * already has.
- X */
- X if (regstartp[no] == NULL)
- X regstartp[no] = save;
- X return(1);
- X } else
- X return(0);
- X }
- X break;
- X case CLOSE+1:
- X case CLOSE+2:
- X case CLOSE+3:
- X case CLOSE+4:
- X case CLOSE+5:
- X case CLOSE+6:
- X case CLOSE+7:
- X case CLOSE+8:
- X case CLOSE+9: {
- X register int no;
- X register char *save;
- X
- X no = OP(scan) - CLOSE;
- X save = reginput;
- X
- X /* Same scheme as OPEN, recording the end position. */
- X if (regmatch(next)) {
- X /*
- X * Don't set endp if some later
- X * invocation of the same parentheses
- X * already has.
- X */
- X if (regendp[no] == NULL)
- X regendp[no] = save;
- X return(1);
- X } else
- X return(0);
- X }
- X break;
- X case BRANCH: {
- X register char *save;
- X
- X if (OP(next) != BRANCH) /* No choice. */
- X next = OPERAND(scan); /* Avoid recursion. */
- X else {
- X /* Try each alternative in turn, restoring the input after a failure. */
- X do {
- X save = reginput;
- X if (regmatch(OPERAND(scan)))
- X return(1);
- X reginput = save;
- X scan = regnext(scan);
- X } while (scan != NULL && OP(scan) == BRANCH);
- X return(0);
- X /* NOTREACHED */
- X }
- X }
- X break;
- X case STAR:
- X case PLUS: {
- X register char nextch;
- X register int no;
- X register char *save;
- X register int min;
- X
- X /*
- X * Lookahead to avoid useless match attempts
- X * when we know what character comes next.
- X */
- X nextch = '\0';
- X if (OP(next) == EXACTLY)
- X nextch = *OPERAND(next);
- X min = (OP(scan) == STAR) ? 0 : 1;
- X save = reginput;
- X /* Take as many repetitions as possible, then give them back one at a time. */
- X no = regrepeat(OPERAND(scan));
- X while (no >= min) {
- X /* If it could work, try it. */
- X if (nextch == '\0' || *reginput == nextch)
- X if (regmatch(next))
- X return(1);
- X /* Couldn't or didn't -- back up. */
- X no--;
- X reginput = save + no;
- X }
- X return(0);
- X }
- X break;
- X case END:
- X return(1); /* Success! */
- X break;
- X default:
- X reg_error("memory corruption");
- X return(0);
- X break;
- X }
- X
- X scan = next;
- X }
- X
- X /*
- X * We get here only if there's trouble -- normally "case END" is
- X * the terminating point.
- X */
- X reg_error("corrupted pointers");
- X return(0);
- X}
- X
- X/*
- X - regrepeat - repeatedly match something simple, report how many
- X *
- X * p is an ANY, EXACTLY, ANYOF or ANYBUT node. Starting at reginput, as
- X * many characters as the node accepts are consumed; reginput is left
- X * after the last one and the number consumed is returned. Any other
- X * node type is reported through reg_error() and counts as zero.
- X */
- XSTATIC int
- Xregrepeat(p)
- Xchar *p;
- X{
- X    register char *cursor = reginput;
- X    register char *arg = OPERAND(p);
- X    register int matched = 0;
- X
- X    switch (OP(p)) {
- X    case ANY:
- X        /* Everything up to the terminating NUL qualifies. */
- X        matched = strlen(cursor);
- X        cursor += matched;
- X        break;
- X    case EXACTLY:
- X        /* Only the first operand character is compared. */
- X        for (; *arg == *cursor; cursor++)
- X            matched++;
- X        break;
- X    case ANYOF:
- X        for (; (*cursor != '\0') &&
- X               (strchr(arg, *cursor) != (char *)NULL); cursor++)
- X            matched++;
- X        break;
- X    case ANYBUT:
- X        for (; *cursor != '\0' && strchr(arg, *cursor) == NULL; cursor++)
- X            matched++;
- X        break;
- X    default:            /* Not a node this routine handles. */
- X        reg_error("internal foulup");
- X        matched = 0;    /* Report no repetitions. */
- X        break;
- X    }
- X    reginput = cursor;
- X
- X    return(matched);
- X}
- X
- X/*
- X - regnext - dig the "next" pointer out of a node
- X *
- X * p is a node. Returns NULL when p is the address of regdummy (the sizing
- X * pass) or when the stored offset is zero. Otherwise returns the node
- X * that lies offset bytes before p for a BACK node, after p for any other.
- X */
- XSTATIC char *
- Xregnext(p)
- Xregister char *p;
- X{
- X    register int offset;
- X
- X    if (p == &regdummy)
- X        return(NULL);
- X
- X    offset = NEXT(p);
- X    if (offset == 0)
- X        return(NULL);
- X
- X    if (OP(p) == BACK)
- X        return(p-offset);
- X    else
- X        return(p+offset);
- X}
- X
- X#ifdef DEBUG
- X
- XSTATIC char *regprop();
- X
- X/*
- X - regdump - dump a regexp onto stdout in vaguely comprehensible form
- X *
- X * r is a compiled program. One line is printed per node: its offset in
- X * the program, its name from regprop(), the offset of its successor in
- X * parentheses (0 when there is none) and, for ANYOF, ANYBUT and EXACTLY,
- X * the operand string. The regstart, reganch and regmust header fields
- X * follow when they are set.
- X *
- X * Offsets are pointer differences; they are converted to int before being
- X * handed to the %d conversions.
- X */
- Xvoid
- Xregdump(r)
- Xreg_exp *r;
- X{
- X    register char *s;
- X    register char op = EXACTLY; /* Arbitrary non-END op. */
- X    register char *next;
- X/* extern char *strchr(); ---jkl */
- X
- X
- X    s = r->program + 1;
- X    while (op != END) { /* While that wasn't END last time... */
- X        op = OP(s);
- X        printf("%2d%s", (int)(s-r->program), regprop(s)); /* Where, what. */
- X        next = regnext(s);
- X        if (next == NULL)   /* Next ptr. */
- X            printf("(0)");
- X        else
- X            printf("(%d)", (int)((s-r->program)+(next-s)));
- X        s += 3;
- X        if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
- X            /* Literal string, where present. */
- X            while (*s != '\0') {
- X                putchar(*s);
- X                s++;
- X            }
- X            s++;    /* Step over the terminating NUL. */
- X        }
- X        putchar('\n');
- X    }
- X
- X    /* Header fields of interest. */
- X    if (r->regstart != '\0')
- X        printf("start `%c' ", r->regstart);
- X    if (r->reganch)
- X        printf("anchored ");
- X    if (r->regmust != NULL)
- X        printf("must have \"%s\"", r->regmust);
- X    printf("\n");
- X}
- X
- X/*
- X - regprop - printable representation of opcode
- X *
- X * op points at a node. Returns a pointer to a static buffer holding ":"
- X * followed by the opcode's name (with the group number for OPEN and
- X * CLOSE); the buffer is overwritten by the next call. An unknown opcode
- X * is reported through reg_error() and yields just ":".
- X */
- XSTATIC char *
- Xregprop(op)
- Xchar *op;
- X{
- X    register char *p;
- X    static char buf[50];
- X
- X    (void) strcpy(buf, ":");
- X
- X    switch (OP(op)) {
- X    case BOL:
- X        p = "BOL";
- X        break;
- X    case EOL:
- X        p = "EOL";
- X        break;
- X    case ANY:
- X        p = "ANY";
- X        break;
- X    case ANYOF:
- X        p = "ANYOF";
- X        break;
- X    case ANYBUT:
- X        p = "ANYBUT";
- X        break;
- X    case BRANCH:
- X        p = "BRANCH";
- X        break;
- X    case EXACTLY:
- X        p = "EXACTLY";
- X        break;
- X    case NOTHING:
- X        p = "NOTHING";
- X        break;
- X    case BACK:
- X        p = "BACK";
- X        break;
- X    case END:
- X        p = "END";
- X        break;
- X    case OPEN+1:
- X    case OPEN+2:
- X    case OPEN+3:
- X    case OPEN+4:
- X    case OPEN+5:
- X    case OPEN+6:
- X    case OPEN+7:
- X    case OPEN+8:
- X    case OPEN+9:
- X        /* The name is formatted straight into buf, so nothing to append. */
- X        sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
- X        p = NULL;
- X        break;
- X    case CLOSE+1:
- X    case CLOSE+2:
- X    case CLOSE+3:
- X    case CLOSE+4:
- X    case CLOSE+5:
- X    case CLOSE+6:
- X    case CLOSE+7:
- X    case CLOSE+8:
- X    case CLOSE+9:
- X        sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
- X        p = NULL;
- X        break;
- X    case STAR:
- X        p = "STAR";
- X        break;
- X    case PLUS:
- X        p = "PLUS";
- X        break;
- X    default:
- X        reg_error("corrupted opcode");
- X        p = NULL;   /* No name to append; p must not be left unset. */
- X        break;
- X    }
- X    if (p != NULL)
- X        (void) strcat(buf, p);
- X    return(buf);
- X}
- X#endif
- X
- X/*
- X * The following is provided for those people who do not have strcspn() in
- X * their C libraries. They should get off their butts and do something
- X * about it; at least one public-domain implementation of those (highly
- X * useful) string routines has been published on Usenet.
- X */
- X#if STRCSPN
- X#else
- X
- X/*
- X * strcspn - length of the leading part of s1 that contains no character
- X * from s2
- X *
- X * Both arguments are NUL-terminated strings. The result is the index of
- X * the first character of s1 that also occurs in s2, or the length of s1
- X * when there is none.
- X */
- X
- XSTATIC int
- Xstrcspn(s1, s2)
- Xchar *s1;
- Xchar *s2;
- X{
- X    register char *subject = s1;
- X    register char *reject;
- X    register int n = 0;
- X
- X    while (*subject != '\0') {
- X        for (reject = s2; *reject != '\0'; reject++)
- X            if (*reject == *subject)
- X                return(n);
- X        subject++;
- X        n++;
- X    }
- X    return(n);
- X}
- X#endif
- END_OF_FILE
- if test 28384 -ne `wc -c <'reg_exp.c'`; then
- echo shar: \"'reg_exp.c'\" unpacked with wrong size!
- fi
- # end of 'reg_exp.c'
- fi
- # Trailer: note that part 8 has been unpacked, then report which of the
- # 10 parts are still outstanding.
- echo shar: End of archive 8 \(of 10\).
- # An empty marker file records that this part has been processed.
- cp /dev/null ark8isdone
- # Collect the numbers of the parts that have no marker file yet.
- MISSING=""
- for I in 1 2 3 4 5 6 7 8 9 10 ; do
- if test ! -f ark${I}isdone ; then
- MISSING="${MISSING} ${I}"
- fi
- done
- if test "${MISSING}" = "" ; then
- echo You have unpacked all 10 archives.
- # Every part is present, so the marker files can be removed.
- rm -f ark[1-9]isdone ark[1-9][0-9]isdone
- else
- echo You still must unpack the following archives:
- echo " " ${MISSING}
- fi
- exit 0
- exit 0 # Just in case...
-