home *** CD-ROM | disk | FTP | other *** search
/ Source Code 1992 March / Source_Code_CD-ROM_Walnut_Creek_March_1992.iso / unix_c / utils / phoneme.c < prev    next >
Encoding:
Internet Message Format  |  1989-03-21  |  40.5 KB

  1. From: decvax!decwrl!dec-rhea!dec-viking!wasser_1@Ucb-Vax.ARPA
  2. To: net.sources
  3. Subject: Final English-to-Phoneme version!
  4. Date: 15 Apr 85 21:58:34 GMT
  5.  
  6.                     Final Version of
  7.                 ENGLISH TO PHONEME TRANSLATION
  8.                            4/15/85
  9.  
  10. Here it is one last time.  I have fixed all of the bugs I heard about
  11. and added a new feature or two (it now talks money as well as
  12. numbers).  I think that this version is good enough for most purposes.
  13. I have proof-read the phoneme rules (found one bug) and made the
  14. program more "robust".  I added protection against the "toupper()"
  15. problem some people had with earlier versions.
  16.  
  17. If you make a major addition (like better abbreviation handling or an
  18. exception dictionary) please send me a copy.  As before, this is all
  19. public domain and I make no copyright claims on it.  The part derived
  20. from the Naval Research Lab should be public anyway.  Sell it if you
  21. can!
  22.  
  23.                 -John A. Wasser
  24.  
  25. Work address:
  26. ARPAnet:        WASSER%VIKING.DEC@decwrl.ARPA
  27. Usenet:         {allegra,Shasta,decvax}!decwrl!dec-rhea!dec-viking!wasser
  28. Easynet:        VIKING::WASSER
  29. Telephone:      (617)486-2505
  30. USPS:           Digital Equipment Corp.
  31.                 Mail stop: LJO2/E4
  32.                 30 Porter Rd
  33.                 Littleton, MA  01460
  34.  
  35.  
  36.    The files that make up this package are:
  37.  
  38.           ENGLISH.C       Translation rules.
  39.           PHONEME.C       Translate a single word.
  40.           PARSE.C         Split a file into words.
  41.           SPELLWORD.C     Spell an ASCII character or word.
  42.           SAYNUM.C        Say a cardinal or ordinal number (long int).
  43.  
  44. -------------------------------------------------------------------------------
  45. **FILE**        ENGLISH.C        Translation rules.
  46. -------------------------------------------------------------------------------
  47. /*
  48. **    English to Phoneme rules.
  49. **
  50. **    Derived from: 
  51. **
  52. **         AUTOMATIC TRANSLATION OF ENGLISH TEXT TO PHONETICS
  53. **                BY MEANS OF LETTER-TO-SOUND RULES
  54. **
  55. **            NRL Report 7948
  56. **
  57. **              January 21st, 1976
  58. **        Naval Research Laboratory, Washington, D.C.
  59. **
  60. **
  61. **    Published by the National Technical Information Service as
  62. **    document "AD/A021 929".
  63. **
  64. **
  65. **
  66. **    The Phoneme codes:
  67. **
  68. **        IY    bEEt        IH    bIt
  69. **        EY    gAte        EH    gEt
  70. **        AE    fAt        AA    fAther
  71. **        AO    lAWn        OW    lOne
  72. **        UH    fUll        UW    fOOl
  73. **        ER    mURdER        AX    About
  74. **        AH    bUt        AY    hIde
  75. **        AW    hOW        OY    tOY
  76. **    
  77. **        p    Pack        b    Back
  78. **        t    Time        d    Dime
  79. **        k    Coat        g    Goat
  80. **        f    Fault        v    Vault
  81. **        TH    eTHer        DH    eiTHer
  82. **        s    Sue        z    Zoo
  83. **        SH    leaSH        ZH    leiSure
  84. **        h    How        m    suM
  85. **        n    suN        NG    suNG
  86. **        l    Laugh        w    Wear
  87. **        y    Young        r    Rate
  88. **        CH    CHar        j    Jar
  89. **        WH    WHere
  90. **
  91. **
  92. **    Rules are made up of four parts:
  93. **    
  94. **        The left context.
  95. **        The text to match.
  96. **        The right context.
  97. **        The phonemes to substitute for the matched text.
  98. **
  99. **    Procedure:
  100. **
  101. **        Separate each block of letters (apostrophes included) 
  102. **        and add a space on each side.  For each unmatched 
  103. **        letter in the word, look through the rules where the 
  104. **        text to match starts with the letter in the word.  If 
  105. **        the text to match is found and the right and left 
  106. **        context patterns also match, output the phonemes for 
  107. **        that rule and skip to the next unmatched letter.
  108. **
  109. **
  110. **    Special Context Symbols:
  111. **
  112. **        #    One or more vowels
  113. **        :    Zero or more consonants
  114. **        ^    One consonant.
  115. **        .    One of B, D, V, G, J, L, M, N, R, W or Z (voiced 
  116. **            consonants)
  117. **        %    One of ER, E, ES, ED, ING, ELY (a suffix)
  118. **            (Found in right context only)
  119. **        +    One of E, I or Y (a "front" vowel)
  120. **
  121. */
  122.  
  123.  
  124. /* Context definitions: the values used in a rule's LEFT_PART / RIGHT_PART */
  125. static char Anything[] = "";    /* No context requirement (empty pattern) */
  126. static char Nothing[] = " ";    /* Context is beginning or end of word (a single space) */
  127.  
  128. /* Phoneme definitions: the values used in a rule's OUT_PART */
  129. static char Pause[] = " ";    /* Short silence */
  130. static char Silent[] = "";    /* No phonemes */
  131.  
  132. #define LEFT_PART    0    /* Index into a Rule: left context pattern */
  133. #define MATCH_PART    1    /* Index into a Rule: text to match */
  134. #define RIGHT_PART    2    /* Index into a Rule: right context pattern */
  135. #define OUT_PART    3    /* Index into a Rule: phonemes to output */
  136.  
  137. typedef char *Rule[4];    /* Rule is an array of 4 character pointers, one per *_PART index above */
  138.  
  139. /*0 = Punctuation: first entry of Rules[]; covers space, hyphen, apostrophe and , . ? ! */
  140. /*
  141. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  142. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C). */
  143. static Rule punct_rules[] =
  144.     {
  145.     {Anything,    " ",        Anything,    Pause    },
  146.     {Anything,    "-",        Anything,    Silent    },
  147.     {".",        "'S",        Anything,    "z"    },    /* 'S after a voiced consonant */
  148.     {"#:.E",    "'S",        Anything,    "z"    },    /* 'S after vowel(s), consonants, voiced consonant, E */
  149.     {"#",        "'S",        Anything,    "z"    },    /* 'S after vowel(s) */
  150.     {Anything,    "'",        Anything,    Silent    },    /* apostrophe in any context */
  151.     {Anything,    ",",        Anything,    Pause    },
  152.     {Anything,    ".",        Anything,    Pause    },
  153.     {Anything,    "?",        Anything,    Pause    },
  154.     {Anything,    "!",        Anything,    Pause    },
  155.     {Anything,    0,        Anything,    Silent    },
  156.     };
  157.  
  158. /* A_rules: rules whose MATCH_PART begins with 'A'.
  159. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  160. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  161. static Rule A_rules[] =
  162.     {
  163.     {Anything,    "A",        Nothing,    "AX"    },
  164.     {Nothing,    "ARE",        Nothing,    "AAr"    },
  165.     {Nothing,    "AR",        "O",        "AXr"    },
  166.     {Anything,    "AR",        "#",        "EHr"    },
  167.     {"^",        "AS",        "#",        "EYs"    },
  168.     {Anything,    "A",        "WA",        "AX"    },
  169.     {Anything,    "AW",        Anything,    "AO"    },
  170.     {" :",        "ANY",        Anything,    "EHnIY"    },
  171.     {Anything,    "A",        "^+#",        "EY"    },
  172.     {"#:",        "ALLY",        Anything,    "AXlIY"    },
  173.     {Nothing,    "AL",        "#",        "AXl"    },
  174.     {Anything,    "AGAIN",    Anything,    "AXgEHn"},
  175.     {"#:",        "AG",        "E",        "IHj"    },
  176.     {Anything,    "A",        "^+:#",        "AE"    },
  177.     {" :",        "A",        "^+ ",        "EY"    },
  178.     {Anything,    "A",        "^%",        "EY"    },
  179.     {Nothing,    "ARR",        Anything,    "AXr"    },
  180.     {Anything,    "ARR",        Anything,    "AEr"    },
  181.     {" :",        "AR",        Nothing,    "AAr"    },
  182.     {Anything,    "AR",        Nothing,    "ER"    },
  183.     {Anything,    "AR",        Anything,    "AAr"    },
  184.     {Anything,    "AIR",        Anything,    "EHr"    },
  185.     {Anything,    "AI",        Anything,    "EY"    },
  186.     {Anything,    "AY",        Anything,    "EY"    },
  187.     {Anything,    "AU",        Anything,    "AO"    },
  188.     {"#:",        "AL",        Nothing,    "AXl"    },
  189.     {"#:",        "ALS",        Nothing,    "AXlz"    },
  190.     {Anything,    "ALK",        Anything,    "AOk"    },
  191.     {Anything,    "AL",        "^",        "AOl"    },
  192.     {" :",        "ABLE",        Anything,    "EYbAXl"},
  193.     {Anything,    "ABLE",        Anything,    "AXbAXl"},
  194.     {Anything,    "ANG",        "+",        "EYnj"    },
  195.     {Anything,    "A",        Anything,    "AE"    },
  196.      {Anything,    0,        Anything,    Silent    },
  197.     };
  198.  
  199. /* B_rules: rules whose MATCH_PART begins with 'B'.
  200. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  201. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  202. static Rule B_rules[] =
  203.     {
  204.     {Nothing,    "BE",        "^#",        "bIH"    },
  205.     {Anything,    "BEING",    Anything,    "bIYIHNG"},
  206.     {Nothing,    "BOTH",        Nothing,    "bOWTH"    },
  207.     {Nothing,    "BUS",        "#",        "bIHz"    },
  208.     {Anything,    "BUIL",        Anything,    "bIHl"    },
  209.     {Anything,    "B",        Anything,    "b"    },
  210.     {Anything,    0,        Anything,    Silent    },
  211.     };
  212.  
  213. /* C_rules: rules whose MATCH_PART begins with 'C'.
  214. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  215. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  216. static Rule C_rules[] =
  217.     {
  218.     {Nothing,    "CH",        "^",        "k"    },
  219.     {"^E",        "CH",        Anything,    "k"    },
  220.     {Anything,    "CH",        Anything,    "CH"    },
  221.     {" S",        "CI",        "#",        "sAY"    },
  222.     {Anything,    "CI",        "A",        "SH"    },
  223.     {Anything,    "CI",        "O",        "SH"    },
  224.     {Anything,    "CI",        "EN",        "SH"    },
  225.     {Anything,    "C",        "+",        "s"    },    /* C before a front vowel (E, I, Y) */
  226.     {Anything,    "CK",        Anything,    "k"    },
  227.     {Anything,    "COM",        "%",        "kAHm"    },
  228.     {Anything,    "C",        Anything,    "k"    },
  229.     {Anything,    0,        Anything,    Silent    },
  230.     };
  231.  
  232. /* D_rules: rules whose MATCH_PART begins with 'D'.
  233. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  234. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  235. static Rule D_rules[] =
  236.     {
  237.     {"#:",        "DED",        Nothing,    "dIHd"    },
  238.     {".E",        "D",        Nothing,    "d"    },
  239.     {"#:^E",    "D",        Nothing,    "t"    },
  240.     {Nothing,    "DE",        "^#",        "dIH"    },
  241.     {Nothing,    "DO",        Nothing,    "dUW"    },
  242.     {Nothing,    "DOES",        Anything,    "dAHz"    },
  243.     {Nothing,    "DOING",    Anything,    "dUWIHNG"},
  244.     {Nothing,    "DOW",        Anything,    "dAW"    },
  245.     {Anything,    "DU",        "A",        "jUW"    },
  246.     {Anything,    "D",        Anything,    "d"    },
  247.     {Anything,    0,        Anything,    Silent    },
  248.     };
  249.  
  250. /* E_rules: rules whose MATCH_PART begins with 'E'.
  251. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  252. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  253. static Rule E_rules[] =
  254.     {
  255.     {"#:",        "E",        Nothing,    Silent    },
  256.     {"':^",        "E",        Nothing,    Silent    },
  257.     {" :",        "E",        Nothing,    "IY"    },
  258.     {"#",        "ED",        Nothing,    "d"    },
  259.     {"#:",        "E",        "D ",        Silent    },
  260.     {Anything,    "EV",        "ER",        "EHv"    },
  261.     {Anything,    "E",        "^%",        "IY"    },
  262.     {Anything,    "ERI",        "#",        "IYrIY"    },
  263.     {Anything,    "ERI",        Anything,    "EHrIH"    },
  264.     {"#:",        "ER",        "#",        "ER"    },
  265.     {Anything,    "ER",        "#",        "EHr"    },
  266.     {Anything,    "ER",        Anything,    "ER"    },
  267.     {Nothing,    "EVEN",        Anything,    "IYvEHn"},
  268.     {"#:",        "E",        "W",        Silent    },
  269.     {"T",        "EW",        Anything,    "UW"    },    /* EW after T, S, R, D, L, Z, N, J, TH, CH, SH: "UW" */
  270.     {"S",        "EW",        Anything,    "UW"    },
  271.     {"R",        "EW",        Anything,    "UW"    },
  272.     {"D",        "EW",        Anything,    "UW"    },
  273.     {"L",        "EW",        Anything,    "UW"    },
  274.     {"Z",        "EW",        Anything,    "UW"    },
  275.     {"N",        "EW",        Anything,    "UW"    },
  276.     {"J",        "EW",        Anything,    "UW"    },
  277.     {"TH",        "EW",        Anything,    "UW"    },
  278.     {"CH",        "EW",        Anything,    "UW"    },
  279.     {"SH",        "EW",        Anything,    "UW"    },
  280.     {Anything,    "EW",        Anything,    "yUW"    },    /* EW in any context: "yUW" */
  281.     {Anything,    "E",        "O",        "IY"    },
  282.     {"#:S",        "ES",        Nothing,    "IHz"    },    /* word-final ES after S, C, G, Z, X, J, CH, SH: "IHz" */
  283.     {"#:C",        "ES",        Nothing,    "IHz"    },
  284.     {"#:G",        "ES",        Nothing,    "IHz"    },
  285.     {"#:Z",        "ES",        Nothing,    "IHz"    },
  286.     {"#:X",        "ES",        Nothing,    "IHz"    },
  287.     {"#:J",        "ES",        Nothing,    "IHz"    },
  288.     {"#:CH",    "ES",        Nothing,    "IHz"    },
  289.     {"#:SH",    "ES",        Nothing,    "IHz"    },
  290.     {"#:",        "E",        "S ",        Silent    },
  291.     {"#:",        "ELY",        Nothing,    "lIY"    },
  292.     {"#:",        "EMENT",    Anything,    "mEHnt"    },
  293.     {Anything,    "EFUL",        Anything,    "fUHl"    },
  294.     {Anything,    "EE",        Anything,    "IY"    },
  295.     {Anything,    "EARN",        Anything,    "ERn"    },
  296.     {Nothing,    "EAR",        "^",        "ER"    },
  297.     {Anything,    "EAD",        Anything,    "EHd"    },
  298.     {"#:",        "EA",        Nothing,    "IYAX"    },
  299.     {Anything,    "EA",        "SU",        "EH"    },
  300.     {Anything,    "EA",        Anything,    "IY"    },
  301.     {Anything,    "EIGH",        Anything,    "EY"    },
  302.     {Anything,    "EI",        Anything,    "IY"    },
  303.     {Nothing,    "EYE",        Anything,    "AY"    },
  304.     {Anything,    "EY",        Anything,    "IY"    },
  305.     {Anything,    "EU",        Anything,    "yUW"    },
  306.     {Anything,    "E",        Anything,    "EH"    },
  307.     {Anything,    0,        Anything,    Silent    },
  308.     };
  309.  
  310. /* F_rules: rules whose MATCH_PART begins with 'F'.
  311. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  312. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  313. static Rule F_rules[] =
  314.     {
  315.     {Anything,    "FUL",        Anything,    "fUHl"    },
  316.     {Anything,    "F",        Anything,    "f"    },
  317.     {Anything,    0,        Anything,    Silent    },
  318.     };
  319.  
  320. /* G_rules: rules whose MATCH_PART begins with 'G'.
  321. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  322. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  323. static Rule G_rules[] =
  324.     {
  325.     {Anything,    "GIV",        Anything,    "gIHv"    },
  326.     {Nothing,    "G",        "I^",        "g"    },
  327.     {Anything,    "GE",        "T",        "gEH"    },
  328.     {"SU",        "GGES",        Anything,    "gjEHs"    },
  329.     {Anything,    "GG",        Anything,    "g"    },
  330.     {" B#",        "G",        Anything,    "g"    },
  331.     {Anything,    "G",        "+",        "j"    },    /* G before a front vowel (E, I, Y) */
  332.     {Anything,    "GREAT",    Anything,    "grEYt"    },
  333.     {"#",        "GH",        Anything,    Silent    },
  334.     {Anything,    "G",        Anything,    "g"    },
  335.     {Anything,    0,        Anything,    Silent    },
  336.     };
  337.  
  338. /* H_rules: rules whose MATCH_PART begins with 'H'.
  339. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  340. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  341. static Rule H_rules[] =
  342.     {
  343.     {Nothing,    "HAV",        Anything,    "hAEv"    },
  344.     {Nothing,    "HERE",        Anything,    "hIYr"    },
  345.     {Nothing,    "HOUR",        Anything,    "AWER"    },
  346.     {Anything,    "HOW",        Anything,    "hAW"    },
  347.     {Anything,    "H",        "#",        "h"    },    /* H before vowel(s) is sounded */
  348.     {Anything,    "H",        Anything,    Silent    },    /* H in any context produces no phonemes */
  349.     {Anything,    0,        Anything,    Silent    },
  350.     };
  351.  
  352. /* I_rules: rules whose MATCH_PART begins with 'I'.
  353. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  354. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  355. static Rule I_rules[] =
  356.     {
  357.     {Nothing,    "IN",        Anything,    "IHn"    },
  358.     {Nothing,    "I",        Nothing,    "AY"    },    /* the word "I" on its own */
  359.     {Anything,    "IN",        "D",        "AYn"    },
  360.     {Anything,    "IER",        Anything,    "IYER"    },
  361.     {"#:R",        "IED",        Anything,    "IYd"    },
  362.     {Anything,    "IED",        Nothing,    "AYd"    },
  363.     {Anything,    "IEN",        Anything,    "IYEHn"    },
  364.     {Anything,    "IE",        "T",        "AYEH"    },
  365.     {" :",        "I",        "%",        "AY"    },
  366.     {Anything,    "I",        "%",        "IY"    },
  367.     {Anything,    "IE",        Anything,    "IY"    },
  368.     {Anything,    "I",        "^+:#",        "IH"    },
  369.     {Anything,    "IR",        "#",        "AYr"    },
  370.     {Anything,    "IZ",        "%",        "AYz"    },
  371.     {Anything,    "IS",        "%",        "AYz"    },
  372.     {Anything,    "I",        "D%",        "AY"    },
  373.     {"+^",        "I",        "^+",        "IH"    },
  374.     {Anything,    "I",        "T%",        "AY"    },
  375.     {"#:^",        "I",        "^+",        "IH"    },
  376.     {Anything,    "I",        "^+",        "AY"    },
  377.     {Anything,    "IR",        Anything,    "ER"    },
  378.     {Anything,    "IGH",        Anything,    "AY"    },
  379.     {Anything,    "ILD",        Anything,    "AYld"    },
  380.     {Anything,    "IGN",        Nothing,    "AYn"    },
  381.     {Anything,    "IGN",        "^",        "AYn"    },
  382.     {Anything,    "IGN",        "%",        "AYn"    },
  383.     {Anything,    "IQUE",        Anything,    "IYk"    },
  384.     {Anything,    "I",        Anything,    "IH"    },
  385.     {Anything,    0,        Anything,    Silent    },
  386.     };
  387.  
  388. /* J_rules: a single context-free rule for 'J'.
  389. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  390. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C). */
  391. static Rule J_rules[] =
  392.     {
  393.     {Anything,    "J",        Anything,    "j"    },
  394.     {Anything,    0,        Anything,    Silent    },
  395.     };
  396.  
  397. /* K_rules: rules whose MATCH_PART begins with 'K'.
  398. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  399. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  400. static Rule K_rules[] =
  401.     {
  402.     {Nothing,    "K",        "N",        Silent    },    /* word-initial K before N produces no phonemes */
  403.     {Anything,    "K",        Anything,    "k"    },
  404.     {Anything,    0,        Anything,    Silent    },
  405.     };
  406.  
  407. /* L_rules: rules whose MATCH_PART begins with 'L'.
  408. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  409. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  410. static Rule L_rules[] =
  411.     {
  412.     {Anything,    "LO",        "C#",        "lOW"    },
  413.     {"L",        "L",        Anything,    Silent    },    /* second L of a double L produces no phonemes */
  414.     {"#:^",        "L",        "%",        "AXl"    },
  415.     {Anything,    "LEAD",        Anything,    "lIYd"    },
  416.     {Anything,    "L",        Anything,    "l"    },
  417.     {Anything,    0,        Anything,    Silent    },
  418.     };
  419.  
  420. /* M_rules: rules whose MATCH_PART begins with 'M'.
  421. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  422. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  423. static Rule M_rules[] =
  424.     {
  425.     {Anything,    "MOV",        Anything,    "mUWv"    },
  426.     {Anything,    "M",        Anything,    "m"    },
  427.     {Anything,    0,        Anything,    Silent    },
  428.     };
  429.  
  430. /* N_rules: rules whose MATCH_PART begins with 'N'.
  431. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  432. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  433. static Rule N_rules[] =
  434.     {
  435.     {"E",        "NG",        "+",        "nj"    },
  436.     {Anything,    "NG",        "R",        "NGg"    },
  437.     {Anything,    "NG",        "#",        "NGg"    },
  438.     {Anything,    "NGL",        "%",        "NGgAXl"},
  439.     {Anything,    "NG",        Anything,    "NG"    },
  440.     {Anything,    "NK",        Anything,    "NGk"    },
  441.     {Nothing,    "NOW",        Nothing,    "nAW"    },
  442.     {Anything,    "N",        Anything,    "n"    },
  443.     {Anything,    0,        Anything,    Silent    },
  444.     };
  445.  
  446. /* O_rules: rules whose MATCH_PART begins with 'O'.
  447. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  448. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  449. static Rule O_rules[] =
  450.     {
  451.     {Anything,    "OF",        Nothing,    "AXv"    },
  452.     {Anything,    "OROUGH",    Anything,    "EROW"    },
  453.     {"#:",        "OR",        Nothing,    "ER"    },
  454.     {"#:",        "ORS",        Nothing,    "ERz"    },
  455.     {Anything,    "OR",        Anything,    "AOr"    },
  456.     {Nothing,    "ONE",        Anything,    "wAHn"    },
  457.     {Anything,    "OW",        Anything,    "OW"    },
  458.     {Nothing,    "OVER",        Anything,    "OWvER"    },
  459.     {Anything,    "OV",        Anything,    "AHv"    },
  460.     {Anything,    "O",        "^%",        "OW"    },
  461.     {Anything,    "O",        "^EN",        "OW"    },
  462.     {Anything,    "O",        "^I#",        "OW"    },
  463.     {Anything,    "OL",        "D",        "OWl"    },
  464.     {Anything,    "OUGHT",    Anything,    "AOt"    },
  465.     {Anything,    "OUGH",        Anything,    "AHf"    },
  466.     {Nothing,    "OU",        Anything,    "AW"    },
  467.     {"H",        "OU",        "S#",        "AW"    },
  468.     {Anything,    "OUS",        Anything,    "AXs"    },
  469.     {Anything,    "OUR",        Anything,    "AOr"    },
  470.     {Anything,    "OULD",        Anything,    "UHd"    },
  471.     {"^",        "OU",        "^L",        "AH"    },
  472.     {Anything,    "OUP",        Anything,    "UWp"    },
  473.     {Anything,    "OU",        Anything,    "AW"    },
  474.     {Anything,    "OY",        Anything,    "OY"    },
  475.     {Anything,    "OING",        Anything,    "OWIHNG"},
  476.     {Anything,    "OI",        Anything,    "OY"    },
  477.     {Anything,    "OOR",        Anything,    "AOr"    },
  478.     {Anything,    "OOK",        Anything,    "UHk"    },
  479.     {Anything,    "OOD",        Anything,    "UHd"    },
  480.     {Anything,    "OO",        Anything,    "UW"    },
  481.     {Anything,    "O",        "E",        "OW"    },
  482.     {Anything,    "O",        Nothing,    "OW"    },
  483.     {Anything,    "OA",        Anything,    "OW"    },
  484.     {Nothing,    "ONLY",        Anything,    "OWnlIY"},
  485.     {Nothing,    "ONCE",        Anything,    "wAHns"    },
  486.     {Anything,    "ON'T",        Anything,    "OWnt"    },
  487.     {"C",        "O",        "N",        "AA"    },
  488.     {Anything,    "O",        "NG",        "AO"    },
  489.     {" :^",        "O",        "N",        "AH"    },
  490.     {"I",        "ON",        Anything,    "AXn"    },
  491.     {"#:",        "ON",        Nothing,    "AXn"    },
  492.     {"#^",        "ON",        Anything,    "AXn"    },
  493.     {Anything,    "O",        "ST ",        "OW"    },
  494.     {Anything,    "OF",        "^",        "AOf"    },
  495.     {Anything,    "OTHER",    Anything,    "AHDHER"},
  496.     {Anything,    "OSS",        Nothing,    "AOs"    },
  497.     {"#:^",        "OM",        Anything,    "AHm"    },
  498.     {Anything,    "O",        Anything,    "AA"    },
  499.     {Anything,    0,        Anything,    Silent    },
  500.     };
  501.  
  502. /* P_rules: rules whose MATCH_PART begins with 'P'.
  503. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  504. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  505. static Rule P_rules[] =
  506.     {
  507.     {Anything,    "PH",        Anything,    "f"    },
  508.     {Anything,    "PEOP",        Anything,    "pIYp"    },
  509.     {Anything,    "POW",        Anything,    "pAW"    },
  510.     {Anything,    "PUT",        Nothing,    "pUHt"    },
  511.     {Anything,    "P",        Anything,    "p"    },
  512.     {Anything,    0,        Anything,    Silent    },
  513.     };
  514.  
  515. /* Q_rules: rules whose MATCH_PART begins with 'Q'.
  516. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  517. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  518. static Rule Q_rules[] =
  519.     {
  520.     {Anything,    "QUAR",        Anything,    "kwAOr"    },
  521.     {Anything,    "QU",        Anything,    "kw"    },
  522.     {Anything,    "Q",        Anything,    "k"    },
  523.     {Anything,    0,        Anything,    Silent    },
  524.     };
  525.  
  526. /* R_rules: rules whose MATCH_PART begins with 'R'.
  527. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  528. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  529. static Rule R_rules[] =
  530.     {
  531.     {Nothing,    "RE",        "^#",        "rIY"    },    /* word-initial RE before one consonant then vowel(s) */
  532.     {Anything,    "R",        Anything,    "r"    },
  533.     {Anything,    0,        Anything,    Silent    },
  534.     };
  535.  
  536. /* S_rules: rules whose MATCH_PART begins with 'S'.
  537. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  538. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  539. static Rule S_rules[] =
  540.     {
  541.     {Anything,    "SH",        Anything,    "SH"    },
  542.     {"#",        "SION",        Anything,    "ZHAXn"    },
  543.     {Anything,    "SOME",        Anything,    "sAHm"    },
  544.     {"#",        "SUR",        "#",        "ZHER"    },
  545.     {Anything,    "SUR",        "#",        "SHER"    },
  546.     {"#",        "SU",        "#",        "ZHUW"    },
  547.     {"#",        "SSU",        "#",        "SHUW"    },
  548.     {"#",        "SED",        Nothing,    "zd"    },
  549.     {"#",        "S",        "#",        "z"    },    /* S between vowels is voiced */
  550.     {Anything,    "SAID",        Anything,    "sEHd"    },
  551.     {"^",        "SION",        Anything,    "SHAXn"    },
  552.     {Anything,    "S",        "S",        Silent    },    /* first S of a double S produces no phonemes */
  553.     {".",        "S",        Nothing,    "z"    },    /* word-final S after a voiced consonant */
  554.     {"#:.E",    "S",        Nothing,    "z"    },
  555.     {"#:^##",    "S",        Nothing,    "z"    },
  556.     {"#:^#",    "S",        Nothing,    "s"    },
  557.     {"U",        "S",        Nothing,    "s"    },
  558.     {" :#",        "S",        Nothing,    "z"    },
  559.     {Nothing,    "SCH",        Anything,    "sk"    },
  560.     {Anything,    "S",        "C+",        Silent    },
  561.     {"#",        "SM",        Anything,    "zm"    },
  562.     {"#",        "SN",        "'",        "zAXn"    },
  563.     {Anything,    "S",        Anything,    "s"    },
  564.     {Anything,    0,        Anything,    Silent    },
  565.     };
  566.  
  567. /* T_rules: rules whose MATCH_PART begins with 'T'.
  568. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  569. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  570. static Rule T_rules[] =
  571.     {
  572.     {Nothing,    "THE",        Nothing,    "DHAX"    },
  573.     {Anything,    "TO",        Nothing,    "tUW"    },
  574.     {Anything,    "THAT",        Nothing,    "DHAEt"    },
  575.     {Nothing,    "THIS",        Nothing,    "DHIHs"    },
  576.     {Nothing,    "THEY",        Anything,    "DHEY"    },
  577.     {Nothing,    "THERE",    Anything,    "DHEHr"    },
  578.     {Anything,    "THER",        Anything,    "DHER"    },
  579.     {Anything,    "THEIR",    Anything,    "DHEHr"    },
  580.     {Nothing,    "THAN",        Nothing,    "DHAEn"    },
  581.     {Nothing,    "THEM",        Nothing,    "DHEHm"    },
  582.     {Anything,    "THESE",    Nothing,    "DHIYz"    },
  583.     {Nothing,    "THEN",        Anything,    "DHEHn"    },
  584.     {Anything,    "THROUGH",    Anything,    "THrUW"    },
  585.     {Anything,    "THOSE",    Anything,    "DHOWz"    },
  586.     {Anything,    "THOUGH",    Nothing,    "DHOW"    },
  587.     {Nothing,    "THUS",        Anything,    "DHAHs"    },
  588.     {Anything,    "TH",        Anything,    "TH"    },    /* any TH not matched above: "TH" rather than "DH" */
  589.     {"#:",        "TED",        Nothing,    "tIHd"    },
  590.     {"S",        "TI",        "#N",        "CH"    },
  591.     {Anything,    "TI",        "O",        "SH"    },
  592.     {Anything,    "TI",        "A",        "SH"    },
  593.     {Anything,    "TIEN",        Anything,    "SHAXn"    },
  594.     {Anything,    "TUR",        "#",        "CHER"    },
  595.     {Anything,    "TU",        "A",        "CHUW"    },
  596.     {Nothing,    "TWO",        Anything,    "tUW"    },
  597.     {Anything,    "T",        Anything,    "t"    },
  598.     {Anything,    0,        Anything,    Silent    },
  599.     };
  600.  
  601. /* U_rules: rules whose MATCH_PART begins with 'U'.
  602. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  603. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  604. static Rule U_rules[] =
  605.     {
  606.     {Nothing,    "UN",        "I",        "yUWn"    },
  607.     {Nothing,    "UN",        Anything,    "AHn"    },
  608.     {Nothing,    "UPON",        Anything,    "AXpAOn"},
  609.     {"T",        "UR",        "#",        "UHr"    },    /* UR + vowel(s) after T, S, R, D, L, Z, N, J, TH, CH, SH: "UHr" */
  610.     {"S",        "UR",        "#",        "UHr"    },
  611.     {"R",        "UR",        "#",        "UHr"    },
  612.     {"D",        "UR",        "#",        "UHr"    },
  613.     {"L",        "UR",        "#",        "UHr"    },
  614.     {"Z",        "UR",        "#",        "UHr"    },
  615.     {"N",        "UR",        "#",        "UHr"    },
  616.     {"J",        "UR",        "#",        "UHr"    },
  617.     {"TH",        "UR",        "#",        "UHr"    },
  618.     {"CH",        "UR",        "#",        "UHr"    },
  619.     {"SH",        "UR",        "#",        "UHr"    },
  620.     {Anything,    "UR",        "#",        "yUHr"    },    /* UR + vowel(s) in any left context: "yUHr" */
  621.     {Anything,    "UR",        Anything,    "ER"    },
  622.     {Anything,    "U",        "^ ",        "AH"    },
  623.     {Anything,    "U",        "^^",        "AH"    },
  624.     {Anything,    "UY",        Anything,    "AY"    },
  625.     {" G",        "U",        "#",        Silent    },
  626.     {"G",        "U",        "%",        Silent    },
  627.     {"G",        "U",        "#",        "w"    },
  628.     {"#N",        "U",        Anything,    "yUW"    },
  629.     {"T",        "U",        Anything,    "UW"    },    /* U after T, S, R, D, L, Z, N, J, TH, CH, SH: "UW" */
  630.     {"S",        "U",        Anything,    "UW"    },
  631.     {"R",        "U",        Anything,    "UW"    },
  632.     {"D",        "U",        Anything,    "UW"    },
  633.     {"L",        "U",        Anything,    "UW"    },
  634.     {"Z",        "U",        Anything,    "UW"    },
  635.     {"N",        "U",        Anything,    "UW"    },
  636.     {"J",        "U",        Anything,    "UW"    },
  637.     {"TH",        "U",        Anything,    "UW"    },
  638.     {"CH",        "U",        Anything,    "UW"    },
  639.     {"SH",        "U",        Anything,    "UW"    },
  640.     {Anything,    "U",        Anything,    "yUW"    },
  641.     {Anything,    0,        Anything,    Silent    },
  642.     };
  643.  
  644. /* V_rules: rules whose MATCH_PART begins with 'V'.
  645. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  646. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  647. static Rule V_rules[] =
  648.     {
  649.     {Anything,    "VIEW",        Anything,    "vyUW"    },
  650.     {Anything,    "V",        Anything,    "v"    },
  651.     {Anything,    0,        Anything,    Silent    },
  652.     };
  653.  
  654. /* W_rules: rules whose MATCH_PART begins with 'W'.
  655. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  656. **    The entry with MATCH_PART 0 presumably ends the table (confirm in PHONEME.C); the rule just before it is the context-free default. */
  657. static Rule W_rules[] =
  658.     {
  659.     {Nothing,    "WERE",        Anything,    "wER"    },
  660.     {Anything,    "WA",        "S",        "wAA"    },
  661.     {Anything,    "WA",        "T",        "wAA"    },
  662.     {Anything,    "WHERE",    Anything,    "WHEHr"    },
  663.     {Anything,    "WHAT",        Anything,    "WHAAt"    },
  664.     {Anything,    "WHOL",        Anything,    "hOWl"    },
  665.     {Anything,    "WHO",        Anything,    "hUW"    },
  666.     {Anything,    "WH",        Anything,    "WH"    },
  667.     {Anything,    "WAR",        Anything,    "wAOr"    },
  668.     {Anything,    "WOR",        "^",        "wER"    },
  669.     {Anything,    "WR",        Anything,    "r"    },
  670.     {Anything,    "W",        Anything,    "w"    },
  671.     {Anything,    0,        Anything,    Silent    },
  672.     };
  673.  
  674. /*
  675. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  676. */
  677. static Rule X_rules[] =
  678.     {
  679.     {Anything,    "X",        Anything,    "ks"    },
  680.     {Anything,    0,        Anything,    Silent    },
  681.     };
  682.  
  683. /*
  684. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  685. */
  686. static Rule Y_rules[] =
  687.     {
  688.     {Anything,    "YOUNG",    Anything,    "yAHNG"    },
  689.     {Nothing,    "YOU",        Anything,    "yUW"    },
  690.     {Nothing,    "YES",        Anything,    "yEHs"    },
  691.     {Nothing,    "Y",        Anything,    "y"    },
  692.     {"#:^",        "Y",        Nothing,    "IY"    },
  693.     {"#:^",        "Y",        "I",        "IY"    },
  694.     {" :",        "Y",        Nothing,    "AY"    },
  695.     {" :",        "Y",        "#",        "AY"    },
  696.     {" :",        "Y",        "^+:#",        "IH"    },
  697.     {" :",        "Y",        "^#",        "AY"    },
  698.     {Anything,    "Y",        Anything,    "IH"    },
  699.     {Anything,    0,        Anything,    Silent    },
  700.     };
  701.  
  702. /*
  703. **    LEFT_PART    MATCH_PART    RIGHT_PART    OUT_PART
  704. */
  705. static Rule Z_rules[] =
  706.     {
  707.     {Anything,    "Z",        Anything,    "z"    },
  708.     {Anything,    0,        Anything,    Silent    },
  709.     };
  710.  
  711. Rule *Rules[] =
  712.     {
  713.     punct_rules,
  714.     A_rules, B_rules, C_rules, D_rules, E_rules, F_rules, G_rules, 
  715.     H_rules, I_rules, J_rules, K_rules, L_rules, M_rules, N_rules, 
  716.     O_rules, P_rules, Q_rules, R_rules, S_rules, T_rules, U_rules, 
  717.     V_rules, W_rules, X_rules, Y_rules, Z_rules
  718.     };
  719. -------------------------------------------------------------------------------
  720. **FILE**        PHONEME.C        Translate a single word.
  721. -------------------------------------------------------------------------------
  722. #include <stdio.h>
  723. #include <ctype.h>
  724.  
  725. #define FALSE (0)
  726. #define TRUE (!0)
  727.  
  728. /*
  729. **    English to Phoneme translation.
  730. **
  731. **    Rules are made up of four parts:
  732. **    
  733. **        The left context.
  734. **        The text to match.
  735. **        The right context.
  736. **        The phonemes to substitute for the matched text.
  737. **
  738. **    Procedure:
  739. **
  740. **        Seperate each block of letters (apostrophes included) 
  741. **        and add a space on each side.  For each unmatched 
  742. **        letter in the word, look through the rules where the 
  743. **        text to match starts with the letter in the word.  If 
  744. **        the text to match is found and the right and left 
  745. **        context patterns also match, output the phonemes for 
  746. **        that rule and skip to the next unmatched letter.
  747. **
  748. **
  749. **    Special Context Symbols:
  750. **
  751. **        #    One or more vowels
  752. **        :    Zero or more consonants
  753. **        ^    One consonant.
  754. **        .    One of B, D, V, G, J, L, M, N, R, W or Z (voiced 
  755. **            consonants)
  756. **        %    One of ER, E, ES, ED, ING, ELY (a suffix)
  757. **            (Right context only)
  758. **        +    One of E, I or Y (a "front" vowel)
  759. */
  760.  
  761. typedef char *Rule[4];    /* A rule is four character pointers */
  762.  
  763. extern Rule *Rules[];    /* An array of pointers to rules */
  764.  
  765. int isvowel(chr)
  766.     char chr;
  767.     {
  768.     return (chr == 'A' || chr == 'E' || chr == 'I' || 
  769.         chr == 'O' || chr == 'U');
  770.     }
  771.  
  772. int isconsonant(chr)
  773.     char chr;
  774.     {
  775.     return (isupper(chr) && !isvowel(chr));
  776.     }
  777.  
  778. xlate_word(word)
  779.     char word[];
  780.     {
  781.     int index;    /* Current position in word */
  782.     int type;    /* First letter of match part */
  783.  
  784.     index = 1;    /* Skip the initial blank */
  785.     do
  786.         {
  787.         if (isupper(word[index]))
  788.             type = word[index] - 'A' + 1;
  789.         else
  790.             type = 0;
  791.  
  792.         index = find_rule(word, index, Rules[type]);
  793.         }
  794.     while (word[index] != '\0');
  795.     }
  796.  
  797. find_rule(word, index, rules)
  798.     char word[];
  799.     int index;
  800.     Rule *rules;
  801.     {
  802.     Rule *rule;
  803.     char *left, *match, *right, *output;
  804.     int remainder;
  805.  
  806.     for (;;)    /* Search for the rule */
  807.         {
  808.         rule = rules++;
  809.         match = (*rule)[1];
  810.  
  811.         if (match == 0)    /* bad symbol! */
  812.             {
  813.             fprintf(stderr,
  814. "Error: Can't find rule for: '%c' in \"%s\"\n", word[index], word);
  815.             return index+1;    /* Skip it! */
  816.             }
  817.  
  818.         for (remainder = index; *match != '\0'; match++, remainder++)
  819.             {
  820.             if (*match != word[remainder])
  821.                 break;
  822.             }
  823.  
  824.         if (*match != '\0')    /* found missmatch */
  825.             continue;
  826. /*
  827. printf("\nWord: \"%s\", Index:%4d, Trying: \"%s/%s/%s\" = \"%s\"\n",
  828.     word, index, (*rule)[0], (*rule)[1], (*rule)[2], (*rule)[3]);
  829. */
  830.         left = (*rule)[0];
  831.         right = (*rule)[2];
  832.  
  833.         if (!leftmatch(left, &word[index-1]))
  834.             continue;
  835. /*
  836. printf("leftmatch(\"%s\",\"...%c\") succeded!\n", left, word[index-1]);
  837. */
  838.         if (!rightmatch(right, &word[remainder]))
  839.             continue;
  840. /*
  841. printf("rightmatch(\"%s\",\"%s\") succeded!\n", right, &word[remainder]);
  842. */
  843.         output = (*rule)[3];
  844. /*
  845. printf("Success: ");
  846. */
  847.         outstring(output);
  848.         return remainder;
  849.         }
  850.     }
  851.  
  852.  
  853. leftmatch(pattern, context)
  854.     char *pattern;    /* first char of pattern to match in text */
  855.     char *context;    /* last char of text to be matched */
  856.     {
  857.     char *pat;
  858.     char *text;
  859.     int count;
  860.  
  861.     if (*pattern == '\0')    /* null string matches any context */
  862.         {
  863.         return TRUE;
  864.         }
  865.  
  866.     /* point to last character in pattern string */
  867.     count = strlen(pattern);
  868.     pat = pattern + (count - 1);
  869.  
  870.     text = context;
  871.  
  872.     for (; count > 0; pat--, count--)
  873.         {
  874.         /* First check for simple text or space */
  875.         if (isalpha(*pat) || *pat == '\'' || *pat == ' ')
  876.             if (*pat != *text)
  877.                 return FALSE;
  878.             else
  879.                 {
  880.                 text--;
  881.                 continue;
  882.                 }
  883.  
  884.         switch (*pat)
  885.             {
  886.         case '#':    /* One or more vowels */
  887.             if (!isvowel(*text))
  888.                 return FALSE;
  889.  
  890.             text--;
  891.  
  892.             while (isvowel(*text))
  893.                 text--;
  894.             break;
  895.  
  896.         case ':':    /* Zero or more consonants */
  897.             while (isconsonant(*text))
  898.                 text--;
  899.             break;
  900.  
  901.         case '^':    /* One consonant */
  902.             if (!isconsonant(*text))
  903.                 return FALSE;
  904.             text--;
  905.             break;
  906.  
  907.         case '.':    /* B, D, V, G, J, L, M, N, R, W, Z */
  908.             if (*text != 'B' && *text != 'D' && *text != 'V'
  909.                && *text != 'G' && *text != 'J' && *text != 'L'
  910.                && *text != 'M' && *text != 'N' && *text != 'R'
  911.                && *text != 'W' && *text != 'Z')
  912.                 return FALSE;
  913.             text--;
  914.             break;
  915.  
  916.         case '+':    /* E, I or Y (front vowel) */
  917.             if (*text != 'E' && *text != 'I' && *text != 'Y')
  918.                 return FALSE;
  919.             text--;
  920.             break;
  921.  
  922.         case '%':
  923.         default:
  924.             fprintf(stderr, "Bad char in left rule: '%c'\n", *pat);
  925.             return FALSE;
  926.             }
  927.         }
  928.  
  929.     return TRUE;
  930.     }
  931.  
  932.  
  933. rightmatch(pattern, context)
  934.     char *pattern;    /* first char of pattern to match in text */
  935.     char *context;    /* last char of text to be matched */
  936.     {
  937.     char *pat;
  938.     char *text;
  939.  
  940.     if (*pattern == '\0')    /* null string matches any context */
  941.         return TRUE;
  942.  
  943.     pat = pattern;
  944.     text = context;
  945.  
  946.     for (pat = pattern; *pat != '\0'; pat++)
  947.         {
  948.         /* First check for simple text or space */
  949.         if (isalpha(*pat) || *pat == '\'' || *pat == ' ')
  950.             if (*pat != *text)
  951.                 return FALSE;
  952.             else
  953.                 {
  954.                 text++;
  955.                 continue;
  956.                 }
  957.  
  958.         switch (*pat)
  959.             {
  960.         case '#':    /* One or more vowels */
  961.             if (!isvowel(*text))
  962.                 return FALSE;
  963.  
  964.             text++;
  965.  
  966.             while (isvowel(*text))
  967.                 text++;
  968.             break;
  969.  
  970.         case ':':    /* Zero or more consonants */
  971.             while (isconsonant(*text))
  972.                 text++;
  973.             break;
  974.  
  975.         case '^':    /* One consonant */
  976.             if (!isconsonant(*text))
  977.                 return FALSE;
  978.             text++;
  979.             break;
  980.  
  981.         case '.':    /* B, D, V, G, J, L, M, N, R, W, Z */
  982.             if (*text != 'B' && *text != 'D' && *text != 'V'
  983.                && *text != 'G' && *text != 'J' && *text != 'L'
  984.                && *text != 'M' && *text != 'N' && *text != 'R'
  985.                && *text != 'W' && *text != 'Z')
  986.                 return FALSE;
  987.             text++;
  988.             break;
  989.  
  990.         case '+':    /* E, I or Y (front vowel) */
  991.             if (*text != 'E' && *text != 'I' && *text != 'Y')
  992.                 return FALSE;
  993.             text++;
  994.             break;
  995.  
  996.         case '%':    /* ER, E, ES, ED, ING, ELY (a suffix) */
  997.             if (*text == 'E')
  998.                 {
  999.                 text++;
  1000.                 if (*text == 'L')
  1001.                     {
  1002.                     text++;
  1003.                     if (*text == 'Y')
  1004.                         {
  1005.                         text++;
  1006.                         break;
  1007.                         }
  1008.                     else
  1009.                         {
  1010.                         text--; /* Don't gobble L */
  1011.                         break;
  1012.                         }
  1013.                     }
  1014.                 else
  1015.                 if (*text == 'R' || *text == 'S' 
  1016.                    || *text == 'D')
  1017.                     text++;
  1018.                 break;
  1019.                 }
  1020.             else
  1021.             if (*text == 'I')
  1022.                 {
  1023.                 text++;
  1024.                 if (*text == 'N')
  1025.                     {
  1026.                     text++;
  1027.                     if (*text == 'G')
  1028.                         {
  1029.                         text++;
  1030.                         break;
  1031.                         }
  1032.                     }
  1033.                 return FALSE;
  1034.                 }
  1035.             else
  1036.             return FALSE;
  1037.  
  1038.         default:
  1039.             fprintf(stderr, "Bad char in right rule:'%c'\n", *pat);
  1040.             return FALSE;
  1041.             }
  1042.         }
  1043.  
  1044.     return TRUE;
  1045.     }
  1046. -------------------------------------------------------------------------------
  1047. **FILE**        PARSE.C          Split a file into words.
  1048. -------------------------------------------------------------------------------
  1049. #include <stdio.h>
  1050. #include <ctype.h>
  1051.  
  1052. #define MAX_LENGTH 128
  1053.  
  1054. static FILE *In_file;
  1055. static FILE *Out_file;
  1056.  
  1057. static int Char, Char1, Char2, Char3;
  1058.  
  1059. /*
  1060. ** main(argc, argv)
  1061. **    int argc;
  1062. **    char *argv[];
  1063. **
  1064. **    This is the main program.  It takes up to two file names (input
  1065. **    and output)  and translates the input file to phoneme codes
  1066. **    (see ENGLISH.C) on the output file.
  1067. */
  1068. main(argc, argv)
  1069.     int argc;
  1070.     char *argv[];
  1071.     {
  1072.     if (argc > 3)
  1073.         {
  1074.         fputs("Usage: PHONEME [infile [outfile]]\n", stderr);
  1075.         exit();
  1076.         }
  1077.  
  1078.     if (argc == 1)
  1079.         {
  1080.         fputs("Enter english text:\n", stderr);
  1081.         }
  1082.  
  1083.     if (argc > 1)
  1084.         {
  1085.         In_file = fopen(argv[1], "r");
  1086.         if (In_file == 0)
  1087.             {
  1088.             fputs("Error: Cannot open input file.\n", stderr);
  1089.             fputs("Usage: PHONEME [infile [outfile]]\n", stderr);
  1090.             exit();
  1091.             }
  1092.         }
  1093.     else
  1094.         In_file = stdin;
  1095.  
  1096.     if (argc > 2)
  1097.         {
  1098.         Out_file = fopen(argv[2], "w");
  1099.         if (Out_file == 0)
  1100.             {
  1101.             fputs("Error: Cannot create output file.\n", stderr);
  1102.             fputs("Usage: PHONEME [infile [outfile]]\n", stderr);
  1103.             exit();
  1104.             }
  1105.         }
  1106.     else
  1107.         Out_file = stdout;
  1108.  
  1109.     xlate_file();
  1110.     }
  1111.  
  1112. outstring(string)
  1113.     char *string;
  1114.     {
  1115.     while (*string != '\0')
  1116.         outchar(*string++);
  1117.     }
  1118.  
  1119. outchar(chr)
  1120.     int chr;
  1121.     {
  1122.     fputc(chr,Out_file);
  1123.     }
  1124.  
  1125.  
  1126. int makeupper(character)
  1127.     int character;
  1128.     {
  1129.     if (islower(character))
  1130.         return toupper(character);
  1131.     else
  1132.         return character;
  1133.     }
  1134.  
  1135. new_char()
  1136.     {
  1137.     /*
  1138.     If the cache is full of newline, time to prime the look-ahead
  1139.     again.  If an EOF is found, fill the remainder of the queue with
  1140.     EOF's.
  1141.     */
  1142.     if (Char == '\n'  && Char1 == '\n' && Char2 == '\n' && Char3 == '\n')
  1143.         {    /* prime the pump again */
  1144.         Char = getc(In_file);
  1145.         if (Char == EOF)
  1146.             {
  1147.             Char1 = EOF;
  1148.             Char2 = EOF;
  1149.             Char3 = EOF;
  1150.             return Char;
  1151.             }
  1152.         if (Char == '\n')
  1153.             return Char;
  1154.  
  1155.         Char1 = getc(In_file);
  1156.         if (Char1 == EOF)
  1157.             {
  1158.             Char2 = EOF;
  1159.             Char3 = EOF;
  1160.             return Char;
  1161.             }
  1162.         if (Char1 == '\n')
  1163.             return Char;
  1164.  
  1165.         Char2 = getc(In_file);
  1166.         if (Char2 == EOF)
  1167.             {
  1168.             Char3 = EOF;
  1169.             return Char;
  1170.             }
  1171.         if (Char2 == '\n')
  1172.             return Char;
  1173.  
  1174.         Char3 = getc(In_file);
  1175.         }
  1176.     else
  1177.         {
  1178.         /*
  1179.         Buffer not full of newline, shuffle the characters and
  1180.         either get a new one or propagate a newline or EOF.
  1181.         */
  1182.         Char = Char1;
  1183.         Char1 = Char2;
  1184.         Char2 = Char3;
  1185.         if (Char3 != '\n' && Char3 != EOF)
  1186.             Char3 = getc(In_file);
  1187.         }
  1188.     return Char;
  1189.     }
  1190.  
  1191. /*
  1192. ** xlate_file()
  1193. **
  1194. **    This is the input file translator.  It sets up the first character
  1195. **    and uses it to determine what kind of text follows.
  1196. */
  1197. xlate_file()
  1198.     {
  1199.     /* Prime the queue */
  1200.     Char = '\n';
  1201.     Char1 = '\n';
  1202.     Char2 = '\n';
  1203.     Char3 = '\n';
  1204.     new_char();    /* Fill Char, Char1, Char2 and Char3 */
  1205.  
  1206.     while (Char != EOF)    /* All of the words in the file */
  1207.         {
  1208.         if (isdigit(Char))
  1209.             have_number();
  1210.         else
  1211.         if (isalpha(Char) || Char == '\'')
  1212.             have_letter();
  1213.         else
  1214.         if (Char == '$' && isdigit(Char1))
  1215.             have_dollars();
  1216.         else
  1217.             have_special();
  1218.         }
  1219.     }
  1220.  
  1221. have_dollars()
  1222.     {
  1223.     long int value;
  1224.  
  1225.     value = 0L;
  1226.     for (new_char() ; isdigit(Char) || Char == ',' ; new_char())
  1227.         {
  1228.         if (Char != ',')
  1229.             value = 10 * value + (Char-'0');
  1230.         }
  1231.  
  1232.     say_cardinal(value);    /* Say number of whole dollars */
  1233.  
  1234.     /* Found a character that is a non-digit and non-comma */
  1235.  
  1236.     /* Check for no decimal or no cents digits */
  1237.     if (Char != '.' || !isdigit(Char1))
  1238.         {
  1239.         if (value == 1L)
  1240.             outstring("dAAlER ");
  1241.         else
  1242.             outstring("dAAlAArz ");
  1243.         return;
  1244.         }
  1245.  
  1246.     /* We have '.' followed by a digit */
  1247.  
  1248.     new_char();    /* Skip the period */
  1249.  
  1250.     /* If it is ".dd " say as " DOLLARS AND n CENTS " */
  1251.     if (isdigit(Char1) && !isdigit(Char2))
  1252.         {
  1253.         if (value == 1L)
  1254.             outstring("dAAlER ");
  1255.         else
  1256.             outstring("dAAlAArz ");
  1257.         if (Char == '0' && Char1 == '0')
  1258.             {
  1259.             new_char();    /* Skip tens digit */
  1260.             new_char();    /* Skip units digit */
  1261.             return;
  1262.             }
  1263.  
  1264.         outstring("AAnd ");
  1265.         value = (Char-'0')*10 + Char1-'0';
  1266.         say_cardinal(value);
  1267.  
  1268.         if (value == 1L)
  1269.             outstring("sEHnt ");
  1270.         else
  1271.             outstring("sEHnts ");
  1272.         new_char();    /* Used Char (tens digit) */
  1273.         new_char();    /* Used Char1 (units digit) */
  1274.         return;
  1275.         }
  1276.  
  1277.     /* Otherwise say as "n POINT ddd DOLLARS " */
  1278.  
  1279.     outstring("pOYnt ");
  1280.     for ( ; isdigit(Char) ; new_char())
  1281.         {
  1282.         say_ascii(Char);
  1283.         }
  1284.  
  1285.     outstring("dAAlAArz ");
  1286.  
  1287.     return;
  1288.     }
  1289.  
  1290. have_special()
  1291.     {
  1292.     if (Char == '\n')
  1293.         outchar('\n');
  1294.     else
  1295.     if (!isspace(Char))
  1296.         say_ascii(Char);
  1297.  
  1298.     new_char();
  1299.     return;
  1300.     }
  1301.  
  1302.  
  1303. have_number()
  1304.     {
  1305.     long int value;
  1306.     int lastdigit;
  1307.  
  1308.     value = Char - '0';
  1309.     lastdigit = Char;
  1310.  
  1311.     for (new_char() ; isdigit(Char) ; new_char())
  1312.         {
  1313.         value = 10 * value + (Char-'0');
  1314.         lastdigit = Char;
  1315.         }
  1316.  
  1317.     /* Recognize ordinals based on last digit of number */
  1318.     switch (lastdigit)
  1319.         {
  1320.     case '1':    /* ST */
  1321.         if (makeupper(Char) == 'S' && makeupper(Char1) == 'T' &&
  1322.             !isalpha(Char2) && !isdigit(Char2))
  1323.             {
  1324.             say_ordinal(value);
  1325.             new_char();    /* Used Char */
  1326.             new_char();    /* Used Char1 */
  1327.             return;
  1328.             }
  1329.         break;
  1330.  
  1331.     case '2':    /* ND */
  1332.         if (makeupper(Char) == 'N' && makeupper(Char1) == 'D' &&
  1333.             !isalpha(Char2) && !isdigit(Char2))
  1334.             {
  1335.             say_ordinal(value);
  1336.             new_char();    /* Used Char */
  1337.             new_char();    /* Used Char1 */
  1338.             return;
  1339.             }
  1340.         break;
  1341.  
  1342.     case '3':    /* RD */
  1343.         if (makeupper(Char) == 'R' && makeupper(Char1) == 'D' &&
  1344.             !isalpha(Char2) && !isdigit(Char2))
  1345.             {
  1346.             say_ordinal(value);
  1347.             new_char();    /* Used Char */
  1348.             new_char();    /* Used Char1 */
  1349.             return;
  1350.             }
  1351.         break;
  1352.  
  1353.     case '0':    /* TH */
  1354.     case '4':    /* TH */
  1355.     case '5':    /* TH */
  1356.     case '6':    /* TH */
  1357.     case '7':    /* TH */
  1358.     case '8':    /* TH */
  1359.     case '9':    /* TH */
  1360.         if (makeupper(Char) == 'T' && makeupper(Char1) == 'H' &&
  1361.             !isalpha(Char2) && !isdigit(Char2))
  1362.             {
  1363.             say_ordinal(value);
  1364.             new_char();    /* Used Char */
  1365.             new_char();    /* Used Char1 */
  1366.             return;
  1367.             }
  1368.         break;
  1369.         }
  1370.  
  1371.     say_cardinal(value);
  1372.  
  1373.     /* Recognize decimal points */
  1374.     if (Char == '.' && isdigit(Char1))
  1375.         {
  1376.         outstring("pOYnt ");
  1377.         for (new_char() ; isdigit(Char) ; new_char())
  1378.             {
  1379.             say_ascii(Char);
  1380.             }
  1381.         }
  1382.  
  1383.     /* Spell out trailing abbreviations */
  1384.     if (isalpha(Char))
  1385.         {
  1386.         while (isalpha(Char))
  1387.             {
  1388.             say_ascii(Char);
  1389.             new_char();
  1390.             }
  1391.         }
  1392.  
  1393.     return;
  1394.     }
  1395.  
  1396.  
  1397. have_letter()
  1398.     {
  1399.     char buff[MAX_LENGTH];
  1400.     int count;
  1401.  
  1402.     count = 0;
  1403.     buff[count++] = ' ';    /* Required initial blank */
  1404.  
  1405.     buff[count++] = makeupper(Char);
  1406.  
  1407.     for (new_char() ; isalpha(Char) || Char == '\'' ; new_char())
  1408.         {
  1409.         buff[count++] = makeupper(Char);
  1410.         if (count > MAX_LENGTH-2)
  1411.             {
  1412.             buff[count++] = ' ';
  1413.             buff[count++] = '\0';
  1414.             xlate_word(buff);
  1415.             count = 1;
  1416.             }
  1417.         }
  1418.  
  1419.     buff[count++] = ' ';    /* Required terminating blank */
  1420.     buff[count++] = '\0';
  1421.  
  1422.     /* Check for AAANNN type abbreviations */
  1423.     if (isdigit(Char))
  1424.         {
  1425.         spell_word(buff);
  1426.         return;
  1427.         }
  1428.     else
  1429.     if (strlen(buff) == 3)     /* one character, two spaces */
  1430.         say_ascii(buff[1]);
  1431.     else
  1432.     if (Char == '.')        /* Possible abbreviation */
  1433.         abbrev(buff);
  1434.     else
  1435.         xlate_word(buff);
  1436.  
  1437.     if (Char == '-' && isalpha(Char1))
  1438.         new_char();    /* Skip hyphens */
  1439.  
  1440.     }
  1441.  
  1442. /* Handle abbreviations.  Text in buff was followed by '.' */
  1443. abbrev(buff)
  1444.     char buff[];
  1445.     {
  1446.     if (strcmp(buff, " DR ") == 0)
  1447.         {
  1448.         xlate_word(" DOCTOR ");
  1449.         new_char();
  1450.         }
  1451.     else
  1452.     if (strcmp(buff, " MR ") == 0)
  1453.         {
  1454.         xlate_word(" MISTER ");
  1455.         new_char();
  1456.         }
  1457.     else
  1458.     if (strcmp(buff, " MRS ") == 0)
  1459.         {
  1460.         xlate_word(" MISSUS ");
  1461.         new_char();
  1462.         }
  1463.     else
  1464.     if (strcmp(buff, " PHD ") == 0)
  1465.         {
  1466.         spell_word(" PHD ");
  1467.         new_char();
  1468.         }
  1469.     else
  1470.         xlate_word(buff);
  1471.     }
  1472. -------------------------------------------------------------------------------
  1473. **FILE**        SPELLWORD.C      Spell an ASCII character or word.
  1474. -------------------------------------------------------------------------------
  1475. #include <stdio.h>
  1476.  
  1477. static char *Ascii[] =
  1478.     {
  1479. "nUWl ","stAArt AXv hEHdER ","stAArt AXv tEHkst ","EHnd AXv tEHkst ",
  1480. "EHnd AXv trAEnsmIHSHAXn",
  1481. "EHnkwAYr ","AEk ","bEHl ","bAEkspEYs ","tAEb ","lIHnIYfIYd ",
  1482. "vERtIHkAXl tAEb ","fAOrmfIYd ","kAErAYj rIYtERn ","SHIHft AWt ",
  1483. "SHIHft IHn ","dIHlIYt ","dIHvIHs kAAntrAAl wAHn ","dIHvIHs kAAntrAAl tUW ",
  1484. "dIHvIHs kAAntrAAl THrIY ","dIHvIHs kAAntrAAl fOWr ","nAEk ","sIHnk ",
  1485. "EHnd tEHkst blAAk ","kAEnsEHl ","EHnd AXv mEHsIHj ","sUWbstIHtUWt ",
  1486. "EHskEYp ","fAYEHld sIYpERAEtER ","grUWp sIYpERAEtER ","rIYkAOrd sIYpERAEtER ",
  1487. "yUWnIHt sIYpERAEtER ","spEYs ","EHksklAEmEYSHAXn mAArk ","dAHbl kwOWt ",
  1488. "nUWmbER sAYn ","dAAlER sAYn ","pERsEHnt ","AEmpERsAEnd ","kwOWt ",
  1489. "OWpEHn pEHrEHn ","klOWz pEHrEHn ","AEstEHrIHsk ","plAHs ","kAAmmAX ",
  1490. "mIHnAHs ","pIYrIYAAd ","slAESH ",
  1491.  
  1492. "zIHrOW ","wAHn ","tUW ","THrIY ","fOWr ",
  1493. "fAYv ","sIHks ","sEHvAXn ","EYt ","nAYn ",
  1494.  
  1495. "kAAlAXn ","sEHmIHkAAlAXn ","lEHs DHAEn ","EHkwAXl sAYn ","grEYtER DHAEn ",
  1496. "kwEHsCHAXn mAArk ","AEt sAYn ",
  1497.  
  1498. "EY ","bIY ","sIY ","dIY ","IY ","EHf ","jIY  ",
  1499. "EYtCH ","AY ","jEY ","kEY ","EHl ","EHm ","EHn ","AA ","pIY ",
  1500. "kw ","AAr ","EHz ","tIY ","AHw ","vIY ",
  1501. "dAHblyUWw ","EHks ","wAYIY ","zIY ",
  1502.  
  1503. "lEHft brAEkEHt ","bAEkslAESH ","rAYt brAEkEHt ","kAErEHt ",
  1504. "AHndERskAOr ","AEpAAstrAAfIH ",
  1505.  
  1506. "EY ","bIY ","sIY ","dIY ","IY ","EHf ","jIY  ",
  1507. "EYtCH ","AY ","jEY ","kEY ","EHl ","EHm ","EHn ","AA ","pIY ",
  1508. "kw ","AAr ","EHz ","tIY ","AHw ","vIY ",
  1509. "dAHblyUWw ","EHks ","wAYIY ","zIY ",
  1510.  
  1511. "lEHft brEYs ","vERtIHkAXl bAAr ","rAYt brEYs ","tAYld ","dEHl ",
  1512.     };
  1513.  
  1514. say_ascii(character)
  1515.     int character;
  1516.     {
  1517.     outstring(Ascii[character&0x7F]);
  1518.     }
  1519.  
  1520. spell_word(word)
  1521.     char *word;
  1522.     {
  1523.     for (word++ ; word[1] != '\0' ; word++)
  1524.         outstring(Ascii[(*word)&0x7F]);
  1525.     }
  1526. -------------------------------------------------------------------------------
  1527. **FILE**        SAYNUM.C         Say a cardinal or ordinal number (long int).
  1528. -------------------------------------------------------------------------------
  1529. #include <stdio.h>
  1530.  
  1531. /*
  1532. **              Integer to Readable ASCII Conversion Routine.
  1533. **
  1534. ** Synopsis:
  1535. **
  1536. **      say_cardinal(value)
  1537. **          long int     value;          -- The number to output
  1538. **
  1539. **    The number is translated into a string of phonemes
  1540. **
  1541. */
  1542.  
  1543. static char *Cardinals[] = 
  1544.     {
  1545.     "zIHrOW ",    "wAHn ",    "tUW ",        "THrIY ",
  1546.     "fOWr ",    "fAYv ",    "sIHks ",    "sEHvAXn ",
  1547.     "EYt ",        "nAYn ",        
  1548.     "tEHn ",    "IYlEHvAXn ",    "twEHlv ",    "THERtIYn ",
  1549.     "fOWrtIYn ",    "fIHftIYn ",     "sIHkstIYn ",    "sEHvEHntIYn ",
  1550.     "EYtIYn ",    "nAYntIYn "
  1551.     } ;
  1552.  
  1553. static char *Twenties[] = 
  1554.     {
  1555.     "twEHntIY ",    "THERtIY ",    "fAOrtIY ",    "fIHftIY ",
  1556.     "sIHkstIY ",    "sEHvEHntIY ",    "EYtIY ",    "nAYntIY "
  1557.     } ;
  1558.  
  1559. static char *Ordinals[] = 
  1560.     {
  1561.     "zIHrOWEHTH ",    "fERst ",    "sEHkAHnd ",    "THERd ",
  1562.     "fOWrTH ",    "fIHfTH ",    "sIHksTH ",    "sEHvEHnTH ",
  1563.     "EYtTH ",    "nAYnTH ",        
  1564.     "tEHnTH ",    "IYlEHvEHnTH ",    "twEHlvTH ",    "THERtIYnTH ",
  1565.     "fAOrtIYnTH ",    "fIHftIYnTH ",     "sIHkstIYnTH ",    "sEHvEHntIYnTH ",
  1566.     "EYtIYnTH ",    "nAYntIYnTH "
  1567.     } ;
  1568.  
  1569. static char *Ord_twenties[] = 
  1570.     {
  1571.     "twEHntIYEHTH ","THERtIYEHTH ",    "fOWrtIYEHTH ",    "fIHftIYEHTH ",
  1572.     "sIHkstIYEHTH ","sEHvEHntIYEHTH ","EYtIYEHTH ",    "nAYntIYEHTH "
  1573.     } ;
  1574.  
  1575.  
  1576. /*
  1577. ** Translate a number to phonemes.  This version is for CARDINAL numbers.
  1578. **     Note: this is recursive.
  1579. */
  1580. say_cardinal(value)
  1581.     long int value;
  1582.     {
  1583.     if (value < 0)
  1584.         {
  1585.         outstring("mAYnAHs ");
  1586.         value = (-value);
  1587.         if (value < 0)    /* Overflow!  -32768 */
  1588.             {
  1589.             outstring("IHnfIHnIHtIY ");
  1590.             return;
  1591.             }
  1592.         }
  1593.  
  1594.     if (value >= 1000000000L)    /* Billions */
  1595.         {
  1596.         say_cardinal(value/1000000000L);
  1597.         outstring("bIHlIYAXn ");
  1598.         value = value % 1000000000;
  1599.         if (value == 0)
  1600.             return;        /* Even billion */
  1601.         if (value < 100)    /* as in THREE BILLION AND FIVE */
  1602.             outstring("AEnd ");
  1603.         }
  1604.  
  1605.     if (value >= 1000000L)    /* Millions */
  1606.         {
  1607.         say_cardinal(value/1000000L);
  1608.         outstring("mIHlIYAXn ");
  1609.         value = value % 1000000L;
  1610.         if (value == 0)
  1611.             return;        /* Even million */
  1612.         if (value < 100)    /* as in THREE MILLION AND FIVE */
  1613.             outstring("AEnd ");
  1614.         }
  1615.  
  1616.     /* Thousands 1000..1099 2000..99999 */
  1617.     /* 1100 to 1999 is eleven-hunderd to ninteen-hunderd */
  1618.     if ((value >= 1000L && value <= 1099L) || value >= 2000L)
  1619.         {
  1620.         say_cardinal(value/1000L);
  1621.         outstring("THAWzAEnd ");
  1622.         value = value % 1000L;
  1623.         if (value == 0)
  1624.             return;        /* Even thousand */
  1625.         if (value < 100)    /* as in THREE THOUSAND AND FIVE */
  1626.             outstring("AEnd ");
  1627.         }
  1628.  
  1629.     if (value >= 100L)
  1630.         {
  1631.         outstring(Cardinals[value/100]);
  1632.         outstring("hAHndrEHd ");
  1633.         value = value % 100;
  1634.         if (value == 0)
  1635.             return;        /* Even hundred */
  1636.         }
  1637.  
  1638.     if (value >= 20)
  1639.         {
  1640.         outstring(Twenties[(value-20)/ 10]);
  1641.         value = value % 10;
  1642.         if (value == 0)
  1643.             return;        /* Even ten */
  1644.         }
  1645.  
  1646.     outstring(Cardinals[value]);
  1647.     return;
  1648.     } 
  1649.  
  1650.  
  1651. /*
  1652. ** Translate a number to phonemes.  This version is for ORDINAL numbers.
  1653. **     Note: this is recursive.
  1654. */
  1655. say_ordinal(value)
  1656.     long int value;
  1657.     {
  1658.  
  1659.     if (value < 0)
  1660.         {
  1661.         outstring("mAHnAXs ");
  1662.         value = (-value);
  1663.         if (value < 0)    /* Overflow!  -32768 */
  1664.             {
  1665.             outstring("IHnfIHnIHtIY ");
  1666.             return;
  1667.             }
  1668.         }
  1669.  
  1670.     if (value >= 1000000000L)    /* Billions */
  1671.         {
  1672.         say_cardinal(value/1000000000L);
  1673.         value = value % 1000000000;
  1674.         if (value == 0)
  1675.             {
  1676.             outstring("bIHlIYAXnTH ");
  1677.             return;        /* Even billion */
  1678.             }
  1679.         outstring("bIHlIYAXn ");
  1680.         if (value < 100)    /* as in THREE BILLION AND FIVE */
  1681.             outstring("AEnd ");
  1682.         }
  1683.  
  1684.     if (value >= 1000000L)    /* Millions */
  1685.         {
  1686.         say_cardinal(value/1000000L);
  1687.         value = value % 1000000L;
  1688.         if (value == 0)
  1689.             {
  1690.             outstring("mIHlIYAXnTH ");
  1691.             return;        /* Even million */
  1692.             }
  1693.         outstring("mIHlIYAXn ");
  1694.         if (value < 100)    /* as in THREE MILLION AND FIVE */
  1695.             outstring("AEnd ");
  1696.         }
  1697.  
  1698.     /* Thousands 1000..1099 2000..99999 */
  1699.     /* 1100 to 1999 is eleven-hunderd to ninteen-hunderd */
  1700.     if ((value >= 1000L && value <= 1099L) || value >= 2000L)
  1701.         {
  1702.         say_cardinal(value/1000L);
  1703.         value = value % 1000L;
  1704.         if (value == 0)
  1705.             {
  1706.             outstring("THAWzAEndTH ");
  1707.             return;        /* Even thousand */
  1708.             }
  1709.         outstring("THAWzAEnd ");
  1710.         if (value < 100)    /* as in THREE THOUSAND AND FIVE */
  1711.             outstring("AEnd ");
  1712.         }
  1713.  
  1714.     if (value >= 100L)
  1715.         {
  1716.         outstring(Cardinals[value/100]);
  1717.         value = value % 100;
  1718.         if (value == 0)
  1719.             {
  1720.             outstring("hAHndrEHdTH ");
  1721.             return;        /* Even hundred */
  1722.             }
  1723.         outstring("hAHndrEHd ");
  1724.         }
  1725.  
  1726.     if (value >= 20)
  1727.         {
  1728.         if ((value%10) == 0)
  1729.             {
  1730.             outstring(Ord_twenties[(value-20)/ 10]);
  1731.             return;        /* Even ten */
  1732.             }
  1733.         outstring(Twenties[(value-20)/ 10]);
  1734.         value = value % 10;
  1735.         }
  1736.  
  1737.     outstring(Ordinals[value]);
  1738.     return;
  1739.     } 
  1740. -------------------------------------------------------------------------------
  1741. **FILE**        End of Distribution
  1742. -------------------------------------------------------------------------------
  1743.  
  1744.                 -John A. Wasser
  1745.  
  1746. Work address:
  1747. ARPAnet:        WASSER%VIKING.DEC@decwrl.ARPA
  1748. Usenet:         {allegra,Shasta,decvax}!decwrl!dec-rhea!dec-viking!wasser
  1749. Easynet:        VIKING::WASSER
  1750. Telephone:      (617)486-2505
  1751. USPS:           Digital Equipment Corp.
  1752.                 Mail stop: LJO2/E4
  1753.                 30 Porter Rd
  1754.                 Littleton, MA  01460
  1755.