home *** CD-ROM | disk | FTP | other *** search
Text File | 1989-05-09 | 9.4 KB | 366 lines | [TEXT/PICN] |
- ############################################################################
- #
- # rsg.icn
- #
- # This program generates randomly selected strings ("sen-
- # tences") from a grammar specified by the user. Grammars are
- # basically context-free and resemble BNF in form, although there
- # are a number of extensions.
- #
- # The program works interactively, allowing the user to build,
- # test, modify, and save grammars. Input to rsg consists of various
- # kinds of specifications, which can be intermixed:
- #
- # Productions define nonterminal symbols in a syntax similar to
- # the rewriting rules of BNF with various alternatives consisting
- # of the concatenation of nonterminal and terminal symbols. Gen-
- # eration specifications cause the generation of a specified number
- # of sentences from the language defined by a given nonterminal
- # symbol. Grammar output specifications cause the definition of a
- # specified nonterminal or the entire current grammar to be written
- # to a given file. Source specifications cause subsequent input to
- # be read from a specified file.
- #
- # In addition, any line beginning with # is considered to be a
- # comment, while any line beginning with = causes the rest of that
- # line to be used subsequently as a prompt to the user whenever rsg
- # is ready for input (there normally is no prompt). A line consist-
- # ing of a single = stops prompting.
- #
- # Productions: Examples of productions are:
- #
- # <expr>::=<term>|<term>+<expr>
- # <term>::=<elem>|<elem>*<term>
- # <elem>::=x|y|z|(<expr>)
- #
- # Productions may occur in any order. The definition for a nonter-
- # minal symbol can be changed by specifying a new production for
- # it.
- #
- # There are a number of special devices to facilitate the defin-
- # ition of grammars, including eight predefined, built-in nontermi-
- # nal symbols:
- #
- # symbol definition
- # <lb> <
- # <rb> >
- # <vb> |
- # <nl> newline
- # <> empty string
- # <&lcase> any single lowercase letter
- # <&ucase> any single uppercase letter
- # <&digit> any single digit
- #
- # In addition, if the string between a < and a > begins and ends
- # with a single quotation mark, it stands for any single character
- # between the quotation marks. For example,
- #
- # <'xyz'>
- #
- # is equivalent to
- #
- # x|y|z
- #
- # Generation Specifications: A generation specification consists of
- # a nonterminal symbol followed by a nonnegative integer. An exam-
- # ple is
- #
- # <expr>10
- #
- # which specifies the generation of 10 <expr>s. If the integer is
- # omitted, it is assumed to be 1. Generated sentences are written
- # to standard output.
- #
- # Grammar Output Specifications: A grammar output specification
- # consists of a nonterminal symbol, followed by ->, followed by a
- # file name. Such a specification causes the current definition of
- # the nonterminal symbol to be written to the given file. If the
- # file is omitted, standard output is assumed. If the nonterminal
- # symbol is omitted, the entire grammar is written out. Thus,
- #
- # ->
- #
- # causes the entire grammar to be written to standard output.
- #
- # Source Specifications: A source specification consists of @ fol-
- # lowed by a file name. Subsequent input is read from that file.
- # When an end of file is encountered, input reverts to the previous
- # file. Input files can be nested.
- #
- # Options: The following parameter string options are available:
- #
- # -s n Set the seed for random generation to n. The default
- # seed is 0.
- #
- # -l n Terminate generation if the number of symbols remaining
- # to be processed exceeds n. The default is limit is 1000.
- #
- # -t Trace the generation of sentences. Trace output goes to
- # standard error output.
- #
- # Diagnostics: Syntactically erroneous input lines are noted but
- # are otherwise ignored. Specifications for a file that cannot be
- # opened are noted and treated as erroneous.
- #
- # If an undefined nonterminal symbol is encountered during gen-
- # eration, an error message that identifies the undefined symbol is
- # produced, followed by the partial sentence generated to that
- # point. Exceeding the limit of symbols remaining to be generated
- # as specified by the -l option is handled similarly.
- #
- # Caveats: Generation may fail to terminate because of a loop in
- # the rewriting rules or, more seriously, because of the progres-
- # sive accumulation of nonterminal symbols. The latter problem can
- # be identified by using the -t option and controlled by using the
- # -l option. The problem often can be circumvented by duplicating
- # alternatives that lead to fewer rather than more nonterminal sym-
- # bols. For example, changing
- #
- # <term>::=<elem>|<elem>*<term>
- #
- # to
- #
- # <term>::=<elem>|<elem>|<elem>*<term>
- #
- # increases the probability of selecting <elem> from 1/2 to 2/3.
- #
- # There are many possible extensions to the program. One of the
- # most useful would be a way to specify the probability of select-
- # ing an alternative.
- #
- ############################################################################
- #
- # Links: getopt
- #
- ############################################################################
- link getopt
-
- global defs, filelist, in, limit, prompt, tswitch
-
- record nonterm(name)
- record charset(chars)
-
- procedure main(args)
- local line, plist, s, opts
- # procedures to try on input lines
- plist := [define,generate,grammar,source,comment,prompter,error]
- defs := table() # table of definitions
- defs["lb"] := [["<"]] # built-in definitions
- defs["rb"] := [[">"]]
- defs["vb"] := [["|"]]
- defs["nl"] := [["\n"]]
- defs[""] := [[""]]
- defs["&lcase"] := [[charset(&lcase)]]
- defs["&ucase"] := [[charset(&ucase)]]
- defs["&digit"] := [[charset(&digits)]]
-
- opts := getopt(args,"tl+s+")[1]
- limit := \opts["l"] | 1000
- tswitch := \opts["t"]
- &random := \opts["s"]
-
- filelist := [&input] # stack of input files
- prompt := ""
- while in := pop(filelist) do { # process all files
- repeat {
- line := read(in) | break
- while line[-1] == "\\" do line := line[1:-1] || read(in) | break
- (!plist)(line)
- }
- close(in)
- }
- end
-
- # process alternatives
- #
- procedure alts(defn)
- local alist
- alist := []
- defn ? while put(alist,syms(tab(upto('|') | 0))) do move(1) | break
- return alist
- end
-
- # look for comment
- #
- procedure comment(line)
- if line[1] == "#" then return
- end
-
- # look for definition
- #
- procedure define(line)
- return line ?
- defs[(="<",tab(find(">::=")))] := (move(4),alts(tab(0)))
- end
-
- # define nonterminal
- #
- procedure defnon(sym)
- local chars, name
- if sym ? {
- ="'" &
- chars := cset(tab(-1)) &
- ="'"
- }
- then return charset(chars)
- else return nonterm(sym)
- end
-
- # note erroneous input line
- #
- procedure error(line)
- write("*** erroneous line: ",line)
- return
- end
-
- # generate sentences
- #
- procedure gener(goal)
- local pending, symbol
- pending := [nonterm(goal)]
- while symbol := get(pending) do {
- if \tswitch then
- write(&errout,symimage(symbol),listimage(pending))
- case type(symbol) of {
- "string": writes(symbol)
- "charset": writes(?symbol.chars)
- "nonterm": {
- pending := ?\defs[symbol.name] ||| pending | {
- write(&errout,"*** undefined nonterminal: <",symbol.name,">")
- break
- }
- if *pending > \limit then {
- write(&errout,"*** excessive symbols remaining")
- break
- }
- }
- }
- }
- write()
- end
-
- # look for generation specification
- #
- procedure generate(line)
- local goal, count
- if line ? {
- ="<" &
- goal := tab(upto('>')) \ 1 &
- move(1) &
- count := (pos(0) & 1) | integer(tab(0))
- }
- then {
- every 1 to count do
- gener(goal)
- return
- }
- else fail
- end
-
- # get right hand side of production
- #
- procedure getrhs(a)
- local rhs
- rhs := ""
- every rhs ||:= listimage(!a) || "|"
- return rhs[1:-1]
- end
-
- # look for request to write out grammar
- #
- procedure grammar(line)
- local file, out, name
- if line ? {
- name := tab(find("->")) &
- move(2) &
- file := tab(0) &
- out := if *file = 0 then &output else {
- open(file,"w") | {
- write(&errout,"*** cannot open ",file)
- fail
- }
- }
- }
- then {
- (*name = 0) | (name[1] == "<" & name[-1] == ">") | fail
- pwrite(name,out)
- if *file ~= 0 then close(out)
- return
- }
- else fail
- end
-
- # produce image of list of grammar symbols
- #
- procedure listimage(a)
- local s, x
- s := ""
- every x := !a do
- s ||:= symimage(x)
- return s
- end
-
- # look for new prompt symbol
- #
- procedure prompter(line)
- if line[1] == "=" then {
- prompt := line[2:0]
- return
- }
- end
-
- # write out grammar
- #
- procedure pwrite(name,ofile)
- local nt, a
- static builtin
- initial builtin := ["lb","rb","vb","nl","","&lcase","&ucase","&digit"]
- if *name = 0 then {
- a := sort(defs,3)
- while nt := get(a) do {
- if nt == !builtin then {
- get(a)
- next
- }
- write(ofile,"<",nt,">::=",getrhs(get(a)))
- }
- }
- else write(ofile,name,"::=",getrhs(\defs[name[2:-1]])) |
- write("*** undefined nonterminal: ",name)
- end
-
- # look for file with input
- #
- procedure source(line)
- local new
- return line ? {
- ="@" &
- (new := open(tab(0))) &
- push(filelist,in) &
- (in := new)
- }
-
- end
-
- # produce string image of grammar symbol
- #
- procedure symimage(x)
- return case type(x) of {
- "string": x
- "nonterm": "<" || x.name || ">"
- "charset": "<'" || x.chars || "'>"
- }
- end
-
- # process the symbols in an alternative
- #
- procedure syms(alt)
- local slist
- static nonbrack
- initial nonbrack := ~'<'
- slist := []
- alt ? while put(slist,tab(many(nonbrack)) |
- defnon(2(="<",tab(upto('>')),move(1))))
- return slist
- end
-