home *** CD-ROM | disk | FTP | other *** search
- ############################################################################
- #
- # File: csgen.icn
- #
- # Subject: Program to generate context-sensitive sentences
- #
- # Author: Ralph E. Griswold
- #
- # Date: June 10, 1988
- #
- ###########################################################################
- #
- # This program accepts a context-sensitive production grammar
- # and generates randomly selected sentences from the corresponding
- # language.
- #
- # Uppercase letters stand for nonterminal symbols and -> indi-
- # cates the lefthand side can be rewritten by the righthand side.
- # Other characters are considered to be terminal symbols. Lines
- # beginning with # are considered to be comments and are ignored.
- # A line consisting of a nonterminal symbol followed by a colon and
- # a nonnegative integer i is a generation specification for i
- # instances of sentences for the language defined by the nontermi-
- # nal (goal) symbol. An example of input to csgen is:
- #
- # # a(n)b(n)c(n)
- # # Salomaa, p. 11.
- # # Attributed to M. Soittola.
- # #
- # X->abc
- # X->aYbc
- # Yb->bY
- # Yc->Zbcc
- # bZ->Zb
- # aZ->aaY
- # aZ->aa
- # X:10
- #
- # The output of csgen for this example is
- #
- # aaabbbccc
- # aaaaaaaaabbbbbbbbbccccccccc
- # abc
- # aabbcc
- # aabbcc
- # aaabbbccc
- # aabbcc
- # abc
- # aaaabbbbcccc
- # aaabbbccc
- #
- #
- # A positive integer followed by a colon can be prefixed to a
- # production to replicate that production, making its selection
- # more likely. For example,
- #
- # 3:X->abc
- #
- # is equivalent to
- #
- # X->abc
- # X->abc
- # X->abc
- #
- # Option: The -t option writes a trace of the derivations to stan-
- # dard error output.
- #
- # Limitations: Nonterminal symbols can only be represented by sin-
- # gle uppercase letters, and there is no way to represent uppercase
- # letters as terminal symbols.
- #
- # There can be only one generation specification and it must
- # appear as the last line of input.
- #
- # Comments: Generation of context-sensitive strings is a slow pro-
- # cess. It may not terminate, either because of a loop in the
- # rewriting rules or because of the progressive accumulation of
- # nonterminal symbols. The program avoids deadlock, in which there
- # are no possible rewrites for a string in the derivation.
- #
- # This program would be improved if the specification of nonter-
- # minal symbols were more general, as in rsg.
- #
- ############################################################################
- #
- # Links: options
- #
- ############################################################################
-
- link options
-
- global xlist
-
- procedure main(args)
- local line, goal, count, s, opts, deadlock
-
- opts := options(args,"x")
- deadlock := \opts["x"]
- while line := read() do # read in grammar
- if line[1] == "#" then next
- else if xpairs(line) then next
- else {
- line ? (goal := move(1),move(1),count := (0 < integer(tab(0))))
- break
- }
- if /count then stop("no goal specification")
- every 1 to count do { # generate sentences
- s := goal
- while upto(&ucase,s) do { # test for nonterminal
- if \deadlock then write(&errout,s)
- # quit on deadlock
- if not(s ? replace(!xlist)) then break next
- until s ?:= replace(?xlist) # make replacement
- }
- write(s)
- }
- end
-
- # replace left hand side by right hand side
- #
- procedure replace(a)
- suspend tab(find(a[1])) || (move(*a[1]),a[2]) || tab(0)
- end
-
- # enter rewriting rule
- #
- procedure xpairs(s)
- local i, a
- initial xlist := []
- if s ? {
- # handle optional replication factor
- i := 1(0 < integer(tab(upto(':'))),move(1)) | 1 &
- a := [tab(find("->")),(move(2),tab(0))]
- }
- then {
- every 1 to i do put(xlist,a)
- return
- }
- end
-