home *** CD-ROM | disk | FTP | other *** search
- Path: sparky!uunet!spool.mu.edu!olivea!pagesat!spssig.spss.com!uchinews!ellis!goer
- From: goer@ellis.uchicago.edu (Richard L. Goerwitz)
- Newsgroups: comp.lang.icon
- Subject: parser generator, part 2
- Message-ID: <1993Jan3.211844.28467@midway.uchicago.edu>
- Date: 3 Jan 93 21:18:44 GMT
- Sender: news@uchinews.uchicago.edu (News System)
- Reply-To: goer@midway.uchicago.edu
- Organization: University of Chicago
- Lines: 926
-
- ---- Cut Here and feed the following to sh ----
- #!/bin/sh
- # this is ibpag.02 (part 2 of a multipart archive)
- # do not concatenate these parts, unpack them in order with /bin/sh
- # file ibpag.icn continued
- #
- if test ! -r _shar_seq_.tmp; then
- echo 'Please unpack part 1 first!'
- exit 1
- fi
- (read Scheck
- if test "$Scheck" != 2; then
- echo Please unpack part "$Scheck" next!
- exit 1
- else
- exit 0
- fi
- ) < _shar_seq_.tmp || exit 1
- if test ! -f _shar_wnt_.tmp; then
- echo 'x - still skipping ibpag.icn'
- else
- echo 'x - continuing file ibpag.icn'
- sed 's/^X//' << 'SHAR_EOF' >> 'ibpag.icn' &&
- X write(f2, "\t\t # be in, and push that state onto the state stack.")
- X write(f2, "\t\t #")
- X write(f2, "\t\t push(state_stack,")
- X write(f2, "\t\t\t glst[state_stack[1]][act.sym])")
- X write(f2, "\t\t #")
- X write(f2, "\t\t # Call the code associated with the current")
- X write(f2, "\t\t # reduction, and push the result onto the stack.")
- X write(f2, "\t\t # For more results, push a coexpression instead.")
- X write(f2, "\t\t #")
- X write(f2, "\t\t push(value_stack, (proc(act.procname)!arglist)) | {")
- X write(f2, "\t\t\t# On failure, return the stacks to the state")
- X write(f2, "\t\t\t# they were just after the last reduction (i.e.")
- X write(f2, "\t\t\t# before any tokens for the current production")
- X write(f2, "\t\t\t# were pushed onto the stack).")
- X write(f2, "\t\t\tpop(state_stack)")
- X write(f2, "\t\t\treturn iparse_error(alst, state_stack, value_stack,")
- X write(f2, "\t\t\t\t\t token, next_token, err_state + 1)")
- X write(f2, "\t\t }")
- X write(f2, "\t\t}")
- X write(f2, "\t\t\"AC\" : {")
- X write(f2, "\t\t #")
- X write(f2, "\t\t # We're done. Return the last result.")
- X write(f2, "\t\t #")
- X write(f2, "\t\t return value_stack[1]")
- X write(f2, "\t }")
- X write(f2, "\t }")
- X write(f2, "\t}")
- X write(f2, " }")
- X write(f2, " write(&errout, \"iparse: unexpected end of input\")")
- X write(f2, " fail")
- X write(f2, "")
- X write(f2, "end")
- X write(f2, "")
- X write(f2, "")
- X write(f2, "#")
- X write(f2, "# iparse_error: list x list x list x TOK x coexpression x integer -> ?")
- X write(f2, "# (alst, state_stack, value_stack, token,")
- X write(f2, "#\t\t\t next_token, err_state) -> ?")
- X write(f2, "#")
- X write(f2, "# Where alst is the action list, where state_stack is the state")
- X write(f2, "# stack used by iparse, where value stack is the value stack used")
- X write(f2, "# by iparse, where token is the current lookahead TOK record,")
- X write(f2, "# where next_token is the coexpression from which we get our")
- X write(f2, "# tokens, and where err_state indicates how many recursive calls")
- X write(f2, "# we've made to the parser via the error handler without a")
- X write(f2, "# recovery.")
- X write(f2, "#")
- X write(f2, "# Recursively calls iparse, attempting to restart the parser after")
- X write(f2, "# an error. Increments global \"errors\" variable (a count of the")
- X write(f2, "# number of errors encountered, minus cascading series of errors).")
- X write(f2, "#")
- X write(f2, "procedure iparse_error(alst, state_stack, value_stack,")
- X write(f2, "\t\t token, next_token, err_state)")
- X write(f2, "")
- X write(f2, " local sym, i, state_stack2, value_stack2, next_token2")
- X write(f2, " static tlst")
- X write(f2, " #global line_number, errors")
- X write(f2, " initial {")
- X every lname := "tlst" do {
- X encode(variable(lname)) ? {
- X writes(f2, "\t", lname, " := decode(\"")
- X if write(f2, move(47), "_") then {
- X while write(f2, "\t ",move(60), "_")
- X write(f2, "\t ", tab(0), "\")")
- X }
- X else write(f2, tab(0), "\")")
- X }
- X }
- X write(f2, " }")
- X write(f2, "")
- X write(f2, " #")
- X write(f2, " # Check to see how many attempts we have made at a resync. If")
- X write(f2, " # this is a new error series, increment the global \"errors\" count.")
- X write(f2, " #")
- X write(f2, " if err_state > 3 then {")
- X write(f2, "\tif \\fail_on_error then fail")
- X write(f2, "\telse stop(\"iparse_error: unable to resync after error; aborting\")")
- X write(f2, " }")
- X write(f2, " if err_state = 1 then")
- X write(f2, "\terrors +:= 1\t\t# errors is global")
- X write(f2, "")
- X write(f2, " # If \"error\" is in tlst, then there are error productions in the")
- X write(f2, " # grammar. See if we can back into one from here. Don't try this")
- X write(f2, " # for error states greater than 1. Otherwise we'll get a")
- X write(f2, " # cascading series of stack truncations.")
- X write(f2, " #")
- X write(f2, " if err_state = 1 then {")
- X write(f2, "\tif member(tlst, \"error\") then {")
- X write(f2, "\t every i := 1 to 2 do {")
- X write(f2, "\t\tif \\alst[state_stack[i]][\"error\"] then {")
- X write(f2, "\t\t state_stack2 := state_stack[i:0] | break")
- X write(f2, "\t\t value_stack2 := value_stack[i:0]")
- X write(f2, "\t\t next_token2 := create TOK(\"error\") | token | |@next_token")
- X write(f2, "\t\t return iparse(&null, state_stack2, value_stack2,")
- X write(f2, "\t\t\t\t next_token2, err_state)")
- X write(f2, "\t\t}")
- X write(f2, "\t }")
- X write(f2, "\t}")
- X write(f2, " }")
- X write(f2, "")
- X write(f2, " if \\fail_on_error then fail")
- X write(f2, " #")
- X write(f2, " # Check to see if the grammar even has this pre-terminal.")
- X write(f2, " #")
- X write(f2, " if not member(tlst, token.sym) then {")
- X write(f2, "\twrites(&errout, \"iparse_error: unknown token, \", token.sym)")
- X write(f2, "\twrite(\", in line \", 0 < \\line_number) | write()")
- X write(f2, " }")
- X write(f2, " # Only note the first in a series of cascading errors.")
- X write(f2, " else if err_state = 1 then {")
- X write(f2, "\twrites(&errout, \"iparse_error: syntax error\")")
- X write(f2, "\twrite(\" line \", 0 < \\line_number) | write()")
- X write(f2, " }")
- X write(f2, "")
- X write(f2, " #")
- X write(f2, " # Now, try to shift in the next input token to see if we can")
- X write(f2, " # resync the parser. Stream argument is null because next_token")
- X write(f2, " # has already been created.")
- X write(f2, " #")
- X write(f2, " return iparse(&null, state_stack, value_stack, next_token, err_state)")
- X write(f2, "")
- X write(f2, "end")
- X write(f2, "link structs")
- X write(f2, "")
- X write(f2, "#")
- X write(f2, "# dump_lists: file x list x list -> (null)")
- X write(f2, "# (f, gl, al) -> (null)")
- X write(f2, "#")
- X write(f2, "# Where f is an open file, gl is the goto list, and al is the")
- X write(f2, "# action list. Writes to file f a human-readable dump of the goto")
- X write(f2, "# and action list.")
- X write(f2, "#")
- X write(f2, "procedure dump_lists(f, al, gl)")
- X write(f2, "")
- X write(f2, " local TAB, look_list, red_list, i, sym, act")
- X write(f2, "")
- X write(f2, " TAB := \"\\t\"")
- X write(f2, " look_list := list()")
- X write(f2, " red_list := list()")
- X write(f2, "")
- X write(f2, " every i := 1 to *al do {")
- X write(f2, "\tevery INSERT(look_list, key(\\al[i]))")
- X write(f2, "\tif /al[i] then")
- X write(f2, "\t write(&errout, \"dump_lists: warning! state \", i, \" is null\")")
- X write(f2, " }")
- X write(f2, "")
- X write(f2, " writes(f, TAB)")
- X write(f2, " every i := 1 to *look_list do")
- X write(f2, "\twrites(f, look_list[i], TAB)")
- X write(f2, " write(f)")
- X write(f2, " every i := 1 to *al do {")
- X write(f2, "\twrites(f, i, TAB)")
- X write(f2, "\tact := \"\"")
- X write(f2, "\tevery sym := !look_list do {")
- X write(f2, "\t if \\al[i][sym] then {")
- X write(f2, "\t\t# al[i][sym][1] will fail for the accept action; hence")
- X write(f2, "\t\t# the \"\". Otherwise al[i][sym][1] selects that state")
- X write(f2, "\t\t# field of a SH or RE record.")
- X write(f2, "\t\twrites(f, map(type(al[i][sym])), al[i][sym][1] | \"\")")
- X write(f2, "\t\tif type(al[i][sym]) == \"RE\" then {")
- X write(f2, "\t\t INSERT(red_list, al[i][sym].sym)")
- X write(f2, "\t\t writes(f, al[i][sym].sym)")
- X write(f2, "\t\t}")
- X write(f2, "\t }")
- X write(f2, "\t writes(f,TAB)")
- X write(f2, "\t}")
- X write(f2, "\twrite(f)")
- X write(f2, " }")
- X write(f2, " write(f)")
- X write(f2, "")
- X write(f2, " writes(f, TAB)")
- X write(f2, " every i := 1 to *red_list do")
- X write(f2, "\twrites(f, red_list[i], TAB)")
- X write(f2, " write(f)")
- X write(f2, " every i := 1 to *gl do {")
- X write(f2, "\twrites(f, i, TAB)")
- X write(f2, "\tact := \"\"")
- X write(f2, "\tevery sym := !red_list do {")
- X write(f2, "\t if \\(\\gl[i])[sym] then")
- X write(f2, "\t\twrites(f, gl[i][sym])")
- X write(f2, "\t writes(f, TAB)")
- X write(f2, "\t}")
- X write(f2, "\twrite(f)")
- X write(f2, " }")
- X write(f2, "")
- X write(f2, "end")
- X write(f2, "")
- X write(f2, "#")
- X write(f2, "# INSERT: set or list x record -> set or list")
- X write(f2, "# \t (sset, rec) -> sset")
- X write(f2, "#")
- X write(f2, "# Where sset is a homogenous set or list of records, rec is a")
- X write(f2, "# record, and the return value is sset, with rec added, iff an")
- X write(f2, "# equivalent record was not there already. Otherwise, sset is")
- X write(f2, "# returned unchanged. INSERT(), _unlike insert(), FAILS IF REC")
- X write(f2, "# IS ALREADY PRESENT IN SSET.")
- X write(f2, "#")
- X write(f2, "# This procedure is used by dump_lists() above. If you delete")
- X write(f2, "# dump_lists(), delete this as well, as also Equiv() below.")
- X write(f2, "#")
- X write(f2, "procedure INSERT(sset, rec)")
- X write(f2, "")
- X write(f2, " local addto, Eq")
- X write(f2, " #")
- X write(f2, " # Decide how to add members to sset, depending on its type.")
- X write(f2, " #")
- X write(f2, "\tcase type(sset) of {")
- X write(f2, "\t \"set\" : { addto := insert; Eq := equiv }")
- X write(f2, "\t \"list\" : { addto := put; Eq := Equiv }")
- X write(f2, "\t default : stop(\"INSERT: wrong type argument (\",type(sset),\")\")")
- X write(f2, "\t}")
- X write(f2, "")
- X write(f2, " # Rudumentary error check to be sure the object to be inserted")
- X write(f2, " # into sset is of the same time as the objects already there.")
- X write(f2, " #")
- X write(f2, " if *sset > 0 then")
- X write(f2, "\ttype(rec) == type(sset[1]) |")
- X write(f2, "\t stop(\"INSERT: unexpected type difference\")")
- X write(f2, "")
- X write(f2, " #")
- X write(f2, " # If a rec-like item isn't in sset, add it to sset.")
- X write(f2, " #")
- X write(f2, " if Eq(!sset, rec) then fail")
- X write(f2, " else return addto(sset, rec)")
- X write(f2, "")
- X write(f2, "end")
- X write(f2, "\t")
- X write(f2, "")
- X write(f2, "#")
- X write(f2, "# Equiv: struct x struct -> struct")
- X write(f2, "# (x1, x2) -> x2")
- X write(f2, "#")
- X write(f2, "# Where x1 and x2 are arbitrary structures. Returns x2 if x1 and")
- X write(f2, "# x2 are structurally equivalent (even if not identical). Taken")
- X write(f2, "# from the IPL file \"structs.icn,\" and gutted so that it assumes")
- X write(f2, "# all structures are \"ordered\" (i.e. not sets or tables). Has no")
- X write(f2, "# way of handling procedures or files, either. (Pretty limited,")
- X write(f2, "# huh?)")
- X write(f2, "#")
- X write(f2, "procedure Equiv(x1, x2, done)")
- X write(f2, "")
- X write(f2, " local code, i")
- X write(f2, "")
- X write(f2, " if x1 === x2 then return x2\t\t# Covers everything but structures.")
- X write(f2, " if type(x1) ~== type(x2) then fail\t# Must be same type.")
- X write(f2, " if *x1 ~= *x2 then fail")
- X write(f2, "")
- X write(f2, " image(x1) ? (code := (=\"record\" | type(x1)))")
- X write(f2, " case code of {")
- X write(f2, " \"list\" | \"record\" :")
- X write(f2, "\t every i := *x1 to 1 by -1 do")
- X write(f2, "\t Equiv(x1[i],x2[i]) | fail")
- X write(f2, " \"set\" | \"table\" : stop(\"error: Equiv used (wrongly) for equiv.\")")
- X write(f2, " \"procedure\" | \"file\" : stop(\"error: Equiv used (wrongly) for equiv.\")")
- X write(f2, " default : fail")
- X write(f2, " }")
- X write(f2, " return x2")
- X write(f2, "")
- X write(f2, "end")
- X
- X if \DEBUG then
- X dump_lists(&errout, alst, glst)
- X if \VERBOSE then
- X write(&errout, "Done.")
- X
- Xend
- SHAR_EOF
- echo 'File ibpag.icn is complete' &&
- true || echo 'restore of ibpag.icn failed'
- rm -f _shar_wnt_.tmp
- fi
- # ============= maketbls.icn ==============
- if test -f 'maketbls.icn' -a X"$1" != X"-c"; then
- echo 'x - skipping maketbls.icn (File already exists)'
- rm -f _shar_wnt_.tmp
- else
- > _shar_wnt_.tmp
- echo 'x - extracting maketbls.icn (Text)'
- sed 's/^X//' << 'SHAR_EOF' > 'maketbls.icn' &&
- X############################################################################
- X#
- X# Name: maketbls.icn
- X#
- X# Title: make (state & goto) tables for IBPAG
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.27
- X#
- X############################################################################
- X#
- X# Given a table of productions (global, ptbl; see makeptbl.icn),
- X# CONST_TABLE (below) creates a state and goto table, which ibpag.icn
- X# then merges with the original source file.
- X#
- X############################################################################
- X#
- X# Links: codeobj, rewrap
- X#
- X# See also: ibpag.icn, preproc.icn
- X#
- X############################################################################
- X
- X# link codeobj, rewrap
- Xlink codeobj
- X
- Xrecord item(LHS, RHS, POS, LOOK, by_rule)
- Xrecord ACT(str, state, by_rule, POS, sym, size)
- X
- X# Declared in preproc.icn -
- X# record symbol(str, terminal)
- X# record rule(LHS, RHS, priority, associativity, procname)
- X
- X# start_symbol is set to "S" by default in CONST_STATES()
- Xglobal ptbl, alst, glst, start_symbol
- X# declared in ibpag.icn
- X# global DEBUG, VERBOSE
- X
- X#
- X# CONST_TABLE: -> (null)
- X#
- X# Operates entirely via side-effects. Alst will become the action
- X# list and glst will become the goto list. The action list is
- X# used to determine whether to shift, reduce, or accept; the goto
- X# list indicates what state to go to after a reduction. Their
- X# format, in general, is: Offset = state; value = table of
- X# directives. They are, in other words, lists of tables.
- X#
- Xprocedure CONST_TABLE()
- X
- X local C, i, j, l, it, act, next_state
- X static big_item
- X initial big_item :=
- X item(start_symbol || "'", [symbol(start_symbol)], 2, "$", rule(,,1))
- X
- X C := CONST_STATES()
- X alst := list(*C); every !alst := table()
- X glst := list(*C); every !glst := table()
- X
- X every l := C[i := 1 to *C] do {
- X if \VERBOSE then
- X write(&errout, "CONST_TABLE: entering actions for state ", i)
- X every it := !l do {
- X # If we have a complete production, enter a reduce action
- X # into the action list. A special sub-case of reduce is
- X # accept (which occurs when the state contains (S' -> S.,
- X # $)).
- X if it.POS > *it.RHS then {
- X if Equiv(it, big_item)
- X then act := ACT("accept", &null, it.by_rule, it.POS)
- X else act := ACT("reduce", &null, it.by_rule, it.POS)
- X # Check to see if we have a conflict; if so, resolve.
- X if not (/alst[i][it.LOOK] := act) then
- X resolve(alst, act, it.LOOK, i)
- X }
- X else {
- X # If it's a terminal, see if GOTO_ITEMS for that
- X # symbol and the current state = another state; if so,
- X # enter shift + a jump to that state into the action
- X # list.
- X if \it.RHS[it.POS].terminal then {
- X next_state := GOTO_ITEMS(l, it.RHS[it.POS])
- X if Equiv(next_state, C[j := 1 to *C]) then {
- X# C[j] := next_state
- X # create an action to enter into the action list
- X act := ACT("shift", j, it.by_rule, it.POS)
- X # If the table entry is occupied, resolve conflict.
- X if not (/alst[i][it.RHS[it.POS].str] := act) then
- X resolve(alst, act, it.RHS[it.POS].str, i)
- X }
- X }
- X }
- X }
- X }
- X
- X glst := list(*C)
- X # Do we ever get conflicts here?
- X every l := C[i := 1 to *C] do {
- X if \VERBOSE then
- X write(&errout, "CONST_TABLE: entering gotos for state ", i)
- X every it := !l do {
- X \it.RHS[it.POS].terminal & next
- X next_state := GOTO_ITEMS(l, it.RHS[it.POS])
- X if Equiv(next_state, C[j := 1 to *C]) then {
- X# C[j] := next_state
- X # If the dot is at the end of the RHS, then we can't
- X # enter any goto for that state. There has to be a
- X # nonterminal after the dot.
- X if it.RHS[it.POS] then {
- X /glst[i] := table()
- X glst[i][it.RHS[it.POS].str] := j
- X }
- X }
- X }
- X }
- X
- X return
- X
- Xend
- X
- X
- X#
- X# resolve: resolve conflicts in action list
- X#
- X# Abort on reduce/reduce conflicts. There is no reason why these
- X# should be present in the grammar. Resolve shift/reduce
- X# conflicts in favor of a shift, in cases where the priorities are
- X# the same, unless the first rule is left associative (which
- X# implies a reduce). Shift/reduce conflicts for rules without an
- X# associativity are errors, and bring about termination of
- X# processing.
- X#
- Xprocedure resolve(l, act, subscr, i)
- X
- X if Equiv(l[i][subscr], act)
- X then fail
- X # Use the rule with the highest priority.
- X if l[i][subscr].by_rule.priority ~= act.by_rule.priority then {
- X if \VERBOSE then
- X show_conflict(act, l[i][subscr], subscr, i)
- X if l[i][subscr].by_rule.priority < act.by_rule.priority then {
- X l[i][subscr] := act
- X if \VERBOSE then
- X write(&errout, "first rule's precedence is higher")
- X } else {
- X if \VERBOSE then
- X write(&errout, "second rule's precedence is higher")
- X }
- X }
- X # precedences are the same; resolve via associativity and defaults
- X else {
- X #
- X # If the priorities are the same, then resolve the conflict or
- X # abort.
- X #
- X # Still to be done: Handle associativities.
- X #
- X case act.str of {
- X "shift" : {
- X if l[i][subscr].str == "reduce" then {
- X if \VERBOSE then {
- X show_conflict(act, l[i][subscr], subscr, i)
- X write(&errout, "first rule is ",
- X act.by_rule.associativity, " associative")
- X if act.by_rule.associativity ~==
- X l[i][subscr].by_rule.associativity
- X then write(&errout, "associativities differ!")
- X }
- X case act.by_rule.associativity of {
- X "none" : oh_no(&null, 50)
- X "left" : if \VERBOSE then
- X write(&errout, "resolving in favor of reduce")
- X "right": {
- X l[i][subscr] := act
- X if \VERBOSE then
- X write(&errout, "resolving in favor of shift")
- X }
- X }
- X }
- X # else do nothing -
- X # Shift-shift conflicts are not errors. A shift is a
- X # shift.
- X }
- X "reduce" : {
- X if l[i][subscr].str == "reduce" then {
- X # Flag reduce-reduce conflicts as errors for now.
- X # Yacc uses the first rule in the grammar.
- X show_conflict(act, l[i][subscr], subscr, i)
- X oh_no(&null, 51)
- X }
- X else {
- X if \VERBOSE then {
- X show_conflict(act, l[i][subscr], subscr, i)
- X write(&errout, "first rule is ",
- X act.by_rule.associativity, " associative")
- X if act.by_rule.associativity ~==
- X l[i][subscr].by_rule.associativity
- X then write(&errout, "associativities differ!")
- X }
- X case act.by_rule.associativity of {
- X "none" : oh_no(&null, 50)
- X "right": if \VERBOSE then
- X write(&errout, "resolving in favor of shift")
- X "left" : {
- X l[i][subscr] := act
- X if \VERBOSE then
- X write(&errout, "resolving in favor of reduce")
- X }
- X }
- X }
- X }
- X }
- X }
- X return
- X
- Xend
- X
- X
- X#
- X# CONST_STATES: -> list of lists
- X# -> C
- X#
- X# Where C is a list of lists containing item records. Each list
- X# in C represents a state. Uses the global table ptbl, which is
- X# of the form keys = LHS, values = lists of rule records.
- X#
- X# Calls itself recursively, and in this case takes one argument.
- X# On the first call introduces the production (S' -> .S, $), which
- X# is used as the first state in C from which the others are built.
- X#
- X# Argument two (i) is used for recursive calls and should be
- X# ignored.
- X#
- Xprocedure CONST_STATES(C, i)
- X
- X local C2, it, sym, item_list, next_items
- X # global ptbl, start_symbol
- X
- X # write(&errout, "CONST_STATES: performing closure on S'")
- X /C := [ CLOSURE([item(start_symbol || "'",
- X [symbol(start_symbol)], 1, "$", rule(,,1))]) ]
- X C2 := copy(C)
- X /i := 0
- X
- X every item_list := C[i := i+1 to *C] do {
- X if \VERBOSE then
- X write(&errout, "CONST_STATES: examining item list #", i)
- X if \DEBUG then
- X write(&errout, item_list_2_string(C[i]))
- X next_items := list()
- X every it := !item_list do
- X INSERT(next_items, it.RHS[it.POS])
- X every sym := !next_items do
- X INSERT(C2, GOTO_ITEMS(item_list, sym))
- X }
- X if *C2 > *C
- X then return CONST_STATES(C2, i)
- X else return C
- X
- Xend
- X
- X
- X#
- X# FIRST(RHS): list of symbol records -> set
- X# (RHS) -> fset
- X#
- X# Where RHS is the remaining symbols in some item after the "dot,"
- X# where fset is a set of strings representing terminals beginning
- X# sequences derivable from X. (A production is a statement in the
- X# grammar of the form LHS -> RHS, where LHS is a nonterminal
- X# symbol, and RHS is a sequence of zero or more terminal or
- X# nonterminal symbols; here productions are implemented via rule
- X# records.) If passed an empty RHS, FIRST returns another empty
- X# list.
- X#
- X# FIRST() uses the global grammar table ptbl.
- X#
- X# The structure of ptbl: key = LHS (string), value = rule list
- X# i.e. list of rule records). Keys are always nonterminals, as
- X# there is no need to record terminals (they appear only in the
- X# RHS of rules).
- X#
- Xprocedure FIRST(RHS, seen)
- X
- X local X, fset, i, check_later_list, chunk
- X #global ptbl
- X
- X # write(&errout, ximage(RHS))
- X fset := set()
- X
- X every X := !RHS do {
- X
- X delete(fset, "")
- X
- X # If X is a terminal symbol, just stick it into fset.
- X if \X.terminal then
- X insert(fset, X.str)
- X else {
- X #
- X # X is not a terminal, check to see if X -> aA is a
- X # production (where a is a terminal, and A is any series
- X # of terminals, nonterminals, or nothing at all...).
- X #
- X /seen := set()
- X insert(seen, X.str)
- X check_later_list := []
- X every i := 1 to *ptbl[X.str] do {
- X if \ptbl[X.str][i].RHS[1].terminal
- X then insert(fset, ptbl[X.str][i].RHS[1].str)
- X else {
- X if ptbl[X.str][i].RHS[1].str == X.str
- X then INSERT(check_later_list, ptbl[X.str][i])
- X else {
- X member(seen, ptbl[X.str][i].RHS[1].str) & next
- X #
- X # For productions X -> Y1, Y2...Yn, where Y is a
- X # nonterminal, compute FIRST recursively for the
- X # list [Y1, Y2, ...]. If Y1 is equivalent to X,
- X # then store that rule so that if FIRST(X)
- X # otherwise contains an epsilon move, we can go
- X # back and calculate FIRST(Y2), and so on.
- X # Keep track of what nonterminals we've already
- X # seen, so as not to calculate any twice.
- X #
- X fset ++:= FIRST(ptbl[X.str][i].RHS, seen)
- X }
- X }
- X }
- X }
- X #
- X # If fset contains e at this point, go back and try again,
- X # then first try to compute FIRST() for elements 2 and later
- X # of productions in the check_later_list. Otherwise, try
- X # computing FIRST for the next symbol in RHS. If this fails,
- X # then resign ourselves to e belonging in fset.
- X #
- X if not member(fset, "") then break
- X else {
- X every i := 1 to *check_later_list do {
- X chunk := check_later_list[i].RHS[2:0] | next
- X while (/chunk[1].terminal, chunk[1].str == X.str) do
- X pop(chunk) | { break next }
- X fset ++:= FIRST(chunk, seen)
- X }
- X next
- X }
- X }
- X
- X # writes(&errout, "returning ")
- X # every writes(&errout, !fset, " ")
- X # write(&errout)
- X return fset
- X
- Xend
- X
- X
- X#
- X# CLOSURE: list -> list
- X# (item_list) -> closure_list
- X#
- X# Where item_list and closure_list are list of item records.
- X# CLOSURE uses ptbl (global, created via makeptbl). Ptbl is a
- X# table, keys = LHS, values = lists of rule records. CLOSURE
- X# uses two additional arguments on recursive calls. Don't use
- X# them yourself!
- X#
- X# CLOSURE breaks the items in item_list into smaller items,
- X# and combines these with the items already in item_list, re-
- X# turning a new list including both the members of item_list
- X# and the new members.
- X#
- Xprocedure CLOSURE(item_list, i, added)
- X
- X local it, terminals, terminal, tmpset, r, LHS
- X #global ptbl
- X
- X if \DEBUG then {
- X write(&errout, "CLOSURE: CLOSing item list")
- X write(&errout, item_list_2_string(item_list))
- X }
- X
- X /i := 0
- X /added := table()
- X every it := item_list[i := i+1 to *item_list] do {
- X
- X # write(&errout, "CLOSURE: performing closure on item ", i)
- X #
- X # Put the as-yet unexpanded parts of item_list can be expanded
- X # into new items, and add them to the full closure list. Keep
- X # track of LHSs we've already seen. Loops are possible!
- X #
- X # If the dot stands before a terminal, then go to the next
- X # item.
- X repeat {
- X if \it.RHS[it.POS].terminal then {
- X #
- X # If an epsilon move is next, then increment POS, and
- X # look at it again. If we leave the POS alone, then
- X # later a shift action will be entered into the action
- X # table for an epsilon token. Since there are no
- X # epsilon tokens, this will break our parse!
- X #
- X if it.RHS[it.POS].str == "" then {
- X it.POS +:= 1
- X next
- X } else { break next } # don't expand nonterminals!
- X } else {
- X #
- X # If the "dot" is at the end of the RHS, it.RHS[it.POS]
- X # will fail. If it does fail, get the next it. If it
- X # succeeds, then just do a plain break and continue
- X # with the expansion process.
- X #
- X if LHS := it.RHS[it.POS].str
- X then break else { break next }
- X }
- X }
- X # If we get to here, the dot is at a nonterminal, so expand!
- X # Record LHSs we've alread seen.
- X /added[LHS] := set()
- X
- X # LHS is a string, RHS is a list of symbols; FIRST(X) returns
- X # a list of strings (no need to use symbols here, since we
- X # know everything in FIRST(X) is a terminal)
- X #
- X # write(ximage(ptbl)); write(ximage(LHS)); write(ximage(it))
- X #
- X tmpset := set()
- X every r := !ptbl[LHS] do {
- X # Change perhaps to: if *it.RHS - it.POS = 0 then {
- X # i.e. if the dot's at the end of the RHS...
- X if *it.RHS[it.POS+1:0] = 0 then {
- X #
- X # Check to see if we've already done all the
- X # productions for the current LHS and the lookahead
- X # symbol being used.
- X #
- X member(added[LHS], it.LOOK) | {
- X put(item_list, item(LHS, r.RHS, 1, it.LOOK, r))
- X #
- X # Keep track of lookahead symbols seen for this LHS.
- X insert(tmpset, it.LOOK)
- X }
- X } else {
- X terminals := FIRST(it.RHS[it.POS+1:0])
- X if delete(terminals, member(terminals, "")) then {
- X member(added[LHS], it.LOOK) | {
- X put(item_list, item(LHS, r.RHS, 1, it.LOOK, r))
- X insert(tmpset, it.LOOK)
- X }
- X }
- X every terminal := !terminals do {
- X member(added[LHS], terminal) | {
- X put(item_list, item(LHS, r.RHS, 1, terminal, r))
- X insert(tmpset, terminal)
- X }
- X }
- X }
- X }
- X #
- X # Record all the lookahead symbols seen for this LHS. On
- X # subsequent recursive calls, we won't bother to redo work
- X # already done! Afterwards, loop back for another item.
- X #
- X added[LHS] ++:= tmpset
- X }
- X
- X if *item_list > i then {
- X if \DEBUG then
- X write(&errout, "CLOSURE: calling closure recursively")
- X return CLOSURE(item_list, i, added)
- X } else {
- X # write(&errout, "CLOSURE: finished recursive calls")
- X #
- X # Sortff sorts on field 5, then 4 (where the 5th field is the
- X # same), then 3. It can take any no. of args. We only need
- X # 3, though. The sort order ensures that any time we perform
- X # a closure on an item list, that item list will have a
- X # consistent order. This makes it possible to check whether a
- X # given item list already exists using Equiv.
- X return sortff(item_list, 5, 4, 3)
- X }
- X
- Xend
- X
- X
- X#
- X# GOTO_ITEMS: list x symbol record -> list
- X# (item_list, X) -> item_list_2
- X#
- X# Where item_list_2 is the list of all items (A -> aX.b) where X
- X# is a symbol, such that (A -> a.Xb) is in item_list. Fails if
- X# item.POS for every item in item_list is greater than *item.RHS;
- X# fails also if X is not equivalent to any item.RHS[item.POS] in
- X# item_list.
- X#
- Xprocedure GOTO_ITEMS(item_list, X)
- X
- X local it, it2, item_list_2
- X static item_terminal_table, item_nonterminal_table
- X initial {
- X item_terminal_table := table()
- X item_nonterminal_table := table()
- X }
- X
- X # See if we've already performed this same calculation.
- X #
- X if \X.terminal
- X then item_list_2 := \(\item_terminal_table[item_list])[X.str]
- X else item_list_2 := \(\item_nonterminal_table[item_list])[X.str]
- X if \item_list_2 then return item_list_2
- X
- X item_list_2 := list()
- X every it := !item_list do {
- X # Subscripting operation fails if the dot's at end.
- X if Equiv(it.RHS[it.POS], X)
- X then {
- X it2 := copy(it)
- X it2.POS +:= 1
- X put(item_list_2, it2)
- X }
- X }
- X
- X item_list_2 := CLOSURE(item_list_2)
- X #
- X # Keep track of item lists and symbols we've already seen.
- X #
- X if \X.terminal then {
- X /item_terminal_table[item_list] := table()
- X /item_terminal_table[item_list][X.str] := item_list_2
- X } else {
- X /item_nonterminal_table[item_list] := table()
- X /item_nonterminal_table[item_list][X.str] := item_list_2
- X }
- X
- X if *item_list_2 > 0 then
- X return item_list_2
- X else fail
- X
- Xend
- X
- X
- X#
- X# item_list_2_string: item list -> string
- X#
- X# Turn an item list into a human readable list of items, indented
- X# four spaces from the left-hand margin.
- X#
- Xprocedure item_list_2_string(l)
- X
- X local s
- X
- X # Make sure we have the expected type entries in l.
- X type(l[1]) == "item" |
- X stop("error (item_list_2_string): wrong type list")
- X
- X s := ""
- X every s ||:= " " || rule_2_string(!l) || "\n"
- X return trim(s, '\n')
- X
- Xend
- X
- X
- X#
- X# rule_2_string: item or rule record -> string
- X#
- X# Utility for making item and rule records human-readable.
- X#
- Xprocedure rule_2_string(r, action)
- X
- X local r_string, sym
- X
- X if r_string := \r.LHS || " ::= " then {
- X every sym := !r.RHS do {
- X if \sym.terminal
- X then r_string ||:= sym.str || " "
- X else r_string ||:= "<" || sym.str || "> "
- X }
- X }
- X # Accept action has no left or right-hand side.
- X else r_string := "(accept) "
- X
- X if type(r) == "item"
- X then r_string ||:= "POS "|| r.POS || "; lookahead " || r.LOOK
- X else if \action
- X then r_string ||:= "POS "|| action.POS || " (action = "|| action.str || ")"
- X
- X return trim(r_string)
- X
- Xend
- X
- X
- X#
- X# show_conflict: deep psychological thriller
- X#
- Xprocedure show_conflict(action1, action2, token, i)
- X
- X write(&errout, "shift/reduce conflict, state ", i,
- X ", lookahead ", token, ":")
- X # action parameters may be null
- X write(&errout, "\t1: ", rule_2_string(action1.by_rule, action1))
- X write(&errout, "\t2: ", rule_2_string(action2.by_rule, action2))
- X
- Xend
- X
- X
- X#
- X# sortff: like sortf() except takes unlimited no. of field args
- X#
- Xprocedure sortff(arglst[])
- X
- X local sortfield, i, old_i
- SHAR_EOF
- true || echo 'restore of maketbls.icn failed'
- fi
- echo 'End of part 2'
- echo 'File maketbls.icn is continued in part 3'
- echo 3 > _shar_seq_.tmp
- exit 0
- --
-
- -Richard L. Goerwitz goer%midway@uchicago.bitnet
- goer@midway.uchicago.edu rutgers!oddjob!ellis!goer
-