home *** CD-ROM | disk | FTP | other *** search
- Path: sparky!uunet!spool.mu.edu!olivea!pagesat!spssig.spss.com!uchinews!ellis!goer
- From: goer@ellis.uchicago.edu (Richard L. Goerwitz)
- Newsgroups: comp.lang.icon
- Subject: parser generator, part 3
- Message-ID: <1993Jan3.211920.28528@midway.uchicago.edu>
- Date: 3 Jan 93 21:19:20 GMT
- References: <1993Jan3.211757.28395@midway.uchicago.edu>
- Sender: news@uchinews.uchicago.edu (News System)
- Reply-To: goer@midway.uchicago.edu
- Organization: University of Chicago
- Lines: 1017
-
- ---- Cut Here and feed the following to sh ----
- #!/bin/sh
- # this is ibpag.03 (part 3 of a multipart archive)
- # do not concatenate these parts, unpack them in order with /bin/sh
- # file maketbls.icn continued
- #
- if test ! -r _shar_seq_.tmp; then
- echo 'Please unpack part 1 first!'
- exit 1
- fi
- (read Scheck
- if test "$Scheck" != 3; then
- echo Please unpack part "$Scheck" next!
- exit 1
- else
- exit 0
- fi
- ) < _shar_seq_.tmp || exit 1
- if test ! -f _shar_wnt_.tmp; then
- echo 'x - still skipping maketbls.icn'
- else
- echo 'x - continuing file maketbls.icn'
- sed 's/^X//' << 'SHAR_EOF' >> 'maketbls.icn' &&
- X
- X *arglst[1] <= 1 | *arglst = 1 & { return arglst[1] }
- X sortfield := arglst[2] | { return sortf(arglst[1]) }
- X arglst[1] := sortf(arglst[1], sortfield)
- X
- X old_i := 1
- X every i := old_i+1 to *arglst[1] do {
- X if not (arglst[1][old_i][sortfield] === arglst[1][i][sortfield])
- X then {
- X return sortff!(push(arglst[3:0], arglst[1][old_i : i])) |||
- X sortff!(push(arglst[2:0], arglst[1][i : 0]))
- X }
- X }
- X return sortff!(push(arglst[3:0], arglst[1]))
- X
- Xend
- SHAR_EOF
- echo 'File maketbls.icn is complete' &&
- true || echo 'restore of maketbls.icn failed'
- rm -f _shar_wnt_.tmp
- fi
- # ============= preproc.icn ==============
- if test -f 'preproc.icn' -a X"$1" != X"-c"; then
- echo 'x - skipping preproc.icn (File already exists)'
- rm -f _shar_wnt_.tmp
- else
- > _shar_wnt_.tmp
- echo 'x - extracting preproc.icn (Text)'
- sed 's/^X//' << 'SHAR_EOF' > 'preproc.icn' &&
- X############################################################################
- X#
- X# Name: preproc.icn
- X#
- X# Title: file preprocessing utilities for IBPAG
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.18
- X#
- X############################################################################
- X#
- X# This file contains the preprocessing subsystem for IBPAG.
- X# Essentially, the routines contained here read the file, find the
- X# defines, do macro substitutions, find the start_symbol declaration
- X# (if there is one), and find and store rule definitions, outputting
- X# Icon procedures in their place. The stored rule definitions later
- X# get used by CONST_STATES, and turned into a parser.
- X#
- X############################################################################
- X#
- X# Links: none
- X#
- X# See also: ibpag.icn, maketbls.icn, debugme.icn, and esp. itokens.icn
- X#
- X############################################################################
- X
- X# ximage is only for debugging
- X# link ximage
- X
- X# declared in maketbls.icn
- X# global ptbl, start_symbol, line_number
- X
- X# A grammar symbol: str holds its name/text; terminal is set (to 1)
- X# for terminals (string literals in a rule's RHS) and left null for
- X# nonterminals (identifiers).
- Xrecord symbol(str, terminal)
- X# One production: LHS (string), RHS (list of symbol records or, before
- X# expansion, nested alternate-lists), a numeric priority, an
- X# associativity ("left", "right", or "none"), and the name of the Icon
- X# procedure generated for the rule's action (e.g. "_00001").
- Xrecord rule(LHS, RHS, priority, associativity, procname)
- X
- X#
- X# makeptbl: file -> table
- X# (f) -> ptbl
- X#
- X# Where f is a file containing IBPAG source code, and ptbl is a
- X# table of the productions contained in f; writes a preprocessed
- X# version of f to f2 (stdout by default) as a side-effect (an
- X# important one, nonetheless). If a "start_symbol X" declaration
- X# is encountered, it also sets the start-symbol to "X" (default is
- X# "S").
- X#
- X# The structure of ptbl: key = LHS (string), value = rule list
- X# (i.e. list of rule records). Keys are always nonterminals, as
- X# there is no need to record terminals (they appear only in the
- X# RHS of rules).
- X#
- X# Ptbl is global, so there really isn't any need to return it. It
- X# is used by almost every routine in maketbls.icn.
- X#
- Xprocedure makeptbl(f, f2)
- X
- X    local rulenum, state, separator, T, r, astr, new_r, RHS
- X    # global ptbl, start_symbol
- X    initial {
- X        start_symbol := "S"
- X        ptbl := table()
- X    }
- X
- X    rulenum := 0
- X    state := 0
- X    separator := ""
- X    #
- X    # States of the little automaton below:
- X    #   0 = copying input through; watching for RULE/STARTSYM
- X    #   1 = in a rule; expecting priority, associativity, or LHS
- X    #   2 = have priority; expecting associativity or LHS
- X    #   3 = have associativity; expecting LHS
- X    #   4 = have LHS; expecting "(" opening the RHS
- X    #   5 = in RHS; expecting a symbol (IDENT or STRINGLIT)
- X    #   6 = after an RHS symbol; expecting ",", "|", or ")"
- X    #   7 = like 5, but for alternates introduced by the BAR
- X    #   8 = rule read; copying input through until "end"
- X    #   9 = expecting the start symbol's name
- X    #
- X    # Iparse_tokens is in itokens.icn and suspends TOK records
- X    # having a sym and str field. The sym field contains symbol
- X    # names; the str field contains their actual string values in the
- X    # source file.
- X    #
- X    every T := \iparse_tokens(f) do {
- X
- X        #
- X        # Check for null sym field (iparse_tokens uses TOK(&null,
- X        # "\n") to signal the presence of a syntactically meaningless
- X        # newline; we want to print it later on so as to maintain the
- X        # original line structure), but we don't want to actually
- X        # parse it here.
- X        #
- X        if \T.sym then {
- X
- X            # Note that this little automaton passes its input through
- X            # only under certain conditions in states 0 and 8.
- X            # Otherwise it is either reading a rule or a start_symbol
- X            # definition.
- X            #
- X            case state of {
- X
- X                0 : {
- X                    # Typical case: We are looking for the start of
- X                    # the next start_symbol or rule declaration. If
- X                    # neither is found, do nuttin' except to pass the
- X                    # T record on to the printing routine below.
- X                    #
- X                    case T.sym of {
- X                        "STARTSYM" : {
- X                            state := 9
- X                            next
- X                        }
- X                        "RULE" : {
- X                            r := rule(,,,,"_" || right(rulenum +:= 1, 5, "0"))
- X                            state := 1
- X                            next
- X                        }
- X                        default : &null
- X                    }
- X                }
- X
- X                1 : {
- X                    # We are in a rule def. Look for the priority
- X                    # next. If we don't get an INTLIT or REALLIT,
- X                    # then assign a default priority, and see if we
- X                    # have an IDENT. If so, then see if it's the
- X                    # associativity; if so, set the associativity; if
- X                    # not, assign a default associativity and then see
- X                    # if we have a rule LHS. If not, then we have an
- X                    # error.
- X                    #
- X                    if T.sym == ("INTLIT"|"REALLIT") then {
- X                        r.priority := real(T.str)
- X                        state := 2
- X                        next
- X                    } else if T.sym == "IDENT" then {
- X                        # if you change the default priority here, do
- X                        # it in maketbls.icn, too (e.g. rule(,,1))
- X                        r.priority := 1
- X                        if T.str == ("left"|"right"|"none")
- X                        then {
- X                            r.associativity := T.str
- X                            state := 3
- X                            next
- X                        } else {
- X                            r.associativity := "none" # default
- X                            r.LHS := T.str
- X                            state := 4
- X                            next
- X                        }
- X                    } else oh_no("line "|| line_number, 11)
- X                }
- X
- X                2 : {
- X                    # We have our priority; now get the associativity
- X                    # (looks to the tokenizer like an identifier). If
- X                    # the identifier doesn't have a string value of
- X                    # "right," "left," or "none," then assign a
- X                    # default associativity, and assume that the
- X                    # identifier is the LHS of a rule.
- X                    #
- X                    T.sym == "IDENT" | oh_no("line "|| line_number, 12)
- X                    if T.str == ("left"|"right"|"none") then {
- X                        r.associativity := T.str
- X                        state := 3
- X                        next
- X                    } else {
- X                        r.associativity := "none" # default
- X                        r.LHS := T.str
- X                        state := 4
- X                        next
- X                    }
- X                }
- X
- X                3 : {
- X                    # Now read the LHS of the rule.
- X                    #
- X                    if T.sym == "IDENT" then {
- X                        r.LHS := T.str
- X                        state := 4
- X                        next
- X                    } else oh_no("line "|| line_number, 1)
- X                }
- X
- X                4 : {
- X                    # Now go for the RHS of the rule (which looks like
- X                    # the argument list to an Icon procedure).
- X                    #
- X                    if T.sym == "LPAREN" then {
- X                        r.RHS := []
- X                        state := 5
- X                        next
- X                    } else oh_no("line "|| line_number, 3)
- X                }
- X
- X                5 : {
- X                    # We have the left parenthesis; now read the
- X                    # arguments. Note that the smallest argument list
- X                    # possible is the empty string, i.e. (""). The
- X                    # alternation operator, |, is permitted, but has
- X                    # different semantics than it does for Icon code.
- X                    #
- X                    case T.sym of {
- X                        "IDENT" : put(r.RHS, symbol(T.str)) &
- X                            state := 6
- X                        "STRINGLIT" : put(r.RHS, symbol(no_quotes(T.str), 1)) &
- X                            state := 6
- X                        default : oh_no("line "|| line_number, 2)
- X                    }
- X                    next
- X                }
- X
- X                6 : {
- X                    # We have just read an element for the RHS of a
- X                    # rule; now we either close the current position
- X                    # with a comma, close the entire RHS with a
- X                    # parend, or insert an(other) alternative, via the
- X                    # BAR, for the last element.
- X                    #
- X                    case T.sym of {
- X                        "BAR" : r.RHS[-1] := [r.RHS[-1]] &
- X                            state := 7
- X                        "COMMA" : state := 5
- X                        "RPAREN" : {
- X                            astr := ""
- X                            every astr ||:= "arg" || (1 to *r.RHS) || ","
- X                            astr := trim(astr, ',')
- X                            write(f2, "procedure ", r.procname, "(", astr, ")")
- X                            separator := ""
- X                            every RHS := expand_rhs(r.RHS) do {
- X                                new_r := copy(r)
- X                                new_r.RHS := RHS
- X                                /ptbl[new_r.LHS] := []
- X                                put(ptbl[new_r.LHS], new_r)
- X                            }
- X                            state := 8
- X                        }
- X                        default : oh_no("line "|| line_number, 10)
- X                    }
- X                    next
- X                }
- X
- X                7 : {
- X                    # Like state 5, only for elements encountered
- X                    # after a BAR. These get stuffed into a list, and
- X                    # later turned into RHSs that are identical except
- X                    # in cases where a BAR specified alternates for a
- X                    # given position. See expand_rhs().
- X                    #
- X                    case T.sym of {
- X                        "IDENT" : put(r.RHS[-1], symbol(T.str)) &
- X                            state := 6
- X                        "STRINGLIT" : put(r.RHS[-1],
- X                            symbol(no_quotes(T.str), 1)) &
- X                            state := 6
- X                        default : oh_no("line "|| line_number, 6)
- X                    }
- X                    next
- X                }
- X
- X                8 : {
- X                    # We're done with the rule definition. We are again
- X                    # passing tokens through to the printing routine
- X                    # below. We're still looking for an "end"
- X                    # keyword, though. When we get it, go back to
- X                    # state 0.
- X                    #
- X                    case T.sym of {
- X                        "RULE" : oh_no("line "||line_number, 7)
- X                        "PROCEDURE" : oh_no("line "||line_number, 8)
- X                        "END" : state := 0
- X                        default : &null
- X                    }
- X                    # Don't go for another token yet. NO "next"!
- X                }
- X
- X                9 : {
- X                    # This state is selected by a preceding
- X                    # "START_SYMBOL" symbol. We don't pass input
- X                    # through (note the "next" below). Input gets
- X                    # passed through again when we hit state 0.
- X                    #
- X                    if T.sym == "IDENT" then {
- X                        start_symbol := T.str
- X                        state := 0
- X                        next
- X                    } else oh_no("line "||line_number, 4)
- X                }
- X            }
- X        }
- X
- X        # This is the "printing" routine mentioned above...
- X        #
- X        # NB: Newlines that don't need to be present are signalled by
- X        # a null sym field. See the procedure do_newline(). If this
- X        # modelled the real Icon tokenizer, such newlines would be
- X        # ignored.
- X        #
- X        if any(&digits ++ &letters ++'_.', \T.str, 1, 2) & \T.sym ~=="DOT"
- X        then writes(f2, separator)
- X
- X        writes(f2, T.str)
- X
- X        if any(&digits ++ &letters ++'_.', \T.str,-1, 0) & \T.sym ~=="DOT"
- X        then separator := " " else separator := ""
- X    }
- X
- X# write(ximage(ptbl))
- X    # Ptbl is global, so this really isn't necessary.
- X    return ptbl
- X
- Xend
- X
- X
- X#
- X# no_quotes: string -> string
- X# s -> s2
- X#
- X# Where s is the literal value of some STRINGLIT, and s2 is that
- X#	same literal value, with the enclosing quotation marks
- X# removed. E.g. "\"ab\"" -> "ab".
- X#
- Xprocedure no_quotes(s)
- X    # Match an opening quote, take everything up to the final
- X    # character, then match the closing quote; fails if s is not a
- X    # double-quoted string.
- X    return s ? 2(="\"", tab(-1), ="\"")
- Xend
- X
- X
- X#
- X# expand_rhs: list -> list
- X#
- X# Expand_rhs takes a list in which the elements are either
- X# symbols or lists of symbols, and produces lists with only
- X# symbols. E.g.
- X#
- X# [[[symbol1], symbol2], symbol3] -> [symbol1, symbol3]
- X# [symbol2, symbol3]
- X#
- Xprocedure expand_rhs(RHS)
- X    # Generator: suspends each fully-expanded version of RHS in turn,
- X    # by crossing the alternatives of the first element (via
- X    # expand_elem) with the expansions of the rest. An empty RHS
- X    # expands only to itself.
- X    *RHS = 0 & { return RHS }
- X    suspend [expand_elem(RHS[1])] ||| expand_rhs(RHS[2:0])
- Xend
- X#
- X#
- Xprocedure expand_elem(elem)
- X    # Generator: a plain symbol record is its own only expansion;
- X    # otherwise elem is a two-element list built by makeptbl's BAR
- X    # handling (states 6-7), where elem[1] holds the earlier
- X    # alternate(s) -- possibly itself such a pair -- and elem[2] the
- X    # latest one. Each alternative is suspended in turn.
- X    if type(elem) == "symbol"
- X    then return elem
- X    else {
- X        suspend expand_elem(elem[1])
- X        suspend elem[2]
- X    }
- Xend
- X
- SHAR_EOF
- true || echo 'restore of preproc.icn failed'
- rm -f _shar_wnt_.tmp
- fi
- # ============= itokens.icn ==============
- if test -f 'itokens.icn' -a X"$1" != X"-c"; then
- echo 'x - skipping itokens.icn (File already exists)'
- rm -f _shar_wnt_.tmp
- else
- > _shar_wnt_.tmp
- echo 'x - extracting itokens.icn (Text)'
- sed 's/^X//' << 'SHAR_EOF' > 'itokens.icn' &&
- X############################################################################
- X#
- X# Name: itokens.icn
- X#
- X# Title: itokens (Icon source-file tokenizer)
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.7
- X#
- X############################################################################
- X#
- X# This file contains a tokenizer to be used for Icon source files.
- X# Normally it would be incorporated into a package utilizing IBPAG,
- X# but it has a stub main procedure that makes it potentially a
- X# standalone package as well.
- X#
- X############################################################################
- X#
- X# Links: slashupto
- X#
- X# Requires: coexpressions
- X#
- X############################################################################
- X
- X#link ximage
- X#link slashupto #make sure you have version 1.2 or above
- X
- X# next_c: the one-character lookahead shared by all the do_* routines.
- X# line_number: current input line, maintained by the getchar
- X# coexpression created in iparse_tokens().
- Xglobal next_c, line_number
- X# A token: sym is the preterminal name (e.g. "IDENT", "INTLIT"); str
- X# is the token's literal text. A null sym marks a syntactically
- X# meaningless newline, passed through only to preserve line structure.
- Xrecord TOK(sym, str)
- X
- X#
- X# stub main for testing
- X#
- X#procedure main()
- X#
- X# local separator
- X# separator := ""
- X# every T := \iparse_tokens(&input) do {
- X# #
- X# # Newlines that don't need to be present are signalled by a
- X# # null sym field. See the procedure do_newline(). If this
- X# # modelled the real Icon tokenizer, such newlines would be
- X# # ignored, and no token (not even a dummy token) would be
- X# # suspended.
- X# #
- X# if any(&digits ++ &letters ++ '_.', \T.str, 1, 2) & \T.sym ~== "DOT"
- X# then writes(separator)
- X# writes(T.str)
- X# if any(&digits ++ &letters ++ '_.', \T.str, -1, 0) & \T.sym ~== "DOT"
- X# then separator := " " else separator := ""
- X# }
- X#
- X#end
- X
- X#
- X# iparse_tokens: file -> TOK records (a generator)
- X# (stream) -> tokens
- X#
- X# Where file is an open input stream, and tokens are TOK records
- X# holding both the token type and actual token text.
- X#
- X# TOK records contain two parts, a preterminal symbol (the first
- X# "sym" field), and the actual text of the token ("str"). The
- X# parser only pays attention to the sym field, although the
- X# strings themselves get pushed onto the value stack.
- X#
- X# Note the following kludge: Unlike real Icon tokenizers, this
- X#	procedure returns syntactically meaningless newlines as TOK
- X# records with a null sym field. Normally they would be ignored.
- X# I wanted to return them so they could be printed on the output
- X# stream, thus preserving the line structure of the original
- X# file, and making later diagnostic messages more usable.
- X#
- Xprocedure iparse_tokens(stream, getchar)
- X
- X    local elem, whitespace, last_token, token, primitives, reserveds
- X    static be_tbl, reserved_tbl, operators
- X    initial {
- X
- X        # Primitive Tokens
- X        #
- X        primitives := [
- X            ["identifier", "IDENT", "be"],
- X            ["integer-literal", "INTLIT", "be"],
- X            ["real-literal", "REALLIT", "be"],
- X            ["string-literal", "STRINGLIT", "be"],
- X            ["cset-literal", "CSETLIT", "be"],
- X            ["end-of-file", "EOFX", "" ]]
- X
- X        # Reserved Words
- X        #
- X        reserveds := [
- X            ["break", "BREAK", "be"],
- X            ["by", "BY", "" ],
- X            ["case", "CASE", "b" ],
- X            ["create", "CREATE", "b" ],
- X            ["default", "DEFAULT", "b" ],
- X            ["do", "DO", "" ],
- X            ["else", "ELSE", "" ],
- X            ["end", "END", "b" ],
- X            ["every", "EVERY", "b" ],
- X            ["fail", "FAIL", "be"],
- X            ["global", "GLOBAL", "" ],
- X            ["if", "IF", "b" ],
- X            ["initial", "INITIAL", "b" ],
- X            ["invocable", "INVOCABLE", "" ],
- X            ["link", "LINK", "" ],
- X            ["local", "LOCAL", "b" ],
- X            ["next", "NEXT", "be"],
- X            ["not", "NOT", "b" ],
- X            ["of", "OF", "" ],
- X            ["procedure", "PROCEDURE", "" ],
- X            ["record", "RECORD", "" ],
- X            ["repeat", "REPEAT", "b" ],
- X            ["return", "RETURN", "be"],
- X            # Keyword beginning a rule definition. Like "procedure."
- X            ["rule", "RULE", "" ], #<-NB
- X            # Keyword beginning a start-symbol declaration.
- X            ["start_symbol", "STARTSYM", "" ],
- X            ["static", "STATIC", "b" ],
- X            ["suspend", "SUSPEND", "be"],
- X            ["then", "THEN", "" ],
- X            ["to", "TO", "" ],
- X            ["until", "UNTIL", "b" ],
- X            ["while", "WHILE", "b" ]]
- X
- X        # Operators
- X        #
- X        operators := [
- X            [":=", "ASSIGN", "" ],
- X            ["@", "AT", "b" ],
- X            ["@:=", "AUGACT", "" ],
- X            ["&:=", "AUGAND", "" ],
- X            ["=:=", "AUGEQ", "" ],
- X            ["===:=", "AUGEQV", "" ],
- X            [">=:=", "AUGGE", "" ],
- X            [">:=", "AUGGT", "" ],
- X            ["<=:=", "AUGLE", "" ],
- X            ["<:=", "AUGLT", "" ],
- X            ["~=:=", "AUGNE", "" ],
- X            ["~===:=", "AUGNEQV", "" ],
- X            ["==:=", "AUGSEQ", "" ],
- X            [">>=:=", "AUGSGE", "" ],
- X            [">>:=", "AUGSGT", "" ],
- X            ["<<=:=", "AUGSLE", "" ],
- X            ["<<:=", "AUGSLT", "" ],
- X            ["~==:=", "AUGSNE", "" ],
- X            ["\\", "BACKSLASH", "b" ],
- X            ["!", "BANG", "b" ],
- X            ["|", "BAR", "b" ],
- X            ["^", "CARET", "b" ],
- X            ["^:=", "CARETASGN", "b" ],
- X            [":", "COLON", "" ],
- X            [",", "COMMA", "" ],
- X            ["||", "CONCAT", "b" ],
- X            ["||:=", "CONCATASGN","" ],
- X            ["&", "CONJUNC", "b" ],
- X            [".", "DOT", "b" ],
- X            ["--", "DIFF", "b" ],
- X            ["--:=", "DIFFASGN", "" ],
- X            ["===", "EQUIV", "b" ],
- X            ["**", "INTER", "b" ],
- X            ["**:=", "INTERASGN", "" ],
- X            ["{", "LBRACE", "b" ],
- X            ["[", "LBRACK", "b" ],
- X            ["|||", "LCONCAT", "b" ],
- X            ["|||:=", "LCONCATASGN","" ],
- X            ["==", "LEXEQ", "b" ],
- X            [">>=", "LEXGE", "" ],
- X            [">>", "LEXGT", "" ],
- X            ["<<=", "LEXLE", "" ],
- X            ["<<", "LEXLT", "" ],
- X            ["~==", "LEXNE", "b" ],
- X            ["(", "LPAREN", "b" ],
- X            ["-:", "MCOLON", "" ],
- X            ["-", "MINUS", "b" ],
- X            ["-:=", "MINUSASGN", "" ],
- X            ["%", "MOD", "" ],
- X            ["%:=", "MODASGN", "" ],
- X            ["~===", "NOTEQUIV", "b" ],
- X            ["=", "NUMEQ", "b" ],
- X            [">=", "NUMGE", "" ],
- X            [">", "NUMGT", "" ],
- X            ["<=", "NUMLE", "" ],
- X            ["<", "NUMLT", "" ],
- X            ["~=", "NUMNE", "b" ],
- X            ["+:", "PCOLON", "" ],
- X            ["+", "PLUS", "b" ],
- X            ["+:=", "PLUSASGN", "" ],
- X            ["?", "QMARK", "b" ],
- X            ["<-", "REVASSIGN", "" ],
- X            ["<->", "REVSWAP", "" ],
- X            ["}", "RBRACE", "e" ],
- X            ["]", "RBRACK", "e" ],
- X            [")", "RPAREN", "e" ],
- X            [";", "SEMICOL", "" ],
- X            ["?:=", "SCANASGN", "" ],
- X            ["/", "SLASH", "b" ],
- X            ["/:=", "SLASHASGN", "" ],
- X            ["*", "STAR", "b" ],
- X            ["*:=", "STARASGN", "" ],
- X            [":=:", "SWAP", "" ],
- X            ["~", "TILDE", "b" ],
- X            ["++", "UNION", "b" ],
- X            ["++:=", "UNIONASGN", "" ],
- X            # archaic digraph equivalents of {, }, [, and ]
- X            ["$(", "LBRACE", "b" ],
- X            ["$)", "RBRACE", "e" ],
- X            ["$<", "LBRACK", "b" ],
- X            ["$>", "RBRACK", "e" ]]
- X
- X        # static be_tbl, reserved_tbl
- X        reserved_tbl := table()
- X        every elem := !reserveds do
- X            insert(reserved_tbl, elem[1], elem[2])
- X        be_tbl := table()
- X        every elem := !primitives | !reserveds | !operators do {
- X            insert(be_tbl, elem[2], elem[3])
- X        }
- X    }
- X
- X    # Default getchar: a coexpression producing the input one
- X    # character at a time, appending "\n" to every line (so the
- X    # stream always ends in a newline) and counting lines in the
- X    # global line_number. A non-null getchar argument (used by the
- X    # recursive call in do_newline) bypasses this.
- X    /getchar := create {
- X        line_number := 0
- X        ! ( 1(!stream, line_number +:=1) || "\n" )
- X    }
- X    whitespace := ' \t'
- X    /next_c := @getchar
- X
- X    repeat {
- X        case next_c of {
- X
- X            "." : {
- X                # Could be a real literal *or* a dot operator. Check
- X                # following character to see if it's a digit. If so,
- X                # it's a real literal. We can only get away with
- X                # doing the dot here because it is not a substring of
- X                # any longer identifier. If this gets changed, we'll
- X                # have to move this code into do_operator().
- X                #
- X                last_token := do_dot(getchar)
- X                suspend last_token
- X# write(&errout, "next_c == ", image(next_c))
- X                next
- X            }
- X
- X            "\n" : {
- X                # If do_newline fails, it means we're at the end of
- X                # the input stream, and we should break out of the
- X                # repeat loop.
- X                #
- X                every last_token := do_newline(getchar, last_token, be_tbl)
- X                do suspend last_token
- X                if next_c === &null then break
- X                next
- X            }
- X
- X            "\#" : {
- X                # Just a comment. Strip it by reading every character
- X                # up to the next newline. The global var next_c
- X                # should *always* == "\n" when this is done.
- X                #
- X                do_number_sign(getchar)
- X# write(&errout, "next_c == ", image(next_c))
- X                next
- X            }
- X
- X            "\"" : {
- X                # Suspend as STRINGLIT everything from here up to the
- X                # next non-backslashed quotation mark, inclusive
- X                # (accounting for the _ line-continuation convention).
- X                #
- X                last_token := do_quotation_mark(getchar)
- X                suspend last_token
- X# write(&errout, "next_c == ", image(next_c))
- X                next
- X            }
- X
- X            "'" : {
- X                # Suspend as CSETLIT everything from here up to the
- X                # next non-backslashed apostrophe, inclusive.
- X                #
- X                last_token := do_apostrophe(getchar)
- X                suspend last_token
- X# write(&errout, "next_c == ", image(next_c))
- X                next
- X            }
- X
- X            &null : oh_no(&null, 5) # unexpected EOF message
- X
- X            default : {
- X                # If we get to here, we have either whitespace, an
- X                # integer or real literal, an identifier or reserved
- X                # word (both get handled by do_identifier), or an
- X                # operator. The question of which we have can be
- X                # determined by checking the first character.
- X                #
- X                if any(whitespace, next_c) then {
- X                    # Like all of the TOK forming procedures,
- X                    # do_whitespace resets next_c.
- X                    do_whitespace(getchar, whitespace)
- X                    # don't suspend any tokens
- X                    next
- X                }
- X                if any(&digits, next_c) then {
- X                    last_token := do_digits(getchar)
- X                    suspend last_token
- X                    next
- X                }
- X                if any(&letters ++ '_', next_c) then {
- X                    last_token := do_identifier(getchar, reserved_tbl)
- X                    suspend last_token
- X                    next
- X                }
- X# write(&errout, "it's an operator")
- X                last_token := do_operator(getchar, operators)
- X                suspend last_token
- X                next
- X            }
- X        }
- X    }
- X
- X    # If stream argument is nonnull, then we are in the top-level
- X    # iparse_tokens(). If not, then we are in a recursive call, and
- X    # we should not emit all this end-of-file crap.
- X    #
- X    if \stream then {
- X        suspend TOK("EOFX")
- X        return TOK("$")
- X    }
- X    else fail
- X
- Xend
- X
- X
- X#
- X# do_dot: coexpression -> TOK record
- X# getchar -> t
- X#
- X# Where getchar is the coexpression that produces the next
- X# character from the input stream and t is a token record whose
- X# sym field contains either "REALLIT" or "DOT". Essentially,
- X# do_dot checks the next char on the input stream to see if it's
- X# an integer. Since the preceding char was a dot, an integer
- X# tips us off that we have a real literal. Otherwise, it's just
- X# a dot operator. Note that do_dot resets next_c for the next
- X# cycle through the main case loop in the calling procedure.
- X#
- Xprocedure do_dot(getchar)
- X
- X    local token
- X    # global next_c
- X
- X# write(&errout, "it's a dot")
- X
- X    # If dot's followed by a digit, then we have a real literal.
- X    #
- X    if any(&digits, next_c := @getchar) then {
- X# write(&errout, "dot -> it's a real literal")
- X        token := "." || next_c
- X        while any(&digits, next_c := @getchar) do
- X            token ||:= next_c
- X        if token ||:= (next_c == ("e"|"E")) then {
- X            # Skip leading zeros in the exponent (the value is
- X            # unchanged), then collect the remaining digits.
- X            # NOTE(review): signed exponents (".5e+3") are not
- X            # consumed here; do_digits() shares that limitation.
- X            while (next_c := @getchar) == "0"
- X            while any(&digits, next_c) do {
- X                token ||:= next_c
- X                # Fixed: this line read "next_c = @getchar" (numeric
- X                # comparison), which never advanced next_c and left
- X                # the loop spinning forever on any multi-digit
- X                # exponent. Assignment (:=) was clearly intended --
- X                # compare the parallel loops in do_digits().
- X                next_c := @getchar
- X            }
- X        }
- X        return TOK("REALLIT", token)
- X    }
- X
- X    # Dot not followed by an integer; so we just have a dot operator,
- X    # and not a real literal.
- X    #
- X# write(&errout, "dot -> just a plain dot")
- X    return TOK("DOT", ".")
- X
- Xend
- X
- X
- X#
- X# do_newline: coexpression x TOK record x table -> TOK records
- X# (getchar, last_token, be_tbl) -> Ts (a generator)
- X#
- X# Where getchar is the coexpression that returns the next
- X# character from the input stream, last_token is the last TOK
- X# record suspended by the calling procedure, be_tbl is a table of
- X# tokens and their "beginner/ender" status, and Ts are TOK
- X# records. Note that do_newline resets next_c. Do_newline is a
- X# mess. What it does is check the last token suspended by the
- X# calling procedure to see if it was a beginner or ender. It
- X# then gets the next token by calling iparse_tokens again. If
- X# the next token is a beginner and the last token is an ender,
- X# then we have to suspend a SEMICOL token. In either event, both
- X# the last and next token are suspended.
- X#
- Xprocedure do_newline(getchar, last_token, be_tbl)
- X
- X    local next_token
- X    # global next_c
- X
- X# write(&errout, "it's a newline")
- X
- X    # Go past any additional newlines.
- X    #
- X    while next_c == "\n" do {
- X        # NL can be the last char in the getchar stream; if it *is*,
- X        # then signal that it's time to break out of the repeat loop
- X        # in the calling procedure.
- X        #
- X        next_c := @getchar | {
- X            next_c := &null
- X            fail
- X        }
- X        # Each *extra* newline (beyond the first) is passed along as
- X        # a null-sym TOK; the first newline is represented by
- X        # whichever record is returned at the bottom of this
- X        # procedure (SEMICOL or a null-sym TOK).
- X        suspend TOK(&null, next_c == "\n")
- X    }
- X
- X    # If there was a last token (i.e. if a newline wasn't the first
- X    # character of significance in the input stream), then check to
- X    # see if it was an ender. If so, then check to see if the next
- X    # token is a beginner. If so, then suspend a TOK("SEMICOL")
- X    # record before suspending the next token.
- X    #
- X    if find("e", be_tbl[(\last_token).sym]) then {
- X# write(&errout, "calling iparse_tokens via do_newline")
- X# &trace := -1
- X        # First arg to iparse_tokens can be null here.
- X        if next_token := iparse_tokens(&null, getchar)
- X        then {
- X# write(&errout, "call of iparse_tokens via do_newline yields ",
- X# ximage(next_token))
- X            if find("b", be_tbl[next_token.sym])
- X            then suspend TOK("SEMICOL", "\n")
- X            #
- X            # See below. If this were like the real Icon parser,
- X            # the following line would be commented out.
- X            #
- X            else suspend TOK(&null, "\n")
- X            return next_token
- X        }
- X        else {
- X            #
- X            # If this were a *real* Icon tokenizer, it would not emit
- X            # any record here, but would simply fail. Instead, we'll
- X            # emit a dummy record with a null sym field.
- X            #
- X            return TOK(&null, "\n")
- X# &trace := 0
- X# fail
- X        }
- X    }
- X
- X    # See above. Again, if this were like Icon's own tokenizer, we
- X    # would just fail here, and not return any TOK record.
- X    #
- X# &trace := 0
- X    return TOK(&null, "\n")
- X# fail
- X
- Xend
- X
- X
- X#
- X# do_number_sign: coexpression -> &null
- X# getchar ->
- X#
- X# Where getchar is the coexpression that pops characters off the
- X# main input stream. Sets the global variable next_c. This
- X# procedure simply reads characters until it gets a newline, then
- X# returns with next_c == "\n". Since the starting character was
- X# a number sign, this has the effect of stripping comments.
- X#
- Xprocedure do_number_sign(getchar)
- X
- X    # global next_c
- X
- X# write(&errout, "it's a number sign")
- X    # Eat characters up to the next newline. Termination is assured
- X    # because the getchar coexpression built in iparse_tokens()
- X    # appends "\n" to every input line it reads.
- X    while next_c ~== "\n" do {
- X        next_c := @getchar
- X    }
- X
- X    # Return to calling procedure to cycle around again with the new
- X    # next_c already set. Next_c should always be "\n" at this point.
- X    return
- X
- Xend
- X
- X
- X#
- X# do_quotation_mark: coexpression -> TOK record
- X# getchar -> t
- X#
- X# Where getchar is the coexpression that yields another character
- X# from the input stream, and t is a TOK record with "STRINGLIT"
- X#	as its sym field.  Puts everything up to and including the next
- X# non-backslashed quotation mark into the str field. Handles the
- X# underscore continuation convention.
- X#
- Xprocedure do_quotation_mark(getchar)
- X
- X    local token
- X    # global next_c
- X
- X    # write(&errout, "it's a string literal")
- X    token := "\""
- X    while next_c := @getchar do {
- X        # An underscore just before a newline continues the literal
- X        # on the next line; drop both the underscore and the newline.
- X        if next_c == "\n" & token[-1] == "_" then {
- X            token := token[1:-1]
- X            next
- X        } else {
- X            # slashupto succeeds only on a non-backslashed quote, so
- X            # escaped quotes (\") remain inside the literal.
- X            if slashupto("\"", token ||:= next_c, 2)
- X            then {
- X                next_c := @getchar
- X                # resume outermost (repeat) loop in calling procedure,
- X                # with the new (here explicitly set) next_c
- X                return TOK("STRINGLIT", token)
- X            }
- X        }
- X    }
- X
- X    # NOTE(review): if the input ends before a closing quote is seen,
- X    # the loop exhausts getchar and the procedure simply fails.
- Xend
- X
- X
- X#
- X# do_apostrophe: coexpression -> TOK record
- X# getchar -> t
- X#
- X# Where getchar is the coexpression that yields another character
- X# from the input stream, and t is a TOK record with "CSETLIT"
- X#	as its sym field.  Puts everything up to and including the next
- X#	non-backslashed apostrophe into the str field.
- X#
- Xprocedure do_apostrophe(getchar)
- X
- X    local token
- X    # global next_c
- X
- X# write(&errout, "it's a cset literal")
- X    token := "'"
- X    while next_c := @getchar do {
- X        # slashupto succeeds only on a non-backslashed apostrophe, so
- X        # escaped apostrophes (\') remain inside the literal.
- X        if slashupto("'", token ||:= next_c, 2)
- X        then {
- X            next_c := @getchar
- X            # Return & resume outermost containing loop in calling
- X            # procedure w/ new next_c.
- X            return TOK("CSETLIT", token)
- X        }
- X    }
- X
- X    # NOTE(review): an unterminated cset literal exhausts getchar and
- X    # makes this procedure fail silently.
- Xend
- X
- X
- X#
- X# do_digits: coexpression -> TOK record
- X# getchar -> t
- X#
- X# Where getchar is the coexpression that produces the next char
- X# on the input stream, and where t is a TOK record containing
- X# either "REALLIT" or "INTLIT" in its sym field, and the text of
- X# the numeric literal in its str field.
- X#
- Xprocedure do_digits(getchar)
- X
- X    local token, tok_record
- X    # global next_c
- X
- X    # Assume integer literal until proven otherwise....
- X    tok_record := TOK("INTLIT")
- X
- X# write(&errout, "it's an integer or real literal")
- X    # A single leading "0" is dropped here; if nothing follows it,
- X    # the last line of this procedure restores it as the integer 0.
- X    token := ("0" ~== next_c) | ""
- X    while any(&digits, next_c := @getchar) do
- X        token ||:= next_c
- X    # "R"/"r" appears to be Icon's radix-literal marker (e.g. 8r17);
- X    # note that only decimal digits are consumed after it, so a
- X    # literal like 16rFF would not be fully read -- TODO confirm.
- X    if token ||:= (next_c == ("R"|"r")) then {
- X        while any(&digits, next_c := @getchar) do
- X            token ||:= next_c
- X    } else {
- X        if token ||:= (next_c == ".") then {
- X            while any(&digits, next_c := @getchar) do
- X                token ||:= next_c
- X            tok_record := TOK("REALLIT")
- X        }
- X        # NOTE(review): signed exponents ("1e+3") are not consumed
- X        # here; do_dot() shares the same limitation.
- X        if token ||:= (next_c == ("e"|"E")) then {
- X            while any(&digits, next_c := @getchar) do
- X                token ||:= next_c
- X            tok_record := TOK("REALLIT")
- X        }
- X    }
- X    tok_record.str := ("" ~== token) | 0
- X    return tok_record
- X
- Xend
- X
- X
- X#
- X# do_whitespace: coexpression x cset -> &null
- X# getchar x whitespace -> &null
- X#
- X# Where getchar is the coexpression producing the next char on
- X# the input stream. Do_whitespace just repeats until it finds a
- SHAR_EOF
- true || echo 'restore of itokens.icn failed'
- fi
- echo 'End of part 3'
- echo 'File itokens.icn is continued in part 4'
- echo 4 > _shar_seq_.tmp
- exit 0
- --
-
- -Richard L. Goerwitz goer%midway@uchicago.bitnet
- goer@midway.uchicago.edu rutgers!oddjob!ellis!goer
-