Programmer 7500

home *** CD-ROM | disk | FTP | other *** search

/ Programmer 7500 / MAX_PROGRAMMERS.iso / CLIPPER / MISC / FUZZY.ZIP / PARSER.B < prev next >

Wrap

Text File | 1986-11-30 | 26.6 KB | 753 lines

------------------------------------------------------------------------------- -- -- -- Library Unit: Parser -- -- -- -- Author: Bradley L. Richards -- -- -- -- Version Date Notes . . . -- -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- -- 1.0 22 May 86 Initial Version -- -- 1.1 19 Jun 86 Lotsa revisions due to Prover design -- -- 2.0 20 Jun 86 Version number change only (for consistancy) -- -- 2.05 13 Jul 86 Split spec and body into separate files -- -- 2.1 21 Jul 86 Demonstration Version -- -- 2.2 28 Jul 86 Added parse_read. Initial operational version -- -- 3.0 10 Oct 86 Final thesis product -- -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- -- -- -- Library units used: Data_def, listing, token, unchecked_deallocation -- -- -- -- Description: This package parses an input program, and constructs an -- -- abstract syntax tree for the program. Procedures appear in -- -- alphabetical order. Start_parser and stop_parser are the -- -- initialization and clean up routines. Parse_file, parse_read, and -- -- parse_request are also externally visible routines. Release is -- -- the routine which deallocated ASTs when they are no longer needed. -- -- -- ------------------------------------------------------------------------------- -- -- -- Package Body -- -- -- ------------------------------------------------------------------------------- package body parser is -- -- some forward definitions for our parsing routines -- function parse_element return argument_ptr; function parse_expression return AST_ptr; function parse_head return AST_ptr; function parse_list return p_list_ptr; function parse_predicate return AST_ptr; function parse_term return AST_ptr; -- -- Parse_to_bracket -- Parse to the first unbalanced right bracket to -- reach the end of the current string expression. Reaching an -- unbalanced right paren or a period also suffices. -- procedure parse_to_bracket is bracket_level : natural := 0; paren_level : natural := 0; begin loop exit when current_token.is_a = period; exit when current_token.is_a = end_of_file; exit when paren_level = 0 and current_token.is_a = right_paren; exit when bracket_level = 0 and current_token.is_a = right_bracket; if current_token.is_a = left_paren then paren_level := paren_level + 1; elsif current_token.is_a = right_paren then paren_level := paren_level - 1; elsif current_token.is_a = left_bracket then bracket_level := bracket_level + 1; elsif current_token.is_a = right_bracket then bracket_level := bracket_level - 1; end if; get_token; end loop; end parse_to_bracket; -- -- Parse_to_paren -- Parse to the first unbalanced right paren, or to -- a period. Skip embedded parentheses pairs. -- procedure parse_to_paren is paren_level : natural := 0; begin loop exit when current_token.is_a = period; exit when current_token.is_a = end_of_file; exit when paren_level = 0 and current_token.is_a = right_paren; if current_token.is_a = left_paren then paren_level := paren_level + 1; elsif current_token.is_a = right_paren then paren_level := paren_level - 1; end if; get_token; end loop; end parse_to_paren; procedure parse_to_period is begin loop exit when current_token.is_a = period; exit when current_token.is_a = end_of_file; get_token; end loop; end parse_to_period; -- -- Release -- Return memory to the system using UNCHECKED_DEALLOCATION. -- These routines must be filled in for an efficient system -- since the current Verdix Compiler does not include an -- automatic garbage collector. Routines deallocate all -- items within the passed structure UP TO the "stop" value. -- procedure release( tree, stop : AST_ptr ) is ptr : AST_ptr := tree; begin if ptr /= stop then case ptr.node_type is when implication => release(ptr.head, stop); release(ptr.tail, stop); when binary_operator => release(ptr.left_operand, stop); release(ptr.right_operand, stop); when unary_operator => release(ptr.operand, stop); when resolution_marker => release(ptr.subgoals, stop); when others => null; end case; free_AST(ptr); end if; end release; procedure start_parser( input_file, output_file : in string ) is begin start_token(input_file, output_file); end start_parser; procedure stop_parser is begin stop_token; end stop_parser; ------------------------------------------------------------------------------- -- -- -- Parsing Routines -- -- (in alphabetical order) -- -- -- ------------------------------------------------------------------------------- -- -- A general convention honored by the parsing routines is that each will -- leave current_token pointing to the token after whatever the routine just -- parsed. In the case of an error where the routine got lost, it calls one -- of parse_to_bracket, parse_to_paren, or parse_to_period to get the parser -- back into known territory. Note that these are hierarchical routines in -- a sense; parse_to_bracket will also stop when it finds an unbalanced -- right parenthesis or a period, and parse_to_paren will stop when it sees -- a period. The parsing routines are careful, in this case, not to parse -- past the token that represents the termination condition for one of the -- routines higher up in the call stack. -- -- -- parse_arguments -- parses the argument list associated with a -- predicate call. -- function parse_arguments return argument_ptr is arg, return_list : argument_ptr; begin -- -- on entry we know that the current token is a left parenthesis -- get_token; arg := parse_element; return_list := arg; -- we'll return a pointer to the front of the list loop if current_token.is_a = comma then -- another argument coming get_token; elsif current_token.is_a = right_bracket then error(pointer, "unbalanced right bracket"); get_token; elsif current_token.is_a = right_paren then -- end of argument list get_token; exit; elsif current_token.is_a = period then error(pointer, "no terminating ')' for argument list"); exit; else -- must be a syntax error error(pointer,"missing argument separator; comma inserted"); get_token; exit; end if; arg.next_arg := parse_element; if arg.next_arg /= null then -- parse_element succeeded arg := arg.next_arg; end if; end loop; return return_list; end parse_arguments; function parse_clause return AST_ptr is clause : AST_ptr := null; begin if current_token.is_a /= identifier then error(pointer,"invalid predicate name; clause ignored"); parse_to_period; else clause := new AST(implication); clause.head := parse_head; if current_token.is_a = period then -- just an assertion clause.tail := new AST'(fuzzy_value,1.0); elsif current_token.is_a = implication then -- we already knew that get_token; -- so skip it... clause.tail := parse_expression; else error(pointer,"':-' inserted"); clause.tail := parse_expression; end if; end if; if current_token.is_a /= period then if current_token.is_a = right_paren then error(pointer,"unbalanced right parenthesis"); parse_to_period; else error(pointer,"unknown parsing error"); end if; end if; get_token; return clause; end parse_clause; function parse_element return argument_ptr is arg, old : argument_ptr := null; duplicate, error_flg : boolean := false; begin case current_token.is_a is when character_lit => arg := new argument(character_lit); arg.char := current_token.char; when float_num => arg := new argument(float_num); arg.fp_num := current_token.fp_num; when integer_num => arg := new argument(integer_num); arg.int_num := current_token.int_num; when left_bracket => arg := new argument(prolog_list); arg.list := parse_list; when identifier => arg := new argument(predicate); arg.name := current_token.ident_name; get_token; if current_token.is_a = left_paren then arg.p_arguments := parse_arguments; end if; when variable => arg := new argument(variable); arg.v_name := current_token.var_name; when underline => arg := new argument(variable); arg.v_name := null; -- it's anonymous when others => error(pointer,"illegal or missing element"); -- -- if the next token isn't an element -- separator then we are totally lost, -- so give up on this argument list. -- if (current_token.is_a /= bar) and (current_token.is_a /= comma) then parse_to_paren; end if; error_flg := true; end case; if not error_flg then -- we're ok, and arg.is_a has a value if (arg.is_a /= prolog_list) and (arg.is_a /= predicate) then get_token; end if; end if; return arg; end parse_element; function parse_expression return AST_ptr is -- -- The basic parsing routines for expressions (which is to say the -- entire right half of a clause) are "parse_expression" and "parse_term." -- Basically, the parse grammar looks like this: -- -- E --> E binary_op T | unary_op T | T | -- E binary_op E | unary_op E -- T --> ( E ) | predicate_name -- -- -- Define a couple of characteristics of operators -- type ary_ness is (unary, binary); subtype precedence_value is integer range 0..255; -- -- Define the relative precedence of operators. In keeping with -- "Programming in Prolog" by Clocksin and Mellish, lower precedence -- operators are executed FIRST. This is counter-intuitive, but seemed -- better than conflicting with the definitions in such a popular book. -- The values assigned to operators is the same as given in the book. -- In the case of two operators of the same precedence, left-to-right -- execution occurs. The left-side/right-side precedence referred to -- in Clocksin & Mellish is NOT implemented. -- precedence : array (operators) of precedence_value := (semicolon => 254, comma => 253, bar => 252, hat => 251, rw_spy => 250, rw_nospy => 250, rw_not => 60, rw_is => 40, univ => 40, equal => 40, not_equal => 40, less_than => 40, less_or_equal => 40, greater_or_equal => 40, greater_than => 40, equality => 40, not_equality => 40, minus => 31, plus => 31, slash => 21, asterisk => 21, rw_mod => 11); -- -- miscellaneous variables -- left, right : AST_ptr; operator : token_type := null_token; -- initialize so we can test it later -- -- Ary -- returns unary or binary as the type of an operator -- function ary( operator : operators ) return ary_ness is begin if operator in binary_operators then return binary; elsif operator in unary_operators then return unary; else error(pointer,"parser.expression.ary called with invalid operator"); return binary; -- have to return something end if; end ary; -- -- Get_operator -- Since some operators are reserved words, where the -- specific word is buried as "current_token.word" this -- routine digs it out if necessary and returns a -- consistant item of subtype operators. -- function get_operator( thing : token_ptr ) return operators is begin if thing.is_a in operators then return thing.is_a; elsif thing.is_a = reserved_word then if (thing.word = rw_is) or else (thing.word = rw_mod) or else (thing.word = rw_nospy) or else (thing.word = rw_not) or else (thing.word = rw_spy) then return thing.word; end if; else error(pointer,"parser.expression.get_operator called with invalid operator"); return comma; -- have to return something end if; end get_operator; -- -- Not_operator -- We need to be able to tell if the current token is or -- is not an operator. It happens that "not_operator" is -- the version we need. -- function not_operator( thing : token_ptr ) return boolean is begin if thing.is_a in operators then return false; elsif thing.is_a = reserved_word then if (thing.word = rw_is) or else (thing.word = rw_mod) or else (thing.word = rw_nospy) or else (thing.word = rw_not) or else (thing.word = rw_spy) then return false; else return true; end if; else return true; end if; end not_operator; -- -- Build_expr -- This is the recursive routine which actually does most of -- the work. A single pass of this routine takes the -- input operator and its operands and builds an AST node -- which is returned as the new left operand. -- procedure build_expr( pending : precedence_value; left : in out AST_ptr; in_operator : operators; in_right : AST_ptr ) is operator : operators := in_operator; right : AST_ptr := in_right; op2, next_op : operators; right2 : AST_ptr; begin loop if not_operator(current_token) then -- -- At the end of this expression or subexpression? -- if current_token.is_a = period or current_token.is_a = right_paren then if ary(operator) = unary then left := new AST'(unary_operator, operator, right); else left := new AST'(binary_operator, operator, left,right); end if; else -- -- Hmmm...at this point the current token should be an -- operator of some sort. Since it isn't, give up. -- error(pointer,"invalid operator"); parse_to_paren; end if; exit; -- one way or another, we're done. else next_op := get_operator(current_token); -- -- If the next operator has a higher precedence, then execute -- it LATER. This means that we go ahead and compress the -- current operator and its operands into an AST node. But, -- if the right-hand side is null, it is a unary operator -- which must be evaluated before we'll even have an operand. -- if (precedence(next_op) >= precedence(operator)) and (right /= null) then if ary(operator) = unary then left := new AST'(unary_operator, operator, right); else left := new AST'(binary_operator, operator, left,right); end if; -- -- If the pending operator has a LOWER precedence then -- it needs executed before the next operator. Hence we -- exit to allow it to execute. -- exit when precedence(next_op) >= pending; -- -- still here, so shift in the next operator and operand -- operator := next_op; get_token; if not_operator(current_token) then right := parse_term; elsif ary(get_operator(current_token)) = unary then right := null; -- unary operator is legitimate operand else error(pointer,"illegal use of operator"); end if; else -- -- The next operator has to be executed before the current -- one. Make a recursive call to take care of it. -- op2 := next_op; get_token; if not_operator(current_token) then right2 := parse_term; elsif ary(get_operator(current_token)) = unary then right2 := null; -- unary operator is legitimate operand else error(pointer,"illegal use of operator"); end if; build_expr(precedence(operator), right, op2, right2); end if; end if; end loop; end build_expr; begin -- parse_expression if not_operator(current_token) then -- it must be the left-hand operand of -- a binary operator left := parse_term; else operator := get_operator(current_token); if ary(operator) /= unary then error(pointer,"missing left-hand operand for binary operator"); parse_to_paren; end if; end if; if not_operator(current_token) then -- -- A predicate name by itself is ok, so if the current token is a -- period things are fine. If it's anything else (we already know -- it's not a legal operator) we're lost. -- if current_token.is_a /= period then if operator = null_token then error(pointer,"missing operator"); else error(pointer,"missing operand"); end if; parse_to_paren; end if; else operator := get_operator(current_token); get_token; if not_operator(current_token) then right := parse_term; elsif ary(get_operator(current_token)) = unary then right := null; -- unary operator is legitimate operand else error(pointer,"illegal use of operator"); end if; build_expr(255, left, operator, right); end if; return left; end parse_expression; procedure parse_file( abstract_syntax_tree : out AST_ptr ) is clause, FP_program : AST_ptr := null; function first_node(node : AST_ptr) return AST_ptr is temp_node : AST_ptr := node; begin if temp_node /= null then while temp_node.prev /= null loop temp_node := temp_node.prev; end loop; end if; return temp_node; end first_node; begin get_token; while current_token.is_a /= end_of_file loop clause := parse_clause; if clause /= null then -- -- append new node to the last node in FP_program. Note that -- FP_program points to the last clause parsed. After appending -- the new node, then set the "next" pointer in the previous node. -- if FP_program /= null then -- this is not the first node FP_program.next := clause; clause.prev := FP_program; end if; FP_program := clause; end if; end loop; -- -- Point abstract_syntax_tree to the very first node in FP_program -- abstract_syntax_tree := first_node(FP_program); exception when unexpected_end_of_file => error(pointer,"unexpected end of file"); abstract_syntax_tree := first_node(FP_program); end parse_file; -- -- Parse_head -- Parse the head of a clause. This is currently limited to -- parsing a single predicate. -- function parse_head return AST_ptr is node : AST_ptr; begin node := parse_predicate; return node; end parse_head; -- -- Parse_list -- Parse a Prolog list structure. Note that lists appear -- only in argument lists. they can contain any of the elements which -- can occur elsewhere in an argument list. The structure of a list is -- similar to a LISP "cons" cell. -- function parse_list return p_list_ptr is has_tail, need_elt : boolean; temp_elt : argument_ptr; root, ptr, ptr2 : p_list_ptr; begin -- -- we know the current token is a left bracket -- get_token; need_elt := false; while current_token.is_a /= right_bracket loop temp_elt := parse_element; need_elt := false; if current_token.is_a = comma then has_tail := false; need_elt := true; get_token; elsif current_token.is_a = bar then has_tail := true; need_elt := true; get_token; elsif current_token.is_a = right_bracket then has_tail := false; elsif (current_token.is_a = period) or (current_token.is_a = right_paren) then error(pointer,"no terminating ']' for list"); exit; else -- we don't have what we expected error(pointer,"missing separator; comma inserted"); has_tail := false; need_elt := true; end if; if root = null then -- first element root := new p_list(has_tail); root.elt := temp_elt; ptr := root; else if ptr.has_tail then -- this element is the tail ptr.tail := temp_elt; if need_elt then error(pointer,"only a single element allowed in a tail"); parse_to_bracket; need_elt := false; end if; else -- a normal continuation of the list ptr2 := new p_list(has_tail); ptr2.elt := temp_elt; ptr.next_elt := ptr2; ptr := ptr2; end if; end if; end loop; if need_elt then error(pointer,"missing element in list"); end if; if current_token.is_a = right_bracket then -- finish off list get_token; end if; return root; end parse_list; -- -- Parse_predicate -- Parse a single predicate call -- function parse_predicate return AST_ptr is node, temp_node : AST_ptr := null; begin if current_token.is_a = reserved_word then if current_token.word in reserved_predicates then node := new AST(reserved_predicate); node.predicate := current_token.word; get_token; if current_token.is_a = left_paren then node.r_arguments := parse_arguments; end if; -- -- This section of code implements special handling for certain -- Fuzzy Prolog reserved predicates. For example, when the -- "fuzzy" predicate has an explicit floating point number as -- its argument, we go ahead and establish a fuzzy_value node. -- case node.predicate is when rw_fuzzy => if node.r_arguments.is_a = float_num then temp_node := new AST'(fuzzy_value, node.r_arguments.fp_num); release(node, null); node := temp_node; end if; when others => -- no special handling (or not implemented) null; end case; else error(pointer,"illegal use of operator"); end if; elsif current_token.is_a = identifier then node := new AST(predicate); node.name := current_token.ident_name; get_token; if current_token.is_a = left_paren then node.p_arguments := parse_arguments; end if; elsif current_token.is_a = cut then node := new AST'(reserved_predicate,cut,null); get_token; else error(pointer,"illegal use of operator"); end if; return node; end parse_predicate; -- -- Parse_read -- Return a single element for a READ predicate. Disallow -- variables as they make no sense in this context. -- procedure parse_read( elt : out argument_ptr; eof : out boolean ) is temp_elt : argument_ptr; begin get_token; temp_elt := parse_element; if current_token.is_a = end_of_file then eof := true; else if temp_elt.is_a = variable then error(pointer, "Variable not allowed on READ"); elt := null; else elt := temp_elt; end if; eof := false; end if; end parse_read; -- -- Parse_request -- Parse an interactive user request. This is assumed to be -- some expression (just like the tail of a clause). The -- short-hand list notation for file consultations is not -- currently supported. -- procedure parse_request( abstract_syntax_tree : out AST_ptr; eof : out boolean ) is begin get_token; abstract_syntax_tree := parse_expression; eof := (current_token.is_a = end_of_file); end parse_request; -- -- Parse_term -- Parse a term within an expression. Terms will either be -- predicate calls or subexpressions enclosed within -- parentheses. -- function parse_term return AST_ptr is node : AST_ptr; begin if current_token.is_a = left_paren then -- parse a subexpression get_token; -- consume the left parenthesis node := parse_expression; if current_token.is_a = right_paren then -- normal condition get_token; -- consume the right parenthesis elsif current_token.is_a = period then -- missing right parenthesis error(pointer,"right parenthesis inserted"); else error(pointer,"unknown parsing error in parse_term"); end if; elsif current_token.is_a = identifier then -- a predicate node := parse_predicate; elsif current_token.is_a = reserved_word then -- still a predicate node := parse_predicate; elsif current_token.is_a = cut then -- the "cut" predicate node := parse_predicate; elsif current_token.is_a = integer_num then node := new AST'(integer_num, current_token.int_num); get_token; elsif current_token.is_a = float_num then node := new AST'(float_num, current_token.fp_num); get_token; elsif current_token.is_a = character_lit then node := new AST'(character_lit, current_token.char); get_token; elsif current_token.is_a = variable then node := new AST'(variable, current_token.var_name); get_token; else error(pointer,"illegal or missing term"); end if; return node; end parse_term; end parser;