home *** CD-ROM | disk | FTP | other *** search
-
- -------------------------------------------------------------------------------
- -- --
- -- Library Unit: Parser --
- -- --
- -- Author: Bradley L. Richards --
- -- --
- -- Version Date Notes . . . --
- -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --
- -- 1.0 22 May 86 Initial Version --
- -- 1.1 19 Jun 86 Lotsa revisions due to Prover design --
- -- 2.0 20 Jun 86 Version number change only (for consistancy) --
- -- 2.05 13 Jul 86 Split spec and body into separate files --
- -- 2.1 21 Jul 86 Demonstration Version --
- -- 2.2 28 Jul 86 Added parse_read. Initial operational version --
- -- 3.0 10 Oct 86 Final thesis product --
- -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --
- -- --
- -- Library units used: Data_def, listing, token, unchecked_deallocation --
- -- --
- -- Description: This package parses an input program, and constructs an --
- -- abstract syntax tree for the program. Procedures appear in --
- -- alphabetical order. Start_parser and stop_parser are the --
- -- initialization and clean up routines. Parse_file, parse_read, and --
- -- parse_request are also externally visible routines. Release is --
- -- the routine which deallocated ASTs when they are no longer needed. --
- -- --
- -------------------------------------------------------------------------------
- -- --
- -- Package Body --
- -- --
- -------------------------------------------------------------------------------
- package body parser is
-
-
- --
- -- some forward definitions for our parsing routines
- --
- function parse_element return argument_ptr;
- function parse_expression return AST_ptr;
- function parse_head return AST_ptr;
- function parse_list return p_list_ptr;
- function parse_predicate return AST_ptr;
- function parse_term return AST_ptr;
-
-
- --
- -- Parse_to_bracket -- Parse to the first unbalanced right bracket to
- -- reach the end of the current string expression. Reaching an
- -- unbalanced right paren or a period also suffices.
- --
- procedure parse_to_bracket is
- bracket_level : natural := 0;
- paren_level : natural := 0;
- begin
- loop
- exit when current_token.is_a = period;
- exit when current_token.is_a = end_of_file;
- exit when paren_level = 0 and current_token.is_a = right_paren;
- exit when bracket_level = 0 and current_token.is_a = right_bracket;
- if current_token.is_a = left_paren then
- paren_level := paren_level + 1;
- elsif current_token.is_a = right_paren then
- paren_level := paren_level - 1;
- elsif current_token.is_a = left_bracket then
- bracket_level := bracket_level + 1;
- elsif current_token.is_a = right_bracket then
- bracket_level := bracket_level - 1;
- end if;
- get_token;
- end loop;
- end parse_to_bracket;
-
- --
- -- Parse_to_paren -- Parse to the first unbalanced right paren, or to
- -- a period. Skip embedded parentheses pairs.
- --
- procedure parse_to_paren is
- paren_level : natural := 0;
- begin
- loop
- exit when current_token.is_a = period;
- exit when current_token.is_a = end_of_file;
- exit when paren_level = 0 and current_token.is_a = right_paren;
- if current_token.is_a = left_paren then
- paren_level := paren_level + 1;
- elsif current_token.is_a = right_paren then
- paren_level := paren_level - 1;
- end if;
- get_token;
- end loop;
- end parse_to_paren;
-
- procedure parse_to_period is
- begin
- loop
- exit when current_token.is_a = period;
- exit when current_token.is_a = end_of_file;
- get_token;
- end loop;
- end parse_to_period;
-
-
- --
- -- Release -- Return memory to the system using UNCHECKED_DEALLOCATION.
- -- These routines must be filled in for an efficient system
- -- since the current Verdix Compiler does not include an
- -- automatic garbage collector. Routines deallocate all
- -- items within the passed structure UP TO the "stop" value.
- --
- procedure release( tree, stop : AST_ptr ) is
-
- ptr : AST_ptr := tree;
-
- begin
-
- if ptr /= stop then
- case ptr.node_type is
- when implication =>
- release(ptr.head, stop);
- release(ptr.tail, stop);
- when binary_operator =>
- release(ptr.left_operand, stop);
- release(ptr.right_operand, stop);
- when unary_operator =>
- release(ptr.operand, stop);
- when resolution_marker =>
- release(ptr.subgoals, stop);
- when others => null;
- end case;
- free_AST(ptr);
- end if;
-
- end release;
-
- procedure start_parser( input_file, output_file : in string ) is
- begin
- start_token(input_file, output_file);
- end start_parser;
-
- procedure stop_parser is
- begin
- stop_token;
- end stop_parser;
-
-
- -------------------------------------------------------------------------------
- -- --
- -- Parsing Routines --
- -- (in alphabetical order) --
- -- --
- -------------------------------------------------------------------------------
- --
- -- A general convention honored by the parsing routines is that each will
- -- leave current_token pointing to the token after whatever the routine just
- -- parsed. In the case of an error where the routine got lost, it calls one
- -- of parse_to_bracket, parse_to_paren, or parse_to_period to get the parser
- -- back into known territory. Note that these are hierarchical routines in
- -- a sense; parse_to_bracket will also stop when it finds an unbalanced
- -- right parenthesis or a period, and parse_to_paren will stop when it sees
- -- a period. The parsing routines are careful, in this case, not to parse
- -- past the token that represents the termination condition for one of the
- -- routines higher up in the call stack.
- --
-
- --
- -- parse_arguments -- parses the argument list associated with a
- -- predicate call.
- --
- function parse_arguments return argument_ptr is
- arg, return_list : argument_ptr;
- begin
- --
- -- on entry we know that the current token is a left parenthesis
- --
- get_token;
- arg := parse_element;
- return_list := arg; -- we'll return a pointer to the front of the list
- loop
- if current_token.is_a = comma then -- another argument coming
- get_token;
- elsif current_token.is_a = right_bracket then
- error(pointer, "unbalanced right bracket");
- get_token;
- elsif current_token.is_a = right_paren then -- end of argument list
- get_token;
- exit;
- elsif current_token.is_a = period then
- error(pointer, "no terminating ')' for argument list");
- exit;
- else -- must be a syntax error
- error(pointer,"missing argument separator; comma inserted");
- get_token;
- exit;
- end if;
- arg.next_arg := parse_element;
- if arg.next_arg /= null then -- parse_element succeeded
- arg := arg.next_arg;
- end if;
- end loop;
- return return_list;
- end parse_arguments;
-
-
- function parse_clause return AST_ptr is
- clause : AST_ptr := null;
- begin
- if current_token.is_a /= identifier
- then
- error(pointer,"invalid predicate name; clause ignored");
- parse_to_period;
- else
- clause := new AST(implication);
- clause.head := parse_head;
- if current_token.is_a = period then -- just an assertion
- clause.tail := new AST'(fuzzy_value,1.0);
- elsif current_token.is_a = implication then -- we already knew that
- get_token; -- so skip it...
- clause.tail := parse_expression;
- else
- error(pointer,"':-' inserted");
- clause.tail := parse_expression;
- end if;
- end if;
- if current_token.is_a /= period then
- if current_token.is_a = right_paren then
- error(pointer,"unbalanced right parenthesis");
- parse_to_period;
- else
- error(pointer,"unknown parsing error");
- end if;
- end if;
- get_token;
- return clause;
- end parse_clause;
-
- function parse_element return argument_ptr is
- arg, old : argument_ptr := null;
- duplicate, error_flg : boolean := false;
- begin
- case current_token.is_a is
- when character_lit => arg := new argument(character_lit);
- arg.char := current_token.char;
- when float_num => arg := new argument(float_num);
- arg.fp_num := current_token.fp_num;
- when integer_num => arg := new argument(integer_num);
- arg.int_num := current_token.int_num;
- when left_bracket => arg := new argument(prolog_list);
- arg.list := parse_list;
- when identifier => arg := new argument(predicate);
- arg.name := current_token.ident_name;
- get_token;
- if current_token.is_a = left_paren then
- arg.p_arguments := parse_arguments;
- end if;
- when variable => arg := new argument(variable);
- arg.v_name := current_token.var_name;
- when underline => arg := new argument(variable);
- arg.v_name := null; -- it's anonymous
- when others => error(pointer,"illegal or missing element");
- --
- -- if the next token isn't an element
- -- separator then we are totally lost,
- -- so give up on this argument list.
- --
- if (current_token.is_a /= bar) and
- (current_token.is_a /= comma) then
- parse_to_paren;
- end if;
- error_flg := true;
- end case;
- if not error_flg then -- we're ok, and arg.is_a has a value
- if (arg.is_a /= prolog_list) and (arg.is_a /= predicate) then
- get_token;
- end if;
- end if;
- return arg;
- end parse_element;
-
- function parse_expression return AST_ptr is
- --
- -- The basic parsing routines for expressions (which is to say the
- -- entire right half of a clause) are "parse_expression" and "parse_term."
- -- Basically, the parse grammar looks like this:
- --
- -- E --> E binary_op T | unary_op T | T |
- -- E binary_op E | unary_op E
- -- T --> ( E ) | predicate_name
- --
- --
- -- Define a couple of characteristics of operators
- --
- type ary_ness is (unary, binary);
- subtype precedence_value is integer range 0..255;
- --
- -- Define the relative precedence of operators. In keeping with
- -- "Programming in Prolog" by Clocksin and Mellish, lower precedence
- -- operators are executed FIRST. This is counter-intuitive, but seemed
- -- better than conflicting with the definitions in such a popular book.
- -- The values assigned to operators is the same as given in the book.
- -- In the case of two operators of the same precedence, left-to-right
- -- execution occurs. The left-side/right-side precedence referred to
- -- in Clocksin & Mellish is NOT implemented.
- --
- precedence : array (operators) of precedence_value :=
- (semicolon => 254, comma => 253,
- bar => 252, hat => 251, rw_spy => 250,
- rw_nospy => 250, rw_not => 60, rw_is => 40,
- univ => 40, equal => 40, not_equal => 40,
- less_than => 40, less_or_equal => 40, greater_or_equal => 40,
- greater_than => 40, equality => 40, not_equality => 40,
- minus => 31, plus => 31, slash => 21,
- asterisk => 21, rw_mod => 11);
- --
- -- miscellaneous variables
- --
- left, right : AST_ptr;
- operator : token_type := null_token; -- initialize so we can test it later
-
- --
- -- Ary -- returns unary or binary as the type of an operator
- --
- function ary( operator : operators ) return ary_ness is
- begin
- if operator in binary_operators then return binary;
- elsif operator in unary_operators then return unary;
- else error(pointer,"parser.expression.ary called with invalid operator");
- return binary; -- have to return something
- end if;
- end ary;
-
- --
- -- Get_operator -- Since some operators are reserved words, where the
- -- specific word is buried as "current_token.word" this
- -- routine digs it out if necessary and returns a
- -- consistant item of subtype operators.
- --
- function get_operator( thing : token_ptr ) return operators is
- begin
- if thing.is_a in operators then return thing.is_a;
- elsif thing.is_a = reserved_word then
- if (thing.word = rw_is) or else (thing.word = rw_mod) or else
- (thing.word = rw_nospy) or else (thing.word = rw_not) or else
- (thing.word = rw_spy) then return thing.word;
- end if;
- else
- error(pointer,"parser.expression.get_operator called with invalid operator");
- return comma; -- have to return something
- end if;
- end get_operator;
-
- --
- -- Not_operator -- We need to be able to tell if the current token is or
- -- is not an operator. It happens that "not_operator" is
- -- the version we need.
- --
- function not_operator( thing : token_ptr ) return boolean is
- begin
- if thing.is_a in operators then return false;
- elsif thing.is_a = reserved_word then
- if (thing.word = rw_is) or else (thing.word = rw_mod) or else
- (thing.word = rw_nospy) or else (thing.word = rw_not) or else
- (thing.word = rw_spy) then return false;
- else return true;
- end if;
- else return true;
- end if;
- end not_operator;
-
- --
- -- Build_expr -- This is the recursive routine which actually does most of
- -- the work. A single pass of this routine takes the
- -- input operator and its operands and builds an AST node
- -- which is returned as the new left operand.
- --
- procedure build_expr( pending : precedence_value; left : in out AST_ptr;
- in_operator : operators; in_right : AST_ptr ) is
-
- operator : operators := in_operator;
- right : AST_ptr := in_right;
- op2, next_op : operators;
- right2 : AST_ptr;
- begin
- loop
- if not_operator(current_token) then
- --
- -- At the end of this expression or subexpression?
- --
- if current_token.is_a = period or
- current_token.is_a = right_paren then
- if ary(operator) = unary then
- left := new AST'(unary_operator, operator, right);
- else
- left := new AST'(binary_operator, operator, left,right);
- end if;
- else
- --
- -- Hmmm...at this point the current token should be an
- -- operator of some sort. Since it isn't, give up.
- --
- error(pointer,"invalid operator");
- parse_to_paren;
- end if;
- exit; -- one way or another, we're done.
- else
- next_op := get_operator(current_token);
- --
- -- If the next operator has a higher precedence, then execute
- -- it LATER. This means that we go ahead and compress the
- -- current operator and its operands into an AST node. But,
- -- if the right-hand side is null, it is a unary operator
- -- which must be evaluated before we'll even have an operand.
- --
- if (precedence(next_op) >= precedence(operator)) and
- (right /= null) then
- if ary(operator) = unary then
- left := new AST'(unary_operator, operator, right);
- else
- left := new AST'(binary_operator, operator, left,right);
- end if;
- --
- -- If the pending operator has a LOWER precedence then
- -- it needs executed before the next operator. Hence we
- -- exit to allow it to execute.
- --
- exit when precedence(next_op) >= pending;
- --
- -- still here, so shift in the next operator and operand
- --
- operator := next_op;
- get_token;
- if not_operator(current_token) then
- right := parse_term;
- elsif ary(get_operator(current_token)) = unary then
- right := null; -- unary operator is legitimate operand
- else
- error(pointer,"illegal use of operator");
- end if;
- else
- --
- -- The next operator has to be executed before the current
- -- one. Make a recursive call to take care of it.
- --
- op2 := next_op;
- get_token;
- if not_operator(current_token) then
- right2 := parse_term;
- elsif ary(get_operator(current_token)) = unary then
- right2 := null; -- unary operator is legitimate operand
- else
- error(pointer,"illegal use of operator");
- end if;
- build_expr(precedence(operator), right, op2, right2);
- end if;
- end if;
- end loop;
- end build_expr;
-
- begin -- parse_expression
- if not_operator(current_token) then -- it must be the left-hand operand of
- -- a binary operator
- left := parse_term;
- else
- operator := get_operator(current_token);
- if ary(operator) /= unary then
- error(pointer,"missing left-hand operand for binary operator");
- parse_to_paren;
- end if;
- end if;
- if not_operator(current_token) then
- --
- -- A predicate name by itself is ok, so if the current token is a
- -- period things are fine. If it's anything else (we already know
- -- it's not a legal operator) we're lost.
- --
- if current_token.is_a /= period then
- if operator = null_token then
- error(pointer,"missing operator");
- else
- error(pointer,"missing operand");
- end if;
- parse_to_paren;
- end if;
- else
- operator := get_operator(current_token);
- get_token;
- if not_operator(current_token) then
- right := parse_term;
- elsif ary(get_operator(current_token)) = unary then
- right := null; -- unary operator is legitimate operand
- else
- error(pointer,"illegal use of operator");
- end if;
- build_expr(255, left, operator, right);
- end if;
- return left;
- end parse_expression;
-
-
- procedure parse_file( abstract_syntax_tree : out AST_ptr ) is
-
- clause, FP_program : AST_ptr := null;
-
- function first_node(node : AST_ptr) return AST_ptr is
- temp_node : AST_ptr := node;
- begin
- if temp_node /= null
- then
- while temp_node.prev /= null loop
- temp_node := temp_node.prev;
- end loop;
- end if;
- return temp_node;
- end first_node;
-
- begin
- get_token;
- while current_token.is_a /= end_of_file loop
- clause := parse_clause;
- if clause /= null then
- --
- -- append new node to the last node in FP_program. Note that
- -- FP_program points to the last clause parsed. After appending
- -- the new node, then set the "next" pointer in the previous node.
- --
- if FP_program /= null then -- this is not the first node
- FP_program.next := clause;
- clause.prev := FP_program;
- end if;
- FP_program := clause;
- end if;
- end loop;
- --
- -- Point abstract_syntax_tree to the very first node in FP_program
- --
- abstract_syntax_tree := first_node(FP_program);
- exception
- when unexpected_end_of_file =>
- error(pointer,"unexpected end of file");
- abstract_syntax_tree := first_node(FP_program);
- end parse_file;
-
- --
- -- Parse_head -- Parse the head of a clause. This is currently limited to
- -- parsing a single predicate.
- --
- function parse_head return AST_ptr is
- node : AST_ptr;
- begin
- node := parse_predicate;
- return node;
- end parse_head;
-
- --
- -- Parse_list -- Parse a Prolog list structure. Note that lists appear
- -- only in argument lists. they can contain any of the elements which
- -- can occur elsewhere in an argument list. The structure of a list is
- -- similar to a LISP "cons" cell.
- --
- function parse_list return p_list_ptr is
- has_tail, need_elt : boolean;
- temp_elt : argument_ptr;
- root, ptr, ptr2 : p_list_ptr;
- begin
- --
- -- we know the current token is a left bracket
- --
- get_token;
- need_elt := false;
- while current_token.is_a /= right_bracket loop
- temp_elt := parse_element;
- need_elt := false;
- if current_token.is_a = comma then
- has_tail := false;
- need_elt := true;
- get_token;
- elsif current_token.is_a = bar then
- has_tail := true;
- need_elt := true;
- get_token;
- elsif current_token.is_a = right_bracket then
- has_tail := false;
- elsif (current_token.is_a = period) or
- (current_token.is_a = right_paren) then
- error(pointer,"no terminating ']' for list");
- exit;
- else -- we don't have what we expected
- error(pointer,"missing separator; comma inserted");
- has_tail := false;
- need_elt := true;
- end if;
- if root = null then -- first element
- root := new p_list(has_tail);
- root.elt := temp_elt;
- ptr := root;
- else
- if ptr.has_tail then -- this element is the tail
- ptr.tail := temp_elt;
- if need_elt then
- error(pointer,"only a single element allowed in a tail");
- parse_to_bracket;
- need_elt := false;
- end if;
- else -- a normal continuation of the list
- ptr2 := new p_list(has_tail);
- ptr2.elt := temp_elt;
- ptr.next_elt := ptr2;
- ptr := ptr2;
- end if;
- end if;
- end loop;
- if need_elt then
- error(pointer,"missing element in list");
- end if;
- if current_token.is_a = right_bracket then -- finish off list
- get_token;
- end if;
- return root;
- end parse_list;
-
-
- --
- -- Parse_predicate -- Parse a single predicate call
- --
- function parse_predicate return AST_ptr is
- node, temp_node : AST_ptr := null;
- begin
- if current_token.is_a = reserved_word then
- if current_token.word in reserved_predicates then
- node := new AST(reserved_predicate);
- node.predicate := current_token.word;
- get_token;
- if current_token.is_a = left_paren then
- node.r_arguments := parse_arguments;
- end if;
- --
- -- This section of code implements special handling for certain
- -- Fuzzy Prolog reserved predicates. For example, when the
- -- "fuzzy" predicate has an explicit floating point number as
- -- its argument, we go ahead and establish a fuzzy_value node.
- --
- case node.predicate is
- when rw_fuzzy =>
- if node.r_arguments.is_a = float_num then
- temp_node := new AST'(fuzzy_value, node.r_arguments.fp_num);
- release(node, null);
- node := temp_node;
- end if;
- when others => -- no special handling (or not implemented)
- null;
- end case;
- else
- error(pointer,"illegal use of operator");
- end if;
- elsif current_token.is_a = identifier then
- node := new AST(predicate);
- node.name := current_token.ident_name;
- get_token;
- if current_token.is_a = left_paren then
- node.p_arguments := parse_arguments;
- end if;
- elsif current_token.is_a = cut then
- node := new AST'(reserved_predicate,cut,null);
- get_token;
- else
- error(pointer,"illegal use of operator");
- end if;
- return node;
- end parse_predicate;
-
-
- --
- -- Parse_read -- Return a single element for a READ predicate. Disallow
- -- variables as they make no sense in this context.
- --
- procedure parse_read( elt : out argument_ptr; eof : out boolean ) is
- temp_elt : argument_ptr;
- begin
- get_token;
- temp_elt := parse_element;
- if current_token.is_a = end_of_file then
- eof := true;
- else
- if temp_elt.is_a = variable then
- error(pointer, "Variable not allowed on READ");
- elt := null;
- else
- elt := temp_elt;
- end if;
- eof := false;
- end if;
- end parse_read;
-
-
- --
- -- Parse_request -- Parse an interactive user request. This is assumed to be
- -- some expression (just like the tail of a clause). The
- -- short-hand list notation for file consultations is not
- -- currently supported.
- --
- procedure parse_request( abstract_syntax_tree : out AST_ptr;
- eof : out boolean ) is
- begin
- get_token;
- abstract_syntax_tree := parse_expression;
- eof := (current_token.is_a = end_of_file);
- end parse_request;
-
-
- --
- -- Parse_term -- Parse a term within an expression. Terms will either be
- -- predicate calls or subexpressions enclosed within
- -- parentheses.
- --
- function parse_term return AST_ptr is
- node : AST_ptr;
- begin
- if current_token.is_a = left_paren then -- parse a subexpression
- get_token; -- consume the left parenthesis
- node := parse_expression;
- if current_token.is_a = right_paren then -- normal condition
- get_token; -- consume the right parenthesis
- elsif current_token.is_a = period then -- missing right parenthesis
- error(pointer,"right parenthesis inserted");
- else
- error(pointer,"unknown parsing error in parse_term");
- end if;
- elsif current_token.is_a = identifier then -- a predicate
- node := parse_predicate;
- elsif current_token.is_a = reserved_word then -- still a predicate
- node := parse_predicate;
- elsif current_token.is_a = cut then -- the "cut" predicate
- node := parse_predicate;
- elsif current_token.is_a = integer_num then
- node := new AST'(integer_num, current_token.int_num);
- get_token;
- elsif current_token.is_a = float_num then
- node := new AST'(float_num, current_token.fp_num);
- get_token;
- elsif current_token.is_a = character_lit then
- node := new AST'(character_lit, current_token.char);
- get_token;
- elsif current_token.is_a = variable then
- node := new AST'(variable, current_token.var_name);
- get_token;
- else
- error(pointer,"illegal or missing term");
- end if;
- return node;
- end parse_term;
-
- end parser;
-