home *** CD-ROM | disk | FTP | other *** search
- _A HOME BREW C++ PARSER_
- by John M. Dlugosz
-
- [LISTING ONE]
-
- /* TOKENS. */
-
- <error>
- <identifier> => KW_SEARCH
- <operator> => OP_SEARCH
- <punctuator> => OP_SEARCH
- <number>
- <string>
- <eof>
- <type>
-
- /* KEYWORDS. */
-
- auto break case cdecl char class const continue default delete do
- double else enum extern far float for friend goto huge if inline int
- interrupt long near new operator overload pascal private protected
- public register return short signed sizeof static struct switch this
- typedef union unsigned virtual void volatile while
-
- /* OPERATORS. */
-
- || &&
- < <= == > >=
- + - * / %
- ? ++ -- '->'
- ! ~ ^ '|' & >> <<
- = <<= != %= &= *= += -= /= |= >>= ^=
-
- /* PUNCTUATORS. */
-
- '...' . , : ; [ ] { } ( ) ::
-
- /* NONTERMINALS. */
-
- Input -> File_and_tell <eof>
-
- File_and_tell -> File => AllDoneNow 1 /* normal completion */
-
- File -> Item | File Item
-
- Item -> Declaration
- /* or Definition. not in yet. */
-
-
- /**************************************************************
- To recognize a declaration, the storage class and type appear
- once. They are remembered. Each declaration is seperated by
- commas, and share the same type. The FinishedDeclarator calls
- an action for each one found.
- ****************/
-
- Declaration
- -> StorageClass Type_w/const Declarators ;
-
- Declarators
- -> FinishedDeclarator
- -> FinishedDeclarator , Declarators
-
- FinishedDeclarator -> Declarator Initializer? => Declaration 1
-
- /*********************************/
-
-
- Initializer?
- ->
- -> = Expression
- -> = { Expression-List }
- /* -> ( Expression-List ) */
-
- Expression-List
- -> Expression
- -> Expression Expression-List
-
-
- StorageClass
- -> => StoreStorage 0
- -> static => StoreStorage 1
- -> extern => StoreStorage 2
- -> typedef => StoreStorage 3
- -> auto => StoreStorage 4
- -> register => StoreStorage 5
-
- Type_w/const /* const may appear before or after the type name */
- -> Const/Volatile? Type Const/Volatile? => StoreBaseConstVol
-
- Type
- -> char => StoreType 1
- -> signed char => StoreType 2
- -> unsigned char => StoreType 3
- -> int => StoreType 4
- -> short => StoreType 4
- -> short int => StoreType 4
- -> signed int => StoreType 4
- -> signed short => StoreType 4
- -> signed short int => StoreType 4
- -> unsigned => StoreType 5
- -> unsigned int => StoreType 5
- -> unsigned short => StoreType 5
- -> unsigned short int => StoreType 5
- -> long => StoreType 6
- -> signed long => StoreType 6
- -> unsigned long => StoreType 7
- -> float => StoreType 8
- -> double => StoreType 9
- -> long double => StoreType 10
- -> void => StoreType 11
- -> enum Tag => StoreType 12
- -> Class Tag => StoreType 13
- -> union Tag => StoreType 14
-
- Tag
- -> <identifier> => StoreTag 1
- -> <type> => StoreTag 2
-
- Class
- -> struct
- -> class
-
- OverloadableOp -> * | / | = | + /* and all the others */
-
- Elipses? -> | '...'
-
- /* Declarations */
-
- Declarator
- -> Decl2
- -> ReachAttribute * Const/Volatile? Declarator => TypeModifier 3
- -> ReachAttribute & Const/Volatile? Declarator => TypeModifier 4
-
- Decl2
- -> Decl2 ( Arg-Declaration-List ) => TypeModifier 1
- -> Decl2 [ ConstExp? ] => TypeModifier 2
- -> Decl3
-
- Decl3
- -> Dname => Dname 1
- -> ( Declarator )
-
- Const/Volatile? /* const or volotile, neither, or both */
- -> => ConstVol 0
- -> const => ConstVol 1
- -> volatile => ConstVol 2
- -> const volatile => ConstVol 3
- -> volatile const => ConstVol 3
-
-
- ReachAttribute
- -> => ReachType 0
- -> near => ReachType 4
- -> far => ReachType 8
-
- Dname
- -> SimpleDname
- -> <type> :: SimpleDname
-
- SimpleDname
- -> <identifier>
- -> <type>
- -> ~ <type>
- -> Operator-FunctionName
-
- Operator-FunctionName
- -> operator OverloadableOp /* overload operator */
- -> operator <type> /* conversion operator */
- /* this should really allow any abstract type definition, not just
- a simple type name. I'll change it later */
- -> operator <identifier> /* ERROR production */
-
-
- /* Argument list for function declarations */
-
- Arg-Declaration-List
- -> Start-Nested-Type A-Decl-List? Elipses? End-Nested-Type
-
- Start-Nested-Type -> => NestedType 1
- End-Nested-Type -> => NestedType 0
-
- A-Decl-List?
- ->
- -> A-Decl-List
-
- A-Decl-List
- -> A-Decl-List , Argument-Declaration
- -> Argument-Declaration
-
- Argument-Declaration
- -> StorageClass Type_w/const Declarator => Declaration 2
-
- /* Expressions */
-
- ConstExp?
- ->
- -> ConstExp
-
- ConstExp -> Expression /* semantics will check */
-
- Expression
- /* stub out for now */
- -> <identifier>
- -> <number>
- -> <string>
-
-
-
-
- [LISTING TWO]
-
-
- // the node class is central to date representation.
- // Everything it knows is in a node.
-
- enum node_flavor { //state the derived type from a node
- nf_base, nf_type, nf_def
- };
-
- class node {
- protected:
- node();
- virtual ~node();
- public:
- node_flavor flavor;
- virtual void print();
- };
-
- enum primary_t { type_void, type_char, type_int, type_long, type_float,
- type_double, type_ldouble, type_enum, type_class, type_union,
- type_pointer, type_reference, type_array, type_function };
-
- class def_node_list; //forward ref
-
- class type_node : public node {
- public:
- type_node* to_what;
- type_node ();
- ~type_node();
- void print();
- unsigned flags;
- primary_t primary;
- node* secondary() { return to_what; }
- atom tag;
- def_node_list* aggr;
- void stuff_primary (int x, atom tagname);
- bool isConst() { return flags&1; }
- bool isVol() { return flags&2; }
- bool isNear() { return flags&4; }
- bool isFar() { return flags&8; }
- bool isUnsigned() { return flags&16; }
- };
-
- class def_node : public node {
- public:
- atom name;
- int storage_class;
- type_node* type;
- void print();
- def_node (atom n, int store, type_node* t);
- };
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
- /* lists of nodes */
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- class node_list {
- node** list;
- int capacity;
- int count;
- public:
- node_list();
- ~node_list() { delete list; }
- node** access (int x);
- int size() { return count; }
- void add(node* n) { *access(count++) = n; }
- };
-
- #define create_list(TYPE) class TYPE##_node_list : public node_list { \
- public:\
- TYPE##_node*& operator[] (int x) { return *(TYPE##_node **)access(x); } }
-
- create_list (type);
- create_list (def);
-
-
-
- [LISTING THREE]
-
- /*****************************************************
- File: ACTIONS.CPP Copyright 1989 by John M. Dlugosz
- the Actions called from the parser
- *****************************************************/
-
- #include "usual.hpp"
- #include <stream.hpp>
- #include "atom.hpp"
- #include "node.hpp"
- #include "define.hpp"
-
- // #define SHORT return 0
- /* short out actions for testing parser only.
- if something suddenly goes wrong, I can stub
- out all the actions to make sure I'm not walking
- on data somewhere. */
- #define SHORT
- // the normal case.
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- static char last_token[81];
-
- void get_last_token()
- {
- /* copy last token from scanner into a nul-terminated string */
- int len= 80; //maximum sig. length
- extern char *T_beg, *T_end; //from the scanner
- char* source= T_beg;
- char* dest= last_token;
-
- //cout << (unsigned)T_beg << " " << T_end;
- //for (int x= 0; x < 5; x++) cout.put(T_beg[x]);
-
- while (len-- && source < T_end) *dest++ = *source++;
- *dest= '\0';
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- /* in a declaration, a storage class is the first thing I get. This starts
- it off. Then a ConstVol, a StoreType, and a second ConstVol. The const
- or volatile keyword may appear before or after the type, with equal
- effect. The two bits are ORed together for the final result.
- After this, I get one or more calls to Declaration.
- */
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- // the type I'm building
- struct working_type {
- type_node* this_type; //the tail
- type_node* root_type; //the head
- int base_type;
- atom tag_name; // if base_type has a name
- atom name; //The name of the thing being declared
- int storage_class;
- int const_vol;
- working_type* next;
- } MainType;
-
- working_type* Tx= &MainType;
- /* this is accessed through a pointer because a declarator can be encounted
- while parsing another declarator. This lets me stack them. */
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- static int const_vol_stack[50];
- static int const_vol_stacktop= 0;
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- int Declaration (int x,...)
- {
- /* values for x: 1- global or local def.
- 2- parameters
- 3- struct/union list
- */
- SHORT;
- /* This finishes it off. A complete declaration has been found. */
- Tx->this_type->stuff_primary (Tx->base_type, Tx->tag_name);
- Tx->this_type->flags |= Tx->const_vol;
- // build the 'thing' from the type_node and the name.
- store_thing (Tx->root_type, Tx->name, Tx->storage_class, x);
- // Tx->root_type->print();
- // cout.put('\n');
- return 0;
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- int StoreBaseConstVol (int x,...)
- {
- SHORT;
- // the first two calls to ConstVol apply here.
- Tx->const_vol = const_vol_stack[--const_vol_stacktop];
- Tx->const_vol |= const_vol_stack[--const_vol_stacktop];
- return 0;
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- int StoreType (int x,...)
- {
- SHORT;
- Tx->base_type= x;
- return 0;
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- int StoreTag (int x,...)
- {
- SHORT;
- /* called when a struct, union, or enum is parsed. The tag is the last
- token read. After this call, the StoreType call is made with 'union'
- or whatever. */
- get_last_token();
- Tx->tag_name= atoms[last_token];
- return 0;
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- int StoreStorage (int x,...)
- {
- SHORT;
- /* this is the first thing called */
- Tx->storage_class= x;
- return 0;
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- int Dname (int x,...)
- {
- SHORT;
- /* if x==1, the last token is the name of a thing being declared. If
- x==0, there is no name and this is an abstact declarator. Either
- way, build a type node and store the name. This overwrites the type
- node, as it will be the first thing called. */
-
- if (x) {
- get_last_token();
- Tx->name= atoms[last_token];
- }
- Tx->this_type= new type_node;
- Tx->root_type= Tx->this_type;
- return 0;
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- int TypeModifier (int x,...)
- {
- SHORT;
- /* 1 t() 2 t[] 3 *t 4 &t */
-
- switch (x) {
- case 1:
- Tx->this_type->primary= type_function;
- // attach parameter list
- Tx->this_type->aggr= completed_def_list;
- break;
- case 2:
- Tx->this_type->primary= type_array;
- // >> attach size
- break;
- case 3:
- Tx->this_type->primary= type_pointer;
- Tx->this_type->flags |= const_vol_stack[--const_vol_stacktop];
- break;
- case 4:
- Tx->this_type->primary= type_reference;
- Tx->this_type->flags |= const_vol_stack[--const_vol_stacktop];
- break;
- }
- Tx->this_type->to_what= new type_node;
- Tx->this_type= Tx->this_type->to_what;
-
- return 0;
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- int ConstVol (int x,...)
- {
- SHORT;
- /* 1-const 2-volatile 3-both */
- const_vol_stack[const_vol_stacktop++]= x;
- return 0;
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- int ReachType (int x,...)
- {
- SHORT;
- /* 0-default 1-near 2-far */
- return 0;
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- int NestedType (int x, ...)
- {
- SHORT;
- working_type* p;
- if (x) { //start nesting
- p= new working_type;
- p->next= Tx;
- Tx= p;
- }
- else { //restore old type
- p= Tx;
- Tx= Tx->next;
- delete p;
- }
- parameter_list (x); //pass on to DEFINE module
- return 0;
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- int AllDoneNow (int x, ...)
- {
- SHORT;
- cout << "parser complete. \n";
- for (int loop= 0; loop < global_stuff.size(); loop++) {
- global_stuff[loop]->print();
- cout.put ('\n');
- }
- return 0;
- }
-
-
- [LISTING FOUR]
-
- /*****************************************************
- File: ATOM.CPP Copyright 1989 by John M. Dlugosz
- store strings
- *****************************************************/
-
- #include "usual.hpp"
- #include "atom.hpp"
-
- extern "C" void* malloc (unsigned size);
- extern "C" void free (void*);
- extern "C" void* realloc (void*, unsigned);
-
- atom_storage atoms(16);
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- atom_storage::atom_storage (int size)
- {
- count= 0;
- capacity= size;
- list= (char**) malloc (size * sizeof(char*));
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- atom_storage::~atom_storage()
- {
- free (list);
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- extern "C" int strcmp(char*,char*);
- extern "C" char* strdup(char*);
-
- atom atom_storage::operator[] (char* s)
- {
- for (int loop= 0; loop < count; loop++) {
- if (!strcmp(s, list[loop])) return loop; //found it
- }
- if (count == capacity) { // make it bigger
- capacity += capacity/2;
- list= (char**)realloc(list,capacity*sizeof(char*));
- }
- list[count]= strdup(s);
- return count++;
- }
-
-
-
-
- [LISTING FIVE]
-
-
- typedef int atom;
-
- class atom_storage {
- char** list;
- int count;
- int capacity;
- public:
- atom_storage(int size);
- ~atom_storage();
- char* operator[] (atom x) { return list[x]; }
- atom operator[] (char*);
- };
-
- extern atom_storage atoms;
-
-
-
- [LISTING SIX]
-
- /*****************************************************
- File: DEFINE.CPP Copyright 1989 by John M. Dlugosz
- deal with definitions once they are parsed
- *****************************************************/
-
- #include "usual.hpp"
- #include <stream.hpp>
- #include "atom.hpp"
- #include "node.hpp"
- #include "define.hpp"
-
- bool local_level= FALSE;
-
- def_node_list global_stuff;
- struct p_list_struct {
- def_node_list* l;
- p_list_struct* next;
- };
- static p_list_struct *p_list;
- def_node_list* completed_def_list;
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- void store_thing (type_node* t, atom name, int storage_class, int param)
- {
- /* 'param' is passed through from the grammar. If 1, this is a declaration
- for a local or global object. If 2, this is part of a parameter list */
- def_node* n= new def_node (name, storage_class, t);
-
- // file it away somewhere
- switch (param) {
- case 1:
- if (name == -1) {
- // >> I need to get a standard error reporter
- cout << "abstract declarator ignored\n";
- }
- global_stuff.add (n);
- break;
- case 2:
- // >> check it over
- p_list->l->add(n);
- break;
- }
- }
-
- /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-
- void parameter_list (int x)
- {
- p_list_struct* p;
- if (x) {
- p= new p_list_struct;
- p->next= p_list;
- p->l= new def_node_list;
- p_list= p;
- }
- else {
- p= p_list;
- p_list= p_list->next;
- completed_def_list= p->l;
- delete p;
- }
- }
-
-
-