home *** CD-ROM | disk | FTP | other *** search
Lex Description | 1991-07-05 | 18.7 KB | 723 lines |
- %{
-
- /*****************************************************************************
- **** ****
- **** synan.y ****
- **** ****
- **** Copyright (C) A. Dwelly and W.W. Armstrong, 1990. ****
- **** ****
- **** All rights reserved. ****
- **** ****
- **** This is the syntax analyser for the small language `lf` that learns ****
- **** a function using the atree package. Atree is an adaptive ****
- **** logic network package based on work done by Prof. W. W. Armstrong ****
- **** and others in the Department of Computing Science, University of ****
- **** Alberta, and previous work at the Universite de Montreal, and at ****
- **** AT&T Bell Laboratories, Holmdel, N. J. The software demonstrates ****
- **** that networks consisting of many layers of linear threshold ****
- **** elements can indeed be effectively trained. ****
- **** ****
- **** License: ****
- **** A royalty-free license is granted for the use of this software for ****
- **** NON-COMMERCIAL PURPOSES ONLY. The software may be copied and ****
- **** modified provided this notice appears in its entirety and unchanged ****
- **** in all copies, whether changed or not. Persons modifying the code ****
- **** are requested to state the date, the changes made and who made them ****
- **** in the modification history. ****
- **** ****
- **** Warranty: ****
- **** No warranty of any kind is provided with this software. ****
- **** This software is not supported. Neither the authors, nor the ****
- **** University of Alberta, its officers, agents, servants or employees ****
- **** shall be liable or responsible in any way for any damage to ****
- **** property or direct personal or consequential injury of any nature ****
- **** whatsoever that may be suffered or sustained by any licensee, user ****
- **** or any other party as a consequence of the use or disposition of ****
- **** this software. ****
- **** ****
- **** Patent: ****
- **** The use of a digital circuit which transmits a signal indicating ****
- **** heuristic responsibility is protected by U. S. Patent 3,934,231 ****
- **** and others assigned to Dendronic Decisions Limited of Edmonton, ****
- **** W. W. Armstrong, President. ****
- **** ****
- **** A royalty-free license is granted for the use of this patent to ****
- **** run this software for NON-COMMERCIAL PURPOSES ONLY and the ****
- **** extension of this patent license to modified versions of this ****
- **** software is granted provided the purpose is NON-COMMERCIAL ONLY. ****
- **** ****
- **** Modification history: ****
- **** ****
- **** 09.02.10 Initial implementation, A.Dwelly ****
- **** 91.04.15 Port to PC and minor bug fixes, R. Manderscheid ****
- **** 91.05.20 Port to Windows, Monroe Thomas ****
- **** ****
- *****************************************************************************/
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include "atree.h"
#include "lf.h"
-
- int line_no;
- int in_int;
- float in_real;
- int tuple_ptr;
- int table_ptr;
- int tmp_max_sz;
- int table_size;
- float **tmp_table;
- bool train_size_flag;
- bool test_size_flag;
- bool largest_flag;
- bool smallest_flag;
- extern prog_type prog;
- %}
-
/*
 * Terminal symbols.  The declaration order is the same as before so that
 * the token numbers assigned by yacc do not change.
 */
%token FUNCTION DOMAIN DIMENSIONS EQUALS INTEGER QUANTIZATION COLON
%token TRAINING SET SIZE TEST CODING LARGEST SMALLEST
%token REAL TREE MIN MAX CORRECT EPOCHS VOTE IDENTIFIER
-
- %% /* Program definition */
-
/* A program is one function section and one tree section, in either order. */
program
    : function_spec tree_spec
    | tree_spec function_spec
    ;

/* The function section: keyword, dimension declaration, then statements. */
function_spec
    : FUNCTION dimension function_statements
    ;
-
/*
 * Dimension declaration.
 *
 * prog.dimensions is set to one more than the number written in the
 * source file, and every per-dimension array in prog, as well as the
 * scratch table tmp_table, is allocated with that many entries.
 * NOTE(review): the extra entry presumably holds the function's result
 * column -- confirm against the users of prog.
 */
dimension
    : DOMAIN DIMENSIONS EQUALS INTEGER
        {
            /*
             * The lexer accepts a leading '-', so the count can be
             * negative.  After the increment below that would give a zero
             * or negative prog.dimensions, and the table actions would
             * then index arrays that have no elements.  Stop here instead.
             */
            if (in_int < 0)
            {
                printf("Semantics error : negative domain dimensions on line %d\n", line_no);
                exit(1);
            }

            /* Temporary */

            in_int++;

            prog.dimensions = in_int;
            prog.quant = (int *) malloc((unsigned)sizeof(int) * in_int);
            MEMCHECK(prog.quant);

            prog.quant_step =
                (float *)malloc((unsigned)sizeof(float) * in_int);
            MEMCHECK(prog.quant_step);

            prog.train_table =
                (float **)malloc((unsigned)sizeof(float *) * in_int);
            MEMCHECK(prog.train_table);

            prog.test_table =
                (float **)malloc((unsigned)sizeof(float *) * in_int);
            MEMCHECK(prog.test_table);

            prog.test_table_quant =
                (int **)malloc((unsigned) sizeof(int *) * in_int);
            MEMCHECK(prog.test_table_quant);

            prog.largest =
                (float *)malloc((unsigned)sizeof(float) * in_int);
            MEMCHECK(prog.largest);

            prog.smallest =
                (float *)malloc((unsigned)sizeof(float) * in_int);
            MEMCHECK(prog.smallest);

            prog.string_width =
                (int *)malloc((unsigned)sizeof(int) * in_int);
            MEMCHECK(prog.string_width);

            prog.walk_step = (int *)malloc((unsigned)sizeof(int) * in_int);
            MEMCHECK(prog.walk_step);

#pragma warn -sus

            prog.random_walk =
                (LPBIT_VEC *)malloc((unsigned)sizeof(LPBIT_VEC) * in_int);
            MEMCHECK(prog.random_walk);

#pragma warn .sus

            /* Scratch column pointers used while a table is being read. */
            tmp_table = (float **)malloc((unsigned)sizeof(float *) * in_int);
            MEMCHECK(tmp_table);
        }
    ;
-
/* One or more statements; they may appear in any order. */
function_statements
    : function_statement
    | function_statements function_statement
    ;

/* The kinds of statement allowed inside the function section. */
function_statement
    : quantization
    | coding
    | train_table_size
    | train_table
    | test_table_size
    | test_table
    | largest
    | smallest
    ;
-
/*
 * quantization = n n n ...
 *
 * Exactly prog.dimensions integers are expected; they are stored in
 * prog.quant.  A list of the wrong length sets prog.error and reports
 * the line.
 */
quantization
    : QUANTIZATION EQUALS
        {
            tuple_ptr = 0;
        }
      quant_list
        {
            if (tuple_ptr > prog.dimensions)
            {
                prog.error = TRUE;
                printf("Semantics error : too many elements in quantization list on line %d\n", line_no);
            }
            if (tuple_ptr < prog.dimensions)
            {
                prog.error = TRUE;
                printf("Semantics error : not enough elements in quantization list on line %d\n", line_no);
            }
        }
    ;

/*
 * prog.quant has prog.dimensions entries, so surplus values are counted
 * but not stored.  The count still reaches the "too many elements" check
 * in the quantization rule; the array is never written out of bounds.
 */
quant_list
    : INTEGER
        {
            if (tuple_ptr < prog.dimensions)
            {
                prog.quant[tuple_ptr] = in_int;
            }
            tuple_ptr++;
        }
    | quant_list INTEGER
        {
            if (tuple_ptr < prog.dimensions)
            {
                prog.quant[tuple_ptr] = in_int;
            }
            tuple_ptr++;
        }
    ;
-
/*
 * coding = w:s w:s ...
 *
 * Exactly prog.dimensions "width:step" pairs are expected.  The first
 * number of each pair goes to prog.string_width and the second to
 * prog.walk_step.  A list of the wrong length sets prog.error and
 * reports the line.
 */
coding
    : CODING EQUALS
        {
            tuple_ptr = 0;
        }
      code_list
        {
            if (tuple_ptr > prog.dimensions)
            {
                prog.error = TRUE;
                printf("Semantics error : too many elements in coding list on line %d\n", line_no);
            }
            if (tuple_ptr < prog.dimensions)
            {
                prog.error = TRUE;
                printf("Semantics error : not enough elements in coding list on line %d\n", line_no);
            }
        }
    ;

/*
 * Both target arrays have prog.dimensions entries, so surplus pairs are
 * counted but not stored.  The mid-rule action must read in_int before
 * the lexer overwrites it with the second number of the pair.
 */
code_list
    : INTEGER
        {
            if (tuple_ptr < prog.dimensions)
            {
                prog.string_width[tuple_ptr] = in_int;
            }
        }
      COLON INTEGER
        {
            if (tuple_ptr < prog.dimensions)
            {
                prog.walk_step[tuple_ptr] = in_int;
            }
            tuple_ptr++;
        }
    | code_list INTEGER
        {
            if (tuple_ptr < prog.dimensions)
            {
                prog.string_width[tuple_ptr] = in_int;
            }
        }
      COLON INTEGER
        {
            if (tuple_ptr < prog.dimensions)
            {
                prog.walk_step[tuple_ptr] = in_int;
            }
            tuple_ptr++;
        }
    ;
/* training set size = n : remember the row count and note that it was given. */
train_table_size
    : TRAINING SET SIZE EQUALS INTEGER
        {
            train_size_flag = TRUE;
            prog.trainset_sz = in_int;
        }
    ;

/*
 * training set = v v v ...
 *
 * The size statement must come first because the scratch columns are
 * allocated from prog.trainset_sz before any value is read.  When the
 * table has been read the columns are handed over to prog.train_table.
 */
train_table
    : TRAINING SET EQUALS
        {
            int col;

            /* exit() does not return, so no else branch is needed. */
            if (!train_size_flag)
            {
                printf("Semantics error : training set defined before size\n");
                exit(1);
            }

            tuple_ptr = 0;
            table_ptr = 0;
            tmp_max_sz = prog.trainset_sz;

            for (col = 0; col < prog.dimensions; col++)
            {
                tmp_table[col] = (float *)malloc((unsigned)sizeof(float)*prog.trainset_sz);
                MEMCHECK(tmp_table[col]);
            }
        }
      table
        {
            int col;

            /* Fewer complete rows than announced is reported, not fatal. */
            if (tuple_ptr < prog.trainset_sz)
            {
                prog.error = TRUE;
                printf("Semantics error : not enough elements in training set\n");
            }

            for (col = 0; col < prog.dimensions; col++)
            {
                prog.train_table[col] = tmp_table[col];
            }
        }
    ;
-
/* test set size = n : remember the row count and note that it was given. */
test_table_size
    : TEST SET SIZE EQUALS INTEGER
        {
            test_size_flag = TRUE;
            prog.testset_sz = in_int;
        }
    ;

/*
 * test set = v v v ...
 *
 * The size statement must come first because the scratch columns, and
 * the matching prog.test_table_quant columns, are allocated from
 * prog.testset_sz before any value is read.  When the table has been
 * read the scratch columns are handed over to prog.test_table.
 */
test_table
    : TEST SET EQUALS
        {
            int col;

            /* exit() does not return, so no else branch is needed. */
            if (!test_size_flag)
            {
                printf("Semantics error : test set defined before size\n");
                exit(1);
            }

            tuple_ptr = 0;
            table_ptr = 0;
            tmp_max_sz = prog.testset_sz;

            for (col = 0; col < prog.dimensions; col++)
            {
                tmp_table[col] =
                    (float *)malloc((unsigned)sizeof(float) * prog.testset_sz);
                MEMCHECK(tmp_table[col]);

                prog.test_table_quant[col] =
                    (int *)malloc((unsigned) sizeof(int) * prog.testset_sz);
                MEMCHECK(prog.test_table_quant[col]);
            }
        }
      table
        {
            int col;

            /* Fewer complete rows than announced is reported, not fatal. */
            if (tuple_ptr < prog.testset_sz)
            {
                prog.error = TRUE;
                printf("Semantics error : not enough elements in test set\n");
            }

            for (col = 0; col < prog.dimensions; col++)
            {
                prog.test_table[col] = tmp_table[col];
            }
        }
    ;
-
/*
 * Body of a training or test table: a flat run of numbers read row by
 * row.  table_ptr is the column inside the current row and tuple_ptr is
 * the row; each column of tmp_table holds tmp_max_sz values.
 *
 * The capacity test is made BEFORE the store.  Testing afterwards, with
 * '>' rather than '>=', let a whole extra row be written past the end of
 * every column before the error was raised.
 */
table
    : num
        {
            if (tuple_ptr >= tmp_max_sz)
            {
                printf("Semantics error: too many elements in table\n");
                exit(1);
            }
            tmp_table[table_ptr][tuple_ptr] = in_real;
            table_ptr++;
            if (table_ptr == prog.dimensions)
            {
                /* Row complete: move to the first column of the next row. */
                table_ptr = 0;
                tuple_ptr++;
            }
        }
    | table num
        {
            if (tuple_ptr >= tmp_max_sz)
            {
                printf("Semantics error: too many elements in table\n");
                exit(1);
            }
            tmp_table[table_ptr][tuple_ptr] = in_real;
            table_ptr++;
            if (table_ptr == prog.dimensions)
            {
                /* Row complete: move to the first column of the next row. */
                table_ptr = 0;
                tuple_ptr++;
            }
        }
    ;

/*
 * A number is a REAL or an INTEGER.  The lexer has already stored a REAL
 * in in_real; an INTEGER is converted here so that users of num only
 * need to read in_real.
 */
num
    : REAL
    | INTEGER
        {
            in_real = (float) in_int;
        }
    ;
-
/*
 * largest = v v v ...
 *
 * Exactly prog.dimensions numbers are expected; they are stored in
 * prog.largest.  largest_flag is set as soon as the statement starts.
 * A list of the wrong length sets prog.error and reports the line.
 */
largest
    : LARGEST EQUALS
        {
            tuple_ptr = 0;
            largest_flag = TRUE;
        }
      largest_list
        {
            if (tuple_ptr > prog.dimensions)
            {
                prog.error = TRUE;
                printf("Semantics error : too many elements in largest list on line %d\n", line_no);
            }
            if (tuple_ptr < prog.dimensions)
            {
                prog.error = TRUE;
                printf("Semantics error : not enough elements in largest list on line %d\n", line_no);
            }
        }
    ;

/*
 * prog.largest has prog.dimensions entries, so surplus values are
 * counted but not stored; the length check above then reports them.
 */
largest_list
    : num
        {
            if (tuple_ptr < prog.dimensions)
            {
                prog.largest[tuple_ptr] = in_real;
            }
            tuple_ptr++;
        }
    | largest_list num
        {
            if (tuple_ptr < prog.dimensions)
            {
                prog.largest[tuple_ptr] = in_real;
            }
            tuple_ptr++;
        }
    ;
-
/*
 * smallest = v v v ...
 *
 * Exactly prog.dimensions numbers are expected; they are stored in
 * prog.smallest.  smallest_flag is set as soon as the statement starts.
 * A list of the wrong length sets prog.error and reports the line.
 */
smallest
    : SMALLEST EQUALS
        {
            tuple_ptr = 0;
            smallest_flag = TRUE;
        }
      smallest_list
        {
            if (tuple_ptr > prog.dimensions)
            {
                prog.error = TRUE;
                printf("Semantics error : too many elements in smallest list on line %d\n", line_no);
            }
            if (tuple_ptr < prog.dimensions)
            {
                prog.error = TRUE;
                printf("Semantics error : not enough elements in smallest list on line %d\n", line_no);
            }
        }
    ;

/*
 * prog.smallest has prog.dimensions entries, so surplus values are
 * counted but not stored; the length check above then reports them.
 */
smallest_list
    : num
        {
            if (tuple_ptr < prog.dimensions)
            {
                prog.smallest[tuple_ptr] = in_real;
            }
            tuple_ptr++;
        }
    | smallest_list num
        {
            if (tuple_ptr < prog.dimensions)
            {
                prog.smallest[tuple_ptr] = in_real;
            }
            tuple_ptr++;
        }
    ;
-
/* The tree section: keyword followed by one or more statements. */
tree_spec
    : TREE tree_statements
    ;

tree_statements
    : tree_statement
    | tree_statements tree_statement
    ;

/* The kinds of statement allowed inside the tree section. */
tree_statement
    : tree_size
    | max_correct
    | max_epochs
    | vote_no
    ;

/* size = n */
tree_size
    : SIZE EQUALS INTEGER
        {
            prog.tree_sz = in_int;
        }
    ;

/* min correct = n ; the value is kept in prog.max_correct. */
max_correct
    : MIN CORRECT EQUALS INTEGER
        {
            prog.max_correct = in_int;
        }
    ;

/* max epochs = n */
max_epochs
    : MAX EPOCHS EQUALS INTEGER
        {
            prog.max_epochs = in_int;
        }
    ;

/* vote = n ; anything whose remainder modulo 2 is not 1 is fatal. */
vote_no
    : VOTE EQUALS INTEGER
        {
            if ((in_int % 2) == 1)
            {
                prog.vote = in_int;
            }
            else
            {
                printf("Semantics error : vote number is even\n");
                exit(1);
            }
        }
    ;
- %%
-
-
/* Lexical states */

#define LEX_START 0             /* between tokens: skip blanks, classify next char */
#define LEX_INT 1               /* reading the digits of a number, before any '.' */
#define LEX_DEC 2               /* reading a real number, after its '.' */
#define LEX_IDENT 3             /* reading a keyword or identifier */
#define LEX_PUNCT 4             /* a single punctuation character has been read */
#define LEX_COMMENT 5           /* discarding the rest of a '#' comment line */
#define LEX_STOP 1000           /* end of input reached */

#define MAX_LEN_BUF 1000        /* size of the token text buffer in yylex */

int lexstate;                   /* current state, one of the LEX_ values */
int nextchar;                   /* one character of look-ahead, or EOF */
FILE *yyin;                     /* input stream read by the lexer */

/*
 * True when c starts a comment.  The argument is parenthesised so that
 * an expression such as ISCOMMENT(x | y) compares the whole expression
 * with '#' rather than only its last operand.
 */
#define ISCOMMENT(c) ((c) == '#')
-
- void
- lexinit()
-
- {
- lexstate = LEX_START;
- nextchar = getc(yyin);
- }
-
/*
 * Return 1 when c may appear in a real literal after its decimal point:
 * a decimal digit, an exponent marker ('e' or 'E') or a sign ('+' or
 * '-').  Return 0 for anything else, including EOF.
 *
 * The argument is an int so that the value returned by getc() can be
 * passed unchanged.  It is converted to unsigned char before isdigit()
 * sees it, because isdigit() is only defined for EOF and values that fit
 * in an unsigned char.
 */
int
isextdigit(int c)
{
    if (c == EOF)
    {
        return(0);
    }
    return((c == 'e') || (c == 'E') || (c == '+') || (c == '-')
           || isdigit((unsigned char) c));
}
-
- iswhite(c)
-
- char c;
-
- {
- if (c == '\n')
- {
- line_no++;
- return(TRUE);
- }
- else
- {
- return((c == 0) || (c == ' ') || (c == '\t'));
- }
- }
-
- gettoken(str)
-
- char *str;
-
- {
-
- int i;
- int outcode;
-
- static struct tok
- {
- char *token;
- int code;
- } toktab[] =
- {
- "function" , FUNCTION,
- "dimension" , DIMENSIONS,
- "dimensions" , DIMENSIONS,
- "=" , EQUALS,
- "quantization", QUANTIZATION,
- ":" , COLON,
- "coding" , CODING,
- "training" , TRAINING,
- "set" , SET,
- "size" , SIZE,
- "test" , TEST,
- "tree" , TREE,
- "minimum" , MIN,
- "min" , MIN,
- "maximum" , MAX,
- "max" , MAX,
- "correct" , CORRECT,
- "epochs" , EPOCHS,
- "largest" , LARGEST,
- "smallest" , SMALLEST,
- "domain" , DOMAIN,
- "vote" , VOTE,
- NULL ,0
- };
-
- outcode = IDENTIFIER;
-
- for (i = 0; toktab[i].token != NULL; i++)
- {
- if (strcmp(str,toktab[i].token) == 0)
- {
- outcode = toktab[i].code;
- break;
- }
- }
-
- return(outcode);
- }
-
- int
- yylex()
-
- {
- char yytext[MAX_LEN_BUF];
- int bufptr;
- bool found_token;
- int token;
-
- found_token = FALSE;
- bufptr = 0;
-
- while (!found_token)
- {
- switch (lexstate)
- {
- case LEX_START:
- while (iswhite(nextchar))
- {
- nextchar = getc(yyin);
- }
-
- if (ISCOMMENT(nextchar))
- {
- nextchar = getc(yyin);
- lexstate = LEX_COMMENT;
- }
- else if (isdigit(nextchar) || (nextchar == '-'))
- {
- yytext[bufptr++] = nextchar;
- nextchar = getc(yyin);
- lexstate = LEX_INT;
- }
- else if (isalpha(nextchar))
- {
- yytext[bufptr++] = nextchar;
- nextchar = getc(yyin);
- lexstate = LEX_IDENT;
- }
- else if (ispunct(nextchar))
- {
- yytext[bufptr++] = nextchar;
- nextchar = getc(yyin);
- lexstate = LEX_PUNCT;
- }
- else if (nextchar == EOF)
- {
- lexstate = LEX_STOP;
- }
- else
- {
- printf("Lexical error: unrecognized character %d\n",(int) nextchar);
- exit(1);
- }
- break;
-
- case LEX_INT:
- while (isdigit(nextchar))
- {
- yytext[bufptr++] = nextchar;
- nextchar = getc(yyin);
- }
- if (nextchar == '.')
- {
- yytext[bufptr++] = nextchar;
- nextchar = getc(yyin);
- lexstate = LEX_DEC;
- }
- else
- {
- yytext[bufptr] = 0;
- in_int = atoi(yytext);
- token = INTEGER;
- found_token = TRUE;
- }
- break;
-
- case LEX_DEC:
- while (isextdigit(nextchar))
- {
- yytext[bufptr++] = nextchar;
- nextchar = getc(yyin);
- }
- yytext[bufptr] = 0;
- sscanf(yytext,"%g",&in_real);
- token = REAL;
- found_token = TRUE;
- break;
-
- case LEX_IDENT:
- while (isalpha(nextchar) || isdigit(nextchar))
- {
- yytext[bufptr++] = nextchar;
- nextchar = getc(yyin);
- }
- yytext[bufptr] = 0;
- token = gettoken(yytext);
- found_token = TRUE;
- break;
-
- case LEX_PUNCT:
- yytext[bufptr] = 0;
- token = gettoken(yytext);
- found_token = TRUE;
- break;
-
- case LEX_COMMENT:
- while (nextchar != '\n')
- {
- nextchar = getc(yyin);
- }
- lexstate = LEX_START;
- break;
-
- case LEX_STOP:
- token = 0;
- found_token = TRUE;
- break;
- }
- }
-
- if (lexstate != LEX_STOP)
- {
- lexstate = LEX_START;
- }
- return(token);
- }
-
- #pragma argsused
- #pragma warn -rvl
-
- yyerror(s)
- char *s;
- {
- fprintf(stderr,"Error found on line %d\n",line_no);
- prog.error = TRUE;
- }
-
- #pragma warn .rvl
-