home *** CD-ROM | disk | FTP | other *** search
- /* SLR parser generator
-
- given slr grammar in file `grammar', generate characteristic fsm for it
- `grammar.c' is generated from cfsm to recognize the grammar
- `grammar.o' gives description of state machine
-
- */
-
-
- #include <stdio.h>
- #include <ctype.h>
- #include <string.h>
- #include <system.h>
-
- #define gen(p) fputs(p,output)
-
- FILE *grammar, *output, *des;
-
- int states[100], sprod[1000], sposn[500], togen[500],
- tsnext[500], tsposn[500], trn[500], sttype[100],
- nstates=0, nterms=0, nnonterms=0, ntogen=1, trans=0, trs[1000];
-
- char *pptr, *prods[100], terms[50], nonterms[50], exnon[50], trc[1000];
-
- void main()
- {
- char o, ia[100], out[200], rt[50],
- *in, *tr, *aptr, *optr, *rtptr, nxtchr;
-
- int check, prct, z, rules=0, rout, flag=0, nxt=0, cflag,
- i, stype, rdstate, reduce, start, tf, tt, ot;
-
- grammar=fopen("grammar","r"); output=fopen("grammar.c","w");
- des=fopen("grammar.o","w");
-
- if(grammar==NULL || output==NULL || des==NULL)
- {
- printf("file open failure\n"),
- exit(1);
- }
-
- for(i=0; i<100; i++) prods[i]=malloc(10);
-
- *terms='\0'; *nonterms='\0';
-
- /**** get grammar from input file */
-
- for(;;)
- {
- if(fgets(ia,100,grammar)==0) break;
- in=ia;
- while(isspace(*in)) in++;
-
- tr=prods[rules++]; *tr=*in++;
- while(isspace(*in)) in++;
-
- if(!isupper(*tr) || *in++!='-' || *in++!='>')
- printf("bad grammar file\n"),
- fclose(output),
- exit(0);
-
- while(isspace(*in)) in++;
-
- while(*tr++)
- if((*tr=*in++)==' ') --tr;
-
- *(tr-2)='\0';
- }
-
- fclose(grammar);
-
- /**** show grammar */
-
- fprintf(des,"\ninput grammar : found %d productions ...\n\n",rules);
-
- for(i=0; i<rules; i++)
- fprintf(des," %c -> .%s. \n",*prods[i],prods[i]+1);
-
- /**** make list `nonterms' of non-terminals for which productions exist */
-
- for(i=0; i<rules; i++)
- {
- for(flag=0, check=0; check<nnonterms; check++)
- if(prods[i][0]==nonterms[check]) flag=1;
-
- if(!flag) nonterms[nnonterms++]=prods[i][0];
- }
-
- nonterms[nnonterms]='\0';
- fprintf(des,"\nnon-terminals are .%s.\n",nonterms);
-
- /**** check that every rhs non-terminal has a production,list non-terminals */
-
- for(i=0; i<rules; i++)
- {
- tr=prods[i];
- while(*++tr)
- {
- flag=0;
- if(isupper(*tr))
- {
- aptr=nonterms;
- while(*aptr) if(*aptr++==*tr) flag=1;
-
- if(!flag)
- printf("can\'t find production for %c in rule %s\n",
- *tr,prods[rout]),
- exit(1);
- }
- else
- {
- aptr=terms;
- while(*aptr) if(*aptr++==*tr) flag=1;
- if(!flag)
- { nterms++; *aptr++=*tr; *aptr='\0';}
- }
- }
- }
- fprintf(des,"\nterminals are .%s.\n",terms);
-
- /**** find starting S rule ... usually the first, must be unique */
-
- for(start=-1, i=0; i<rules; i++)
- if(prods[i][0]=='S')
- if(start==-1)
- start=i;
- else
- printf("starting rule must be unique\n"),
- exit(1);
-
-
- /**** expand the grammar into a characteristic fsm */
-
- tsnext[0]=0, tsposn[0]=0, tsnext[1]=-1, tsposn[1]=-1, togen[0]=0;
-
- while(ntogen>nstates) /* while there are states to expand, do nstate'th */
- {
- if(nstates>0)
- {
- nxt=states[nstates-1];
- while(sprod[nxt++]!=-1);
- }
- else
- nxt=0;
-
- states[nstates]=nxt; exnon[0]='\0';
- tf=togen[nstates]; tt=nxt;
- rdstate=0; reduce=0;
-
- /* copy unexpanded state to state table */
- while(((sprod[tt]=tsnext[tf])&(sposn[tt]=tsposn[tf]))+1) tt++, tf++;
-
- while(nxt<tt) /* expand each following nterm until complete */
- {
- if(isupper(o=prods[sprod[nxt]][sposn[nxt]+1])) /* non-terminal next? */
- {
- aptr=exnon;
-
- while(*aptr)
- if(*aptr==o)
- break;
- else
- aptr++;
-
- if(*aptr=='\0') /* then we must expand the nterm's productions */
- {
- *aptr++=o; *aptr='\0';
-
- for(i=0; i<rules; i++) /* insert prods for nterm */
- if(prods[i][0]==o)
- {
- sprod[tt]=i; sposn[tt++]=0;
- sprod[tt]=-1; sposn[tt]=-1;
- }
- }
- }
-
- nxt++;
- }
-
- ssort(nstates); nxt=states[nstates++];
- tf=togen[ntogen-1]; while(tsnext[tf++]+1);
-
- /* now copy each branch out of the state into the unexp table and try it */
- while(sprod[nxt]!=-1)
- {
- nxtchr=prods[sprod[nxt]][sposn[nxt]+1];
-
- tf=togen[ntogen-1];
- while(tsnext[tf++]+1);
- ot=tf; togen[ntogen]=tf;
-
- if(nxtchr=='\0')
- {
- trc[trans]=(char)sposn[nxt];
- trs[trans]=nxt++;
- trn[trans++]=-2; reduce=1;
- }
- else
- {
- while((sprod[nxt]!=-1)&& (nxtchr==prods[sprod[nxt]][sposn[nxt]+1]))
- {
- tsnext[ot]=sprod[nxt];
- tsposn[ot++]=sposn[nxt++]+1;
- }
-
- tsnext[ot]=-1; tsposn[ot]=-1;
- trc[trans]=nxtchr; rdstate=1;
-
- if(z=findstate(ntogen))
- trn[trans++]=z;
- else
- trn[trans++]=ntogen++;
- }
- }
-
- trn[trans]=-1;
- trc[trans++]='0'+rdstate+reduce*2;
- }
-
- /**** list cfsm's transitions */
-
- tt=0;
-
- for(i=0; i<nstates; i++)
- {
- fprintf(des,"\nstate %d \n",i);
- while(trn[tt++]!=-1)
- if(trn[tt-1]!=-2)
- fprintf(des," %c %d \n",trc[tt-1],trn[tt-1]);
- else
- fprintf(des," $\n");
-
- stype=trc[tt-1]-'0'; sttype[i]=stype;
- if(stype==1) fprintf(des,"read\n");
- if(stype==2) fprintf(des,"reduce\n");
- if(stype==3) fprintf(des,"inadequate\n");
- }
-
- /**** generate the parser */
-
- tt=0;
-
- fputs("#include <stdio.h>\n#include <system.h>\n\n",output);
- fputs("#define push(a)\t*++sp=a;\n#define pop()",output);
- fputs("\t(sp==stack?(exit(0),*sp):*sp--)\n\n",output);
- fputs("char stack[1000],*sp,res[5000],ia[100],*in;\n\n",output);
- fputs("main()\n{\n\tint state,item,resn=0,finish=0;\n\n",output);
- fputs("\tsp=stack; gets(in=ia); push(0);",output);
- fputs("\n\n\tfor(;;)\n\t{\n\t\tstate=pop(); push(state);\n",output);
- fputs("\t\tif(finish) break;\n\n\t\tswitch(state)\n\t\t{\n",output);
-
- for(i=0; i<nstates; i++) /* generate the cfsm's case statement */
- switch(sttype[i])
- {
- case 1: /* read state */
- fprintf(output,"\t\tcase %2d : push(item=*in++);\n",i);
- fprintf(output,"\n\t\t\tswitch(item){\n");
-
- while(trn[tt++]!=-1)
- fprintf(output,"\t\t\t\tcase \'%c\':push(%d); break;\n",
- trc[tt-1],trn[tt-1]);
-
- fprintf(output,"\t\t\t\tdefault :");
- fprintf(output,"printf(\"parse failed\");exit(0);\n");
- fprintf(output,"\t\t\t} break;\n\n");
- break;
-
- case 2: /* reduce state */
- fprintf(output,"\t\tcase %2d : ",i);
-
- for(ot=0; ot<trc[tt]; ot++)
- fprintf(output,"pop();pop();");
-
- fprintf(output," *--in=\'%c\';\n",prods[sprod[states[i]]][0]);
-
- fprintf(output,"\t\t\tres[resn++]=%d;",sprod[states[i]]);
-
- if(prods[sprod[states[i]]][0]=='S')
- fprintf(output," if(*in==\'S\') finish=1;");
-
- fprintf(output," break;\n\n");
- tt++;tt++;
- break;
-
- case 3: /* inadequate state */
- tf=tt; rtptr=rt;
- while(trn[tf]!=-1)
- if(trn[tf++]!=-2)
- *rtptr++=trc[tf-1];
- else
- o=prods[z=sprod[trs[tf-1]]][0];
-
- *rtptr='\0'; rtptr=rt;
- fprintf(output,"\t\tcase %2d : if(*in==\'%c\'",i,*rtptr++);
-
- while(*rtptr)
- fprintf(output,"||*in==\'%c\'",*rtptr++);
-
- fprintf(output,"){\n\t\t\t\tpush(item=*in++);");
- fprintf(output,"\n\n\t\t\t\tswitch(item){\n"); tf=tt;
-
- while(trn[tt++]!=-1)
- if(trn[tt-1]!=-2)
- fprintf(output,"\t\t\t\t\tcase \'%c\':push(%d); break;\n",
- trc[tt-1],trn[tt-1]);
-
- fprintf(output,"\t\t\t\tdefault :");
- fprintf(output,"printf(\"parse failed\");exit(0);}\n");
- fprintf(output,"\t\t\t\t} else {\n\t\t\t");
-
- while(trn[tf++]!=-2);
-
- for(ot=0; ot<trc[tf-1]; ot++)
- fprintf(output,"pop();pop();");
-
- fprintf(output," *--in=\'%c\';\n",o);
- fprintf(output,"\t\t\t\tres[resn++]=%d; ",z);
- fprintf(output,"\n\t\t\t} break;\n");
- break;
- }
-
- fprintf(output,"\n\t\t}\n\t}\n\n\tprintf(\"parsed: \");\n\tfor(item=0;");
- fprintf(output,"item<resn;item++)\n\t\tprintf(\" %%2d\",res[item]);\n}\n");
-
- fclose(des);
- fclose(output);
- }
-
-
- /**** sort the configuration set for a given state */
-
- ssort(int st)
- {
- int pstart,nstat,t,la,lb;
-
- for(nstat=0, pstart=states[st]; sprod[pstart++]!=-1; nstat++) ;
-
- if(nstat<2) return;
- pstart=states[st];
-
- for(la=0; la<nstat; la++)
- for(lb=pstart; lb<(pstart+nstat-1); lb++)
-
- if(1000*(int)prods[sprod[lb]][sposn[lb]+1]+50*(int)sprod[lb]+sposn[lb]>
- 1000*(int)prods[sprod[lb+1]][sposn[lb+1]+1]+50*(int)sprod[lb+1]+sposn[lb+1])
- {
- t=sprod[lb]; sprod[lb]=sprod[lb+1]; sprod[lb+1]=t;
- t=sposn[lb]; sposn[lb]=sposn[lb+1]; sposn[lb+1]=t;
- }
- }
-
- /**** compare two unexpanded state sets */
-
- int compare(int f1, int f2)
- {
- if(f1==f2) return 0;
- f1=togen[f1]; f2=togen[f2];
-
- while(tsnext[f1]==tsnext[f2] && tsposn[f1++]==tsposn[f2++])
- if(tsnext[f1-1]==-1) return 1;
-
- return 0;
- }
-
- /**** find if state already exists */
-
- int findstate(int state)
- {
- int i;
-
- for(i=0; i<ntogen; i++)
- if(compare(state,i)) return i;
-
- return 0;
- }
-