home *** CD-ROM | disk | FTP | other *** search
- /* A demonstration of how to use Kirstein's zero crossing interval
- distributions to do a speaker dependent speech recognition of isolated
- words, using a PC and a Sound-Blaster compatible sound card.
-
- Though I compiled it using Turbo-C 2.0, it should take little, if any,
- adaptation to have a different compiler process it.
-
- Please read sbrecog.doc for further information.
-
- Johannes Kiehl, Trier (Germany), (c) 1993
- */
-
/* CPU clock rate in MHz.  The delay arguments PDELAY and RDELAY are
   derived from it.  Edit the value here, or define CPUSPEED on the
   compiler command line, in which case that definition takes precedence.
*/
#ifndef CPUSPEED
#define CPUSPEED 12
#endif
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <alloc.h>
- #include <math.h>
- #include <io.h>
- #include <fcntl.h>
- #include <string.h>
-
- #include <direct.h> /* SB direct access */
-
#define TRUE 1
#define FALSE 0

/* Print an error message on stdout.  x is printed as a plain string, not
   used as a format.  Wrapped in do/while(0) so that "ERROR(...);" behaves
   as a single statement, also in front of an else.
*/
#define ERROR(x) do { printf("\nAn error occured: "); printf("%s", (x)); printf("\n"); } while (0)

#define MAXDICTIONARY 32
/* the maximum number of dictionary entries.
   Must keep it <256, or some variables will overflow
   (dictionary indices are held in byte variables)
*/

/* Delay arguments handed to asmdelay() after each sample written
   (PDELAY) or read (RDELAY).  Fully parenthesized so they expand safely
   inside larger expressions.
*/
#define PDELAY ((CPUSPEED)*6)
#define RDELAY ((CPUSPEED)*2.3)

typedef int boole;              /* TRUE / FALSE */
typedef unsigned char byte;     /* one 8-bit sample or a small index */
typedef float soundvect[16];    /* one value per interval class */

/* zerotable[k] counts the zero crossing intervals of length k+1 samples
   (see classify).
*/
unsigned zerotable[64];
/* dictionary holds the trained vectors, parvect the vector of the most
   recently analyzed recording.
*/
soundvect dictionary[MAXDICTIONARY],parvect;
/* ID string of each dictionary entry */
char *identifiers[MAXDICTIONARY];
/* number of dictionary entries in use */
int dictsize=0;
/* samples counted since the last zero crossing (running state of analyze) */
unsigned zerolength=0;
-
- void play_sample(byte *snd,long size)
- { long i;
- byte *wp1;
-
- wp1=snd;
- speaker_on();
- for(wp1=snd,i=0;i<size;i++,wp1++){
- write_data(*wp1); asmdelay(PDELAY);
- }
- speaker_off();
- }
-
- void clip(byte *signal,unsigned size)
- { unsigned i;
-
- for (i=0;i<size;i++)
- if ((byte)signal[i]>128) (byte)signal[i]=255;
- else (byte)signal[i]=0;
- }
-
- void classify(unsigned length)
- /* At a sampling rate of 11kHz, one byte represents 90 microseconds.
- Thus 64 bytes mean an interval of 5.8 ms, or a 86 Hz frequency.
- Fairly sufficient, according to Kristein, who set the lower
- margin of his own implementation at 79 Hz (6.3 ms intervals).
- This means that, for the given sampling rate, 64 classes instead
- of Kristein's 200 are enough
- */
- { zerotable[(length>64)?63:length-1]++;
- }
-
- void analyze(byte *signal,unsigned size)
- { unsigned i;
-
- clip(signal,size);
- for (i=1;i<size;i++) {
- zerolength++;
- if ((byte)signal[i]!=(byte)signal[i-1]) {
- classify(zerolength); zerolength=0;
- }
- }
- }
-
- byte limits[17]={0,1,2,3,4,5,6,7,8,10,12,15,19,25,34,48,64};
-
- void addtovector(byte i,float a)
- /*
- table 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | 13 | 16 | 20 | 26 | 35 | 49
- | | | | | | | |-10|-12 |-15 |-19 |-25 |-34 |-48 |-64
- class 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16
- */
- { byte j,size;
-
- j=i/4; while (i>limits[j]) j++;
- size=limits[j]-limits[j-1];
- parvect[j-1]+=(float)a/(float)size;
- }
-
- void analyze_table(void)
- { unsigned max=1;
- byte i;
- char j;
- int h;
-
- for(i=0;i<64;i++) if (zerotable[i]>max) max=zerotable[i];
- for(i=0;i<64;i++) addtovector(i+1,(float)zerotable[i]/(float)max);
- }
-
- void tidyup(void)
- /* set the work space, i.e. parvect and zerotable, to all zeroes.
- Must be done *before* any and every call to analyze
- */
- { byte i;
-
- for(i=0;i<64;i++) {
- parvect[i/4]=0.0;
- zerotable[i]=0;
- }
- }
-
- boole harken(boole quiet)
- { byte threshold=2;
- byte *snd;
- int ctr,w;
- long i,size,maxsize=32768;
- boole result=FALSE;
-
- tidyup();
- if(reset_dsp()!=SBOK) {
- printf("\nError resetting Sound Blaster.\n"); exit(1);
- }
- snd=(byte*)malloc(maxsize); ctr=0;
- if (snd==(byte*)NULL) {
- ERROR("Memory allocation (fct harken)"); exit(0);
- }
- do {
- w=read_data(); asmdelay(RDELAY);
- if (abs(w-128)>threshold) ctr++;
- else ctr=0;
- } while (ctr<16);
- printf("Recording...");
- ctr=i=0;
- do {
- w=read_data(); asmdelay(RDELAY);
- if (abs(w-128)<threshold) ctr++;
- else ctr=0;
- snd[i++]=w;
- } while ((ctr<512) && (i<maxsize));
- printf(" Done (%u)\n",i);
- i-=1024;
- if (i>512) {
- result=TRUE;
- if (!quiet) play_sample(snd,i);
- analyze(snd+16,i);
- analyze_table();
- }
- free(snd);
- return(result);
- }
-
- void training(void)
- { char wk[64],wk2[8];
- boole done,inok;
- byte j,k=0;
-
- printf("You can now train up to %d words. You will be prompted to\n",
- MAXDICTIONARY);
- printf(" enter an ID string for each word, then pronounce it.\n");
- do {
- printf("\nPlease enter ID string #%d, or Q to quit training: ",
- dictsize);
- scanf("%s",wk);
- done=!strcmp(wk,"Q");
- if (!done) {
- if (identifiers[dictsize]==NULL)
- identifiers[dictsize]=malloc(strlen(wk)+1);
- strcpy(identifiers[dictsize],wk);
- do {
- inok=harken(FALSE);
- if (!inok) {
- printf("No word identified. ");
- printf("Try again (Y/N)? Y ");
- scanf("%s",wk2);
- done=!strcmp(wk2,"N");
- if (done) inok=TRUE;
- }
- } while (!inok);
- if (!done) {
- printf("Insert %s into dictionary (Y/N)? Y ",
- wk);
- scanf("%s",wk2);
- if (!!strcmp(wk2,"N")) {
- for(j=0;j<16;j++)
- dictionary[dictsize][j]=
- parvect[j];
- dictsize++;
- }
- }
- }
- } while (!done);
- printf("\nWould you like another training set of the same words to\n");
- printf(" be averaged with the set you just entered (Y/N)? N ");
- scanf("%s",wk);
- if (!!strcmp(wk,"N")) do {
- printf("\nPlease speak again #%d: %s ",k,identifiers[k]);
- do {
- inok=harken(FALSE);
- if (!inok) {
- printf("No word identified. ");
- printf("Please try again ");
- }
- } while (!inok);
- printf("Average %s into dictionary (Y/N)? Y ",wk);
- scanf("%s",wk2);
- if (!!strcmp(wk2,"N")) {
- for(j=0;j<16;j++) {
- dictionary[k][j]+=parvect[j];
- dictionary[k][j]/=2;
- }
- k++;
- }
- } while (k<dictsize);
- }
-
- float contingency(byte n)
- /* compares the parameter vector (parvect) to the nth vector stored
- in the dictionary. Returns a 0<=value<=1 (the "information trans-
- mission rate" or "Transinformationswert") that serves as a measure
- for the similarity of the two vectors. 0.0 means very similar.
-
- Error status: -1.0 -- All matrix fields zero, cannot divide
- */
- { float s,t=0.0;
- byte i,j;
- float pij;
- float cmatrix[3][17];
- float result;
-
- /* copy parameter vectors into matrix, calculate line and
- overall sums
- */
- cmatrix[2][16]=0.0;
- for (i=0;i<2;i++) {
- cmatrix[i][16]=0.0;
- for (j=0;j<16;j++) {
- if (i==0) cmatrix[i][j]=parvect[j];
- else cmatrix[i][j]=dictionary[n][j];
- cmatrix[i][16]+=cmatrix[i][j];
- }
- cmatrix[2][16]+=cmatrix[i][16];
- }
- if (cmatrix[2][16]==0.0) result=-1.0;
- else {
- /* normalize matrix to overall sum=1.0 */
- for (i=0;i<2;i++)
- for (j=0;j<17;j++)
- cmatrix[i][j]/=cmatrix[2][16];
- cmatrix[2][16]=1.0;
- /* calculate column sums */
- for (j=0;j<16;j++) {
- cmatrix[2][j]=0.0;
- for (i=0;i<2;i++)
- cmatrix[2][j]+=cmatrix[i][j];
- }
- /* calculate rate of transmission */
- for (i=0;i<2;i++)
- for (j=0;j<16;j++) {
- s=cmatrix[i][16]*cmatrix[2][j];
- if (s>0.0) {
- pij=cmatrix[i][j];
- s=pij/s;
- if (s>0.0) t=t+pij*(log(s)/log(2));
- }
- }
- result=t;
- }
- return(result);
- }
-
- typedef struct
- {
- byte first[256],second[256];
- } matchstrc;
-
- matchstrc *match(void)
- /* Compares the actual parvect to each one stored in the dictionary;
- returns a pointer to the struct defined above. It contains two lists,
- each shorter than 256 elements: The first list contains the closest
- match or equally close matches. The second list contains elements
- only if one or more matches were found whose contingency value is
- "worse" by no more than 0.001.
- The end of each list is indicated by a zero value! To make this
- possible all the indices stored in the lists are >0; subtract 1
- before using them.
- Contingency values worse than 0.060 are not accepted as matches!
-
- Error status: NULL returned -- dictionary empty
- */
- { byte i,cf,cs;
- int rs,minval=2000;
- int rsarr[MAXDICTIONARY];
- matchstrc *result=NULL;
-
- for (i=0;i<dictsize;i++) {
- rs=(int)(contingency(i)*1000);
- if ((rs>-1000) && (rs<minval)) minval=rs;
- rsarr[i]=rs;
- }
- if (minval<60) {
- result=(matchstrc*)malloc(sizeof(matchstrc));
- if (result!=NULL) {
- cf=cs=0;
- for (i=0;i<dictsize;i++) {
- if (rsarr[i]==minval)
- result->first[cf++]=i+1;
- if (rsarr[i]==minval+1)
- result->second[cs++]=i+1;
- }
- result->first[cf]=0;
- result->second[cs]=0;
- }
- }
- return(result);
- }
-
- void ppmatches(matchstrc m)
- { byte cf=0,cs=0,i;
-
- while (m.first[cf]>0) cf++;
- while (m.second[cs]>0) cs++;
- if (cf==1) printf("%s matched best. ",identifiers[m.first[0]-1]);
- else {
- for (i=0;i<cf;i++) {
- printf("%s",identifiers[m.first[i]-1]);
- if (i<cf-2) printf(", ");
- else if (i<cf-1) printf(" or ");
- else printf(" match best. ");
- }
- }
- if (cs==1) printf("%s comes close",identifiers[m.second[0]-1]);
- else {
- for (i=0;i<cs;i++) {
- printf("%s",identifiers[m.second[i]-1]);
- if (i<cs-2) printf(", ");
- else if (i<cs-1) printf(" and ");
- else printf(" come close.");
- }
- }
- printf("\n");
- }
-
- void recogniser(void)
- { char wk[8];
- boole done,inok;
- byte w;
- matchstrc *matches;
-
- printf("\nNow you can speak words, the program will match them with\n");
- printf(" the trained dictionary and produce the identifiers of\n");
- printf(" the closest matches.\n");
- do {
- /*
- printf("\nRecognise a word (Y/N)? Y ");
- scanf("%s",wk);
- done=!strcmp(wk,"N");
- */
- printf("\n"); done=FALSE;
-
- if (!done) {
- if (harken(TRUE)) {
- matches=match();
- if (matches!=NULL) {
- ppmatches(*matches);
- free(matches);
- }
- else printf("No matches.\n");
- }
- else printf("No word identified during recording\n");
- }
- } while (!done);
- }
-
- void initidents(void)
- { int i;
-
- for(i=0;i<MAXDICTIONARY;i++)
- identifiers[i]=NULL;
- }
-
/* Program entry: print the banner, train the dictionary, then run the
   recogniser.  Declared with an explicit int return type and return
   value instead of relying on implicit int.
*/
int main(void)
{
    printf("SBRECOG speech recogniser demo\n");
    printf("(c) Johannes Kiehl, Trier 1993\n\n");
    initidents();
    training();
    recogniser();
    return 0;
}