home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PC World Komputer 1997 February
/
PCWK0297.iso
/
technika
/
nnmodel
/
datamat.c
< prev
next >
Wrap
C/C++ Source or Header
|
1996-04-18
|
18KB
|
640 lines
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <math.h>
#include "nndefs.h"
#include "datamat.h"
#define ARRAYCHUNK 100
extern union {
float f;
long l;
} MissingUnion;
#define MISSING (MissingUnion.f)
void Logit(const char* fmt, ...);
void dump (const char *buf, int count);
/* Record type defines ****
'T' or 't' or ' ' rectype=0 Training record
'C' or 'c' rectype=0 Training record Center point
'S' or 's' rectype=0 Training record Star point
'F' or 'f' rectype=0 Training record Factorial point
'M' or 'm' rectype=0 Training record MultiLevel point
'X' or 'x' rectype=0 Training record Simplex point
'V' or 'v' rectype=1 Verification & test record
'D' or 'd' rectype=2 Deleted (but use for scaling)
'R' or 'r' rectype=2 Rejected (don't suggest,use in scale)
'E' or 'e' rectype=3 Excluded (not used in scaling)
'*' rectype=4 Comment record
'L' or 'l' rectype=5 Field label record
'U' or 'u' rectype=6 Units record
*/
/*
static char BASED_CODE msg1[] = "Out of memory in CDataMat line no=%d";
static char BASED_CODE msg2[] = "Invalid record type in data file <%x> record=%d";
static char BASED_CODE msg3[] = "Error while importing design matrix\nlast record D%02d";
static char BASED_CODE msg4[] = "Error number of columns don't agree record=%d";
static char BASED_CODE msg5[] = "Can't open file <%s>";
static char BASED_CODE msg7[] = "Calculated fields not supported";
static char BASED_CODE msg8[] = "No data loaded into training or test matrix due to lags";
static char BASED_CODE msg9[] = "Skipping record(s) with bad record types";
static char BASED_CODE msg10[] = "Rejecting - Bad Scale on record %d field=%d val=%f\n";
*/
void ToUpper (char *s);
int ChkClip (float f, float hi, float lo);
// Call implement serial macro for all the classes declared
// in datamat.h
/*
 * Put *pD into its empty state: version stamped with REVLEVEL, state word
 * set to NFobject_exists, every counter zeroed, every array pointer set to
 * NULL and every string member emptied.
 *
 * Pointers are simply overwritten; nothing is freed here, so any storage
 * they referred to must be released by the caller beforehand.
 */
void ZeroAll(DATAMAT *pD)
{
#ifdef VERBOSE
    Logit ("Zeroall\n");
#endif
    /* identity / state */
    pD->m_version = REVLEVEL;
    pD->m_istate = NFobject_exists;

    /* dimensions and counters */
    pD->m_numcols = 0;
    pD->m_numrows = 0;
    pD->m_numtests = 0;
    pD->m_ninputs = 0;
    pD->m_noutputs = 0;
    pD->m_maxtests = 0;
    pD->m_rawrows = 0;
    pD->m_rawcols = 0;
    pD->m_maxrows = 0;

    /* column bookkeeping */
    pD->m_coldesc = NULL;
    pD->m_rowdesc = NULL;
    pD->m_icoldesc = NULL;
    pD->m_ocoldesc = NULL;
    pD->m_icrossref = NULL;
    pD->m_ocrossref = NULL;

    /* training and test matrices */
    pD->m_iarray = NULL;
    pD->m_oarray = NULL;
    pD->m_itarray = NULL;
    pD->m_otarray = NULL;

    /* text members become empty strings */
    pD->m_rawfname[0] = 0;
    pD->m_parfname[0] = 0;
    pD->m_desc[0] = 0;
    pD->m_title[0] = 0;
}
// Null Constructor
/*
 * Allocate a DATAMAT on the heap and initialise it with ZeroAll().
 *
 * Returns the new object, or NULL when the allocation fails (the original
 * code passed the unchecked malloc result straight to ZeroAll, which would
 * dereference NULL on an out-of-memory condition).
 */
DATAMAT *DCreateDataMat()
{
    DATAMAT *pD;

    pD = (DATAMAT*) malloc (sizeof(DATAMAT));
#ifdef VERBOSE
    Logit ("CDataMat null constructor\n");
#endif
    if (pD == NULL) {
        return NULL;
    }
    ZeroAll(pD);
    return pD;
}
/*
 * Reset the first `num` column descriptors in `desc` to their defaults:
 * zero scale/offset/min/max, clip limits MISSING, width 90, usage 'N',
 * format "%s", empty label and units.
 *
 * For every index visited, the matching entries of pD->m_icrossref and
 * pD->m_ocrossref are set to -1 when the respective array exists.  Each
 * array is tested on its own; the original tested only m_icrossref before
 * writing through both pointers.
 *
 * NOTE(review): the cross-reference arrays are indexed with the same `i`
 * as `desc`; nothing here checks `num` against their length - confirm
 * against callers.
 */
void ZeroColDesc(DATAMAT *pD, int num,COL_DESC* desc)
{
    int i;

    for (i = 0; i < num; i++) {
        COL_DESC *d = &desc[i];

        d->fscale = 0.0f;
        d->foffset = 0.0f;
        d->max = 0.0f;
        d->min = 0.0f;
        d->fieldtype = 0;
        d->cliphi = MISSING;
        d->cliplo = MISSING;
        d->colwidth = 90;
        d->flag = 0;
        d->col_usage = 'N';
        strcpy (&d->format[0],"%s");
        d->vlab[0] = 0;
        d->units[0] = 0;

        if (pD->m_icrossref != NULL) {
            pD->m_icrossref[i] = -1;
        }
        if (pD->m_ocrossref != NULL) {
            pD->m_ocrossref[i] = -1;
        }
    }
}
//Destruction
/*
 * Release everything *pD owns and return it to the empty state.
 *
 * - m_coldesc and both cross-reference arrays are freed only when
 *   NFnum_col_known is set in the state word.
 * - m_icoldesc and m_ocoldesc are always freed.
 * - The training matrices are freed when NFtrainmat_loaded is set, the
 *   test matrices when NFtestmat_loaded is set; in both cases a matrix is
 *   only released if its column count is non-zero.
 *
 * The DATAMAT structure itself is not freed; it is re-initialised with
 * ZeroAll() so it can be reused.
 */
void DDeleteDataMat(DATAMAT *pD)
{
#ifdef VERBOSE
    Logit ("CDataMat DeleteContents istate=%lx\n",pD->m_istate);
#endif
    if ((pD->m_istate & NFnum_col_known) != 0) {
        free (pD->m_coldesc);
        free (pD->m_icrossref);
        free (pD->m_ocrossref);
    }

    free (pD->m_icoldesc);
    free (pD->m_ocoldesc);

#ifdef VERBOSE
    Logit ("CDataMat Delete Arrays\n");
#endif
    if ((pD->m_istate & NFtrainmat_loaded) != 0) {
        if (pD->m_ninputs != 0) {
            free_2d_floats (pD->m_iarray,pD->m_ninputs);
        }
        if (pD->m_noutputs != 0) {
            free_2d_floats (pD->m_oarray,pD->m_noutputs);
        }
    }

    if ((pD->m_istate & NFtestmat_loaded) != 0) {
        if (pD->m_ninputs != 0) {
            free_2d_floats (pD->m_itarray,pD->m_ninputs);
        }
        if (pD->m_noutputs != 0) {
            free_2d_floats (pD->m_otarray,pD->m_noutputs);
        }
    }

    ZeroAll(pD);
}
/*
 * Discard the test matrices of *pD.
 *
 * When NFtestmat_loaded is set, each test matrix with a non-zero column
 * count is released.  Regardless of the flag, the flag is then cleared,
 * both test-matrix pointers become NULL and m_numtests is reset to 0.
 */
void RemoveTestMatrix(DATAMAT *pD)
{
    if ((pD->m_istate & NFtestmat_loaded) != 0) {
        if (pD->m_ninputs != 0) {
            free_2d_floats (pD->m_itarray,pD->m_ninputs);
        }
        if (pD->m_noutputs != 0) {
            free_2d_floats (pD->m_otarray,pD->m_noutputs);
        }
    }

    pD->m_numtests = 0;
    pD->m_otarray = NULL;
    pD->m_itarray = NULL;
    pD->m_istate &= ~NFtestmat_loaded;
}
/*
 * Discard both the training and the test matrices of *pD.
 *
 * Each matrix is released only if its "loaded" flag is set and its column
 * count is non-zero.  Afterwards NFtestmat_loaded, NFtrainmat_loaded and
 * NFunscaled_loaded are cleared and all four matrix pointers are NULL.
 * The row counters (m_numrows, m_numtests) are left as they were.
 */
void RemoveMatrix(DATAMAT *pD)
{
    if ((pD->m_istate & NFtrainmat_loaded) != 0) {
        if (pD->m_ninputs != 0) {
            free_2d_floats (pD->m_iarray,pD->m_ninputs);
        }
        if (pD->m_noutputs != 0) {
            free_2d_floats (pD->m_oarray,pD->m_noutputs);
        }
    }

    if ((pD->m_istate & NFtestmat_loaded) != 0) {
        if (pD->m_ninputs != 0) {
            free_2d_floats (pD->m_itarray,pD->m_ninputs);
        }
        if (pD->m_noutputs != 0) {
            free_2d_floats (pD->m_otarray,pD->m_noutputs);
        }
    }

    /* drop the three "loaded" bits one at a time, as the flags are
       independent state bits */
    pD->m_istate &= ~NFtestmat_loaded;
    pD->m_istate &= ~NFtrainmat_loaded;
    pD->m_istate &= ~NFunscaled_loaded;

    pD->m_otarray = NULL;
    pD->m_itarray = NULL;
    pD->m_oarray = NULL;
    pD->m_iarray = NULL;
}
//Attributes
//member functions to modify the protected member variables
/*
 * Fetch one cell of the training-input matrix.  The matrix is indexed
 * column first, then row.  No bounds checking is performed.
 */
float DGetInputVal(DATAMAT *pD, int row, int col )
{
    return (pD->m_iarray[col])[row];
}
/*
 * Fetch one cell of the training-output matrix.  The matrix is indexed
 * column first, then row.  No bounds checking is performed.
 */
float DGetOutputVal(DATAMAT *pD, int row, int col )
{
    return (pD->m_oarray[col])[row];
}
/*
 * Fetch one cell of the test-input matrix.  The matrix is indexed column
 * first, then row.  No bounds checking is performed.
 */
float DGetInputTVal(DATAMAT *pD, int row, int col )
{
    return (pD->m_itarray[col])[row];
}
/*
 * Fetch one cell of the test-output matrix.  The matrix is indexed column
 * first, then row.  No bounds checking is performed.
 */
float DGetOutputTVal(DATAMAT *pD, int row, int col )
{
    return (pD->m_otarray[col])[row];
}
/*
 * Store `val` in one cell of the training-input matrix (indexed column
 * first, then row).  No bounds checking is performed.
 */
void DSetInputVal(DATAMAT *pD, int row, int col, float val )
{
    (pD->m_iarray[col])[row] = val;
}
/*
 * Store `val` in one cell of the test-input matrix (indexed column first,
 * then row).  No bounds checking is performed.
 */
void DSetInputTVal(DATAMAT *pD, int row, int col, float val )
{
    (pD->m_itarray[col])[row] = val;
}
/*
 * Store `val` in one cell of the training-output matrix (indexed column
 * first, then row).  No bounds checking is performed.
 */
void DSetOutputVal(DATAMAT *pD, int row, int col, float val )
{
    (pD->m_oarray[col])[row] = val;
}
/*
 * Store `val` in one cell of the test-output matrix (indexed column first,
 * then row).  No bounds checking is performed.
 */
void DSetOutputTVal(DATAMAT *pD, int row, int col, float val )
{
    (pD->m_otarray[col])[row] = val;
}
/*
 * Undo the scaling of a stored value: divide by the column's fscale, then
 * subtract its foffset.
 *
 *   C   'I' uses input descriptor ix, 'O' uses output descriptor ix;
 *       any other selector leaves f untouched.
 *
 * A value equal to MISSING is returned unchanged.
 */
float DRescale(DATAMAT *pD, float f,char C,int ix)
{
    const COL_DESC *d;

    if (f == MISSING) {
        return f;
    }

    switch (C) {
    case 'I':
        d = &pD->m_icoldesc[ix];
        break;
    case 'O':
        d = &pD->m_ocoldesc[ix];
        break;
    default:
        return f;
    }

    f /= d->fscale;
    f -= d->foffset;
    return f;
}
/*
 * Scale a raw value for storage: add the column's foffset, then multiply
 * by its fscale.
 *
 *   C   'I' uses input descriptor ix, 'O' uses output descriptor ix;
 *       any other selector leaves f untouched.
 *
 * NOTE(review): unlike DRescale, a value equal to MISSING is not passed
 * through unchanged here - confirm whether that asymmetry is intended.
 */
float DScale(DATAMAT *pD, float f,char C,int ix)
{
    const COL_DESC *d;

    switch (C) {
    case 'I':
        d = &pD->m_icoldesc[ix];
        break;
    case 'O':
        d = &pD->m_ocoldesc[ix];
        break;
    default:
        return f;
    }

    f += d->foffset;
    f *= d->fscale;
    return f;
}
/*
 * Rescale a stored value and format it as text using the column's own
 * printf format.
 *
 *   f    stored (scaled) value
 *   C    'I' for input column ix, 'O' for output column ix
 *
 * Returns " . " when f equals MISSING, "BadC" for any other selector, and
 * otherwise a pointer to the descriptor's convstr buffer holding at most
 * 19 characters of the formatted value.  That buffer is overwritten by the
 * next call for the same column.
 *
 * The original formatted with an unbounded sprintf into a 40-byte stack
 * buffer; a wide value or a long format could overrun it.  The output is
 * now written with snprintf directly into convstr, bounded to the same 20
 * bytes the original copied.
 *
 * NOTE(review): the format string comes from the descriptor and is fed a
 * floating-point argument; a format that is not a floating-point
 * conversion (ZeroColDesc installs "%s") is undefined behaviour here.
 */
char * RescaleFmt(DATAMAT *pD,float f,char C,int ix)
{
    enum { CONVSTR_BYTES = 20 };   /* bytes of convstr the original used */
    COL_DESC *desc;

    if (f==MISSING) return " . ";

    if (C=='O') {
        desc = &pD->m_ocoldesc[ix];
    } else if (C=='I') {
        desc = &pD->m_icoldesc[ix];
    } else {
        return "BadC";
    }

    snprintf (&desc->convstr[0],CONVSTR_BYTES,&desc->format[0],DRescale(pD,f,C,ix));
    desc->convstr[CONVSTR_BYTES-1]=0;   /* terminated even if snprintf failed */
    return &desc->convstr[0];
}
/*
 * Recompute fscale and foffset from min/max for each of the first `cols`
 * descriptors whose fieldtype is FIELDTYPE_FLOAT; other descriptors are
 * left alone.
 *
 * With range = max - min the result is
 *     foffset = -min + |range| * .333333   (+ .5 when range is zero)
 *     fscale  = .6 / range                 (.6 when range is zero)
 * so that, for max > min, (value + foffset) * fscale maps min to about
 * 0.2 and max to about 0.8.
 */
void DReCalcScalingFactor(COL_DESC *cold, int cols)
{
    int k;

    for (k = 0; k < cols; k++) {
        COL_DESC *c = &cold[k];
        float span;

        if (c->fieldtype != FIELDTYPE_FLOAT) {
            continue;
        }

        span = c->max - c->min;

        /* both arms evaluate to the negated minimum */
        c->foffset = (c->min < 0.0f) ? (float) fabs(c->min) : -c->min;

        if (span == 0.0f) {
            /* degenerate column: avoid dividing by zero */
            c->fscale = 1.0f;
            c->foffset += .5f;
        } else {
            c->fscale = 1.0f/span;
        }

        c->fscale *= .6f;
        c->foffset += ((float)fabs(span) * .333333f);
    }
}
/*
 * Translate a record tag from an import file into a record selector.
 *
 *   "D<n>"  -> n for 0 <= n <= 8 (a bare "D" yields 0); -1 otherwise
 *   "M..."  -> 0     (training data row)
 *   "T..."  -> 1000  (test data row)
 *   other   -> -1
 *
 * The number is parsed with strtol so that an over-long digit string
 * cannot overflow (sscanf "%d" has undefined behaviour in that case), and
 * negative numbers are rejected with -1 instead of being returned as-is.
 */
int dtransl(char *cdummy)
{
    if (cdummy[0] == 'D') {
        long val = strtol(&cdummy[1], NULL, 10);

        if (val < 0 || val > 8) return -1;
        return (int) val;
    }
    if (cdummy[0] == 'M') return 0;
    if (cdummy[0] == 'T') return 1000;
    return -1;
}
int DImportDataMat(DATAMAT *pD, FILE *fd) {
int i,numtrain,numtest;
char cdummy[128];
int sel,stat,lastsel;
float f;
time_t ttime;
sel=0;
#ifdef VERBOSE
Logit("Start import DM\n");
#endif
numtrain = numtest = 0;
top:
stat = fscanf (fd,"%s",&cdummy);
if (stat==EOF) {
#ifdef VERBOSE
Logit("Finished import DM\n");
#endif
return 0;
}
lastsel = sel;
sel = dtransl(cdummy);
// Logit("Reading %s\n",cdummy);
switch (sel) {
default: //error
goto errorexit;
break;
case 0: //training data
for(i=0;i<pD->m_ninputs;i++) {
fscanf (fd,"%e",&f);
DSetInputVal(pD,numtrain,i,DScale(pD,f,'I',i));
}
for(i=0;i<pD->m_noutputs;i++) {
fscanf (fd,"%e",&f);
DSetOutputVal(pD,numtrain,i,DScale(pD,f,'O',i));
}
numtrain++;
break;
case 1000: // test data
for(i=0;i<pD->m_ninputs;i++) {
fscanf (fd,"%e",&f);
DSetInputTVal(pD,numtest,i,DScale(pD,f,'I',i));
}
for(i=0;i<pD->m_noutputs;i++) {
fscanf (fd,"%e",&f);
DSetOutputTVal(pD,numtest,i,DScale(pD,f,'O',i));
}
numtest++;
break;
case 1:
fscanf (fd,"%lu %d %d %d %d %d %d %d\n",&pD->m_istate,&pD->m_numcols,&pD->m_numrows,
&pD->m_ninputs,&pD->m_noutputs,&pD->m_rawrows,&pD->m_rawcols,&pD->m_total);
if (pD->m_istate&NFnum_col_known) {
pD->m_coldesc = (COL_DESC*) malloc (sizeof(COL_DESC)*pD->m_numcols);
pD->m_icrossref = (int*) malloc (sizeof(int)*pD->m_numcols);
pD->m_ocrossref = (int*) malloc (sizeof(int)*pD->m_numcols);
for (i=0;i<pD->m_numcols;i++) {
pD->m_icrossref[i]=0;
pD->m_ocrossref[i]=0;
}
for (i=0;i<pD->m_numcols;i++) {
pD->m_coldesc[i].fscale = 0;
pD->m_coldesc[i].foffset = 0;
pD->m_coldesc[i].cliphi = pD->m_coldesc[i].cliplo = MISSING;
pD->m_coldesc[i].max = 0;
pD->m_coldesc[i].min = 0;
pD->m_coldesc[i].flag = 0;
pD->m_coldesc[i].fieldtype = 0;
pD->m_coldesc[i].col_usage = 'U';
pD->m_coldesc[i].format[0] = 0;
pD->m_coldesc[i].vlab[0] = 0;
pD->m_coldesc[i].units[0] = 0;
pD->m_coldesc[i].colwidth = 90;
}
}
pD->m_istate = NFobject_exists | NFnum_col_known |
NFtrainmat_loaded | NFi_o_col_known;
if (pD->m_istate&NFtrainmat_loaded) {
pD->m_icoldesc = (COL_DESC*) malloc (sizeof(COL_DESC)*pD->m_ninputs);
pD->m_ocoldesc = (COL_DESC*) malloc (sizeof(COL_DESC)*pD->m_noutputs);
if (pD->m_numrows) {
pD->m_iarray = alloc_2d_floats (pD->m_ninputs,pD->m_numrows);
pD->m_oarray = alloc_2d_floats (pD->m_noutputs,pD->m_numrows);
} else pD->m_istate &= ~NFtrainmat_loaded;
}
break;
case 2:
fscanf (fd,"%s",cdummy);
strncpy (pD->m_title,cdummy,MAXCSTRING);
fscanf (fd,"%s",cdummy);
strncpy (pD->m_desc,cdummy,MAXCSTRING);
fscanf (fd,"%s",cdummy);
strncpy (pD->m_rawfname,cdummy,MAXCSTRING);
fscanf (fd,"%s",cdummy);
strncpy (pD->m_parfname,cdummy,MAXCSTRING);
fscanf (fd,"%ld",&ttime);
// if (ttime) pD->m_creation = ttime;
// else pD->m_creation = CTime::GetCurrentTime();
pD->m_istate |= (NFtitle_known | NFdesc_known |
NFraw_file_known );
break;
case 3:
fscanf (fd,"%d",&i);
fscanf (fd,"%d %d %f %f %f %f %c %s %s",
&pD->m_coldesc[i].flag,
&pD->m_coldesc[i].fieldtype,
&pD->m_coldesc[i].fscale,
&pD->m_coldesc[i].foffset,
&pD->m_coldesc[i].max,
&pD->m_coldesc[i].min,
&pD->m_coldesc[i].col_usage,
&pD->m_coldesc[i].format,
&pD->m_coldesc[i].vlab);
pD->m_coldesc[i].units[0] = 0;
pD->m_coldesc[i].cliphi = pD->m_coldesc[i].cliplo = MISSING;
pD->m_coldesc[i].colwidth = 90;
pD->m_istate |= NFcol_usage_known;
break;
case 4:
for (i=0;i<pD->m_numcols;i++) fscanf(fd,"%d",&pD->m_icrossref[i]);
break;
case 5:
for (i=0;i<pD->m_numcols;i++) fscanf(fd,"%d",&pD->m_ocrossref[i]);
break;
case 6:
fscanf (fd,"%d",&i);
fscanf (fd,"%d %d %f %f %f %f %c %s %s",
&pD->m_icoldesc[i].flag,
&pD->m_icoldesc[i].fieldtype,
&pD->m_icoldesc[i].fscale,
&pD->m_icoldesc[i].foffset,
&pD->m_icoldesc[i].max,
&pD->m_icoldesc[i].min,
&pD->m_icoldesc[i].col_usage,
&pD->m_icoldesc[i].format,
&pD->m_icoldesc[i].vlab);
pD->m_icoldesc[i].units[0] = 0;
pD->m_icoldesc[i].colwidth = 90;
pD->m_icoldesc[i].cliphi = pD->m_icoldesc[i].cliplo = MISSING;
pD->m_istate |= NFcol_usage_known | NFi_o_col_known;
break;
case 7:
fscanf (fd,"%d",&i);
fscanf (fd,"%d %d %f %f %f %f %c %s %s",
&pD->m_ocoldesc[i].flag,
&pD->m_ocoldesc[i].fieldtype,
&pD->m_ocoldesc[i].fscale,
&pD->m_ocoldesc[i].foffset,
&pD->m_ocoldesc[i].max,
&pD->m_ocoldesc[i].min,
&pD->m_ocoldesc[i].col_usage,
&pD->m_ocoldesc[i].format,
&pD->m_ocoldesc[i].vlab);
pD->m_ocoldesc[i].units[0] = 0;
pD->m_ocoldesc[i].colwidth = 90;
pD->m_ocoldesc[i].cliphi = pD->m_ocoldesc[i].cliplo = MISSING;
pD->m_istate |= NFcol_usage_known | NFi_o_col_known;
break;
case 8: //NUMBER OF TEST ROWS
fscanf(fd,"%d",&pD->m_numtests);
if (pD->m_numtests) {
pD->m_itarray = alloc_2d_floats (pD->m_ninputs,pD->m_numtests);
pD->m_otarray = alloc_2d_floats (pD->m_noutputs,pD->m_numtests);
pD->m_istate |= NFtestmat_loaded;
}
break;
}
goto top;
errorexit:
return -1;
}
/*
 * Rescale a stored value and format it as text using the column's own
 * printf format.
 *
 *   f    stored (scaled) value
 *   C    'I' for input column ix, 'O' for output column ix
 *
 * Returns " . " when f equals MISSING, "BadC" for any other selector, and
 * otherwise a pointer to the descriptor's convstr buffer holding at most
 * 19 characters of the formatted value.  That buffer is overwritten by the
 * next call for the same column.
 *
 * The original formatted with an unbounded sprintf into a 40-byte stack
 * buffer; a wide value or a long format could overrun it.  The output is
 * now written with snprintf directly into convstr, bounded to the same 20
 * bytes the original copied.
 *
 * NOTE(review): the format string comes from the descriptor and is fed a
 * floating-point argument; a format that is not a floating-point
 * conversion is undefined behaviour here.
 */
char * DRescaleFmt(DATAMAT *pD, float f,char C,int ix)
{
    enum { CONVSTR_BYTES = 20 };   /* bytes of convstr the original used */
    COL_DESC *desc;

    if (f==MISSING) return " . ";

    if (C=='O') {
        desc = &pD->m_ocoldesc[ix];
    } else if (C=='I') {
        desc = &pD->m_icoldesc[ix];
    } else {
        return "BadC";
    }

    snprintf (&desc->convstr[0],CONVSTR_BYTES,&desc->format[0],DRescale(pD,f,C,ix));
    desc->convstr[CONVSTR_BYTES-1]=0;   /* terminated even if snprintf failed */
    return &desc->convstr[0];
}
/*
 * Write the clip limits of a column to `fd` as " cliphi=... cliplo=...\n".
 * A limit equal to MISSING is printed as the word MISSING, any other value
 * with "%f".
 */
void printclip (FILE *fd,float hi, float lo)
{
    if (hi != MISSING) {
        fprintf (fd," cliphi=%f",hi);
    } else {
        fprintf (fd," cliphi=MISSING");
    }

    if (lo != MISSING) {
        fprintf (fd," cliplo=%f\n",lo);
    } else {
        fprintf (fd," cliplo=MISSING\n");
    }
}
void DumpDataMat(DATAMAT *pD,FILE *fd) {
int i,j;
int nodata;
int maxnum;
static char fmt1[] =
"Dump of DataMat istate=%lx title=%s\ndesc=%s\nraw=%s\nparfname=%s\
\nnumcols=%d numrows=%d numtests=%d nin=%d nout=%d rawrows=%d rawcols=%d\
\nmaxrows=%d maxtests=%d sep=%d\n";
static char fmt2[] =
"flag=%d ftype=%d fscale=%f foffset=%f max=%f min= %f usage=%c format=<%s> vlab=<%s> units=<%s> colw=%d";
static char fmt3[] =
"numfact=%d numresp=%d dtype=%d dsel=%d scope=%d phase=%d ncenter=%d\nmode=%d scale=%f\n";
fprintf (fd,fmt1,pD->m_istate,(const char *)pD->m_title,(const char *)pD->m_desc,
(const char *)pD->m_rawfname,(const char *)pD->m_parfname,
pD->m_numcols,pD->m_numrows,pD->m_numtests,
pD->m_ninputs,pD->m_noutputs,pD->m_rawrows,pD->m_rawcols,pD->m_maxrows,pD->m_maxtests,pD->m_sep);
if ((pD->m_numcols<1) || (pD->m_numrows<0)) {
fprintf (fd,"Bad DM\n");
return;
}
if (pD->m_istate&NFnum_col_known) {
fprintf (fd,"icross = ");
for (i=0;i<pD->m_numcols;i++)
fprintf (fd,"%02d ",pD->m_icrossref[i]);
fprintf (fd,"\nocross = ");
for (i=0;i<pD->m_numcols;i++)
fprintf (fd,"%02d ",pD->m_ocrossref[i]);
fprintf (fd,"\n");
}
if (pD->m_coldesc) {
for (i=0;i<pD->m_numcols;i++) {
fprintf (fd,fmt2,
pD->m_coldesc[i].flag,
pD->m_coldesc[i].fieldtype,
pD->m_coldesc[i].fscale,
pD->m_coldesc[i].foffset,
pD->m_coldesc[i].max,
pD->m_coldesc[i].min,
pD->m_coldesc[i].col_usage,
pD->m_coldesc[i].format,
pD->m_coldesc[i].vlab,
pD->m_coldesc[i].units,
pD->m_coldesc[i].colwidth);
printclip (fd,pD->m_coldesc[i].cliphi,pD->m_coldesc[i].cliplo);
}
}
fprintf(fd,"\n");
for (i=0;i<pD->m_ninputs;i++) {
fprintf (fd,fmt2,
pD->m_icoldesc[i].flag,
pD->m_icoldesc[i].fieldtype,
pD->m_icoldesc[i].fscale,
pD->m_icoldesc[i].foffset,
pD->m_icoldesc[i].max,
pD->m_icoldesc[i].min,
pD->m_icoldesc[i].col_usage,
pD->m_icoldesc[i].format,
pD->m_icoldesc[i].vlab,
pD->m_icoldesc[i].units,
pD->m_icoldesc[i].colwidth);
printclip (fd,pD->m_icoldesc[i].cliphi,pD->m_icoldesc[i].cliplo);
}
for (i=0;i<pD->m_noutputs;i++) {
fprintf (fd,fmt2,
pD->m_ocoldesc[i].flag,
pD->m_ocoldesc[i].fieldtype,
pD->m_ocoldesc[i].fscale,
pD->m_ocoldesc[i].foffset,
pD->m_ocoldesc[i].max,
pD->m_ocoldesc[i].min,
pD->m_ocoldesc[i].col_usage,
pD->m_ocoldesc[i].format,
pD->m_ocoldesc[i].vlab,
pD->m_ocoldesc[i].units,
pD->m_ocoldesc[i].colwidth);
printclip (fd,pD->m_ocoldesc[i].cliphi,pD->m_ocoldesc[i].cliplo);
}
nodata =1;
if (pD->m_istate&NFtrainmat_loaded) {
nodata = 0;
maxnum = pD->m_numrows;
// if (maxnum > 10) maxnum=10;
for(i=0;i<pD->m_ninputs;i++) fprintf (fd,"\t%s",pD->m_icoldesc[i].vlab);
fprintf (fd," ==>");
for(i=0;i<pD->m_noutputs;i++) fprintf (fd,"\t%s",pD->m_ocoldesc[i].vlab);
fprintf (fd,"\n");
for (j=0;j<maxnum;j++) {
fprintf (fd,"D ");
for(i=0;i<pD->m_ninputs;i++) {
fprintf (fd,"\t");
fprintf (fd,"%s",DRescaleFmt(pD,DGetInputVal(pD,j,i),'I',i));
}
fprintf (fd," ");
for(i=0;i<pD->m_noutputs;i++) {
fprintf (fd,"\t");
fprintf (fd,"%s",DRescaleFmt(pD,DGetOutputVal(pD,j,i),'O',i));
}
fprintf (fd,"\n");
}
// if (m_numrows>10) fprintf (fd,"Only 10 out of %d shown\n",m_numrows);
for (j=0;j<pD->m_numtests;j++) {
fprintf (fd,"T ");
for(i=0;i<pD->m_ninputs;i++) {
fprintf (fd,"\t");
fprintf (fd,"%s",DRescaleFmt(pD,DGetInputTVal(pD,j,i),'I',i));
}
fprintf (fd," ");
for(i=0;i<pD->m_noutputs;i++) {
fprintf (fd,"\t");
fprintf (fd,"%s",DRescaleFmt(pD,DGetOutputTVal(pD,j,i),'O',i));
}
fprintf (fd,"\n");
}
}
if (nodata) fprintf (fd,"\nNo data loaded\n");
}