home *** CD-ROM | disk | FTP | other *** search
- program REGRESS_FILE;
- uses MATRIXU,CRT;
-
- {by Josh Mitteldorf
- 6800 Scotforth Rd
- Philadelphia, PA 19119
- 70441,1305 }
-
-
- {Performs a linear regression (best line fit) on an ASCII-formatted file.
- File should be organized in columns, with only whitespace between the
- columns. You may specify at run time which column contains the dependent
- variable and which contains the independent variables. Columns which
- are not specified as either are read in and skipped. They may contain
- non-numeric data without causing an error. However, non-numeric data in
- any variable cell will cause an entire row of numbers to be skipped over,
- with a message to the user.
-
- If a row is found with more or less than the specified number of columns,
- the program attempts to skip that row and move on.}
-
- const Nmax=20; NSQ=400; cmax=50; maxstr=80;
-
- type float=double;
- vector=array[1..Nmax] of float;
- matrix=array[1..NSQ] of float;
-
- const errval:float=9.87654321987654321E37; ef=#26; space=' '; tab=#9;
- cr=#13; lf=#10; crlf:array[1..2] of char=(cr,lf);
-
- var f :text;
- xxsum,xxavg,xxvar,xinv :matrix;
- xavg,xyavg,xsum,xysum,x,
- xavgyavg,aw,num :vector;
- y,ysum,yysum,yavg,yyavg,norm,
- yvar,dy,dyf :float;
- n,d,count,t1,t2,t3,cols,ycol,nmiss :integer;
- g :file of float;
- xcol :array[1..Nmax] of integer;
- filename :string[maxstr];
- vertical :boolean;
-
- procedure CumRegistersToZero;
- var i,j :integer;
- begin
- ysum:=0; yysum:=0; n:=0; nmiss:=0;
- for i:=1 to d do begin
- xsum[i]:=0;
- xysum[i]:=0;
- for j:=1 to d do xxsum[d*pred(i)+j]:=0;
- end;
- end;
-
- procedure Accumulate;
- var k,i,j :integer;
- xi :float;
- begin
- inc(n);
- {$ifdef DIAGNOSTIC} write(n:4,' '); {$endif}
- ysum:=ysum+y; yysum:=yysum+sqr(y);
- for i:=1 to d do begin
- xi:=x[i];
- xsum[i]:=xsum[i]+xi;
- xysum[i]:=xysum[i]+y*xi;
- for j:=i to d do xxsum[d*pred(i)+j]:=xxsum[d*pred(i)+j]+xi*x[j];
- end; {Upper half only, for speed}
- end;
-
- procedure SumsToAverages;
- var i,j :integer;
-
- begin
- if (n-d-1<=0) then begin
- writeln(crlf,'Not enough points for regression.');
- writeln('You have ',n,' lines of valid data and ',d,' regression variables.');
- halt; end;
- norm:=1/n;
- yavg:=ysum*norm; yyavg:=yysum*norm;
- {$ifdef DIAGNOSTIC} write('XSUM: '); vecprint(xsum,d); {$endif}
- ScalarMultV(addr(xsum),norm,addr(xavg),d);
- {$ifdef DIAGNOSTIC} write('XAVG: '); vecprint(xavg,d); {$endif}
- ScalarMultV(addr(xavg),yavg,addr(xavgyavg),d);
- ScalarMultV(addr(xysum),norm,addr(xyavg),d);
- ScalarMult(addr(xxsum),norm,addr(xxavg),d);
- {$ifdef DIAGNOSTIC} write('XXAVG: '); for i:=1 to d do write('[',i,']=',xxavg[{pred(1)*d+} i]:11:6,' '); writeln; {$endif}
- for i:=1 to d do for j:=succ(i) to d do xxavg[d*pred(j)+i]:=xxavg[d*pred(i)+j];
- {Because we saved time and computed upper half before}
- yvar:=yyavg-sqr(yavg);
- end;
-
- procedure ReportMissing;
- begin
- inc(nmiss);
- write(crlf,'Missing data in line ',n + nmiss,'.');
- end;
-
- procedure Regression;
- var xavg2 :matrix;
- wv :vector;
- i,j :integer;
- wr :float;
- c :array[1..cmax] of float;
- ok,warned :boolean;
- ch :char;
-
- procedure DistributeAndCheckData;
- var i :integer;
- begin
- y:=c[ycol];
- ok:=(y<>errval);
- for i:=1 to d do begin
- x[i]:=c[xcol[i]];
- ok:=(ok and (x[i]<>errval));
- end;
- repeat read(f,ch) until ((eof(f) or not (ch in [space,tab])));
- if ((ch<>cr) and (ch<>ef)) then begin
- write(crlf,'Warning! Unexpected number of columns in line ',1+n+nmiss,'.');
- ok:=false; warned:=true; inc(nmiss);
- end
- else warned:=false;
- repeat read(f,ch) until ((ch=lf) or (eof(f)));
- if (eof(f) and (x[1]=0) and (y=0)) then ok:=false;
- {A blank line at the end of the file reads in as a row of zeros.}
- end;
-
-
- begin {Regression}
- CumRegistersToZero;
- repeat
- i:=1;
- for i:=1 to cols do begin
- {$I-} read(f,c[i]);
- if (ioresult>0) then c[i]:=errval;
- { write(c[i]:16); }
- {$I+} end;
- DistributeAndCheckData;
- {$ifdef DIAGNOSTIC} write(y:8:5); for i:=1 to d do write(x[i]:8:5); writeln; {$endif}
- if (ok) then Accumulate else if (warned) or (eof(f) and (x[1]=0) and (y=0)) then else ReportMissing;
- until (ioresult>0) or (eof(f));
- SumsToAverages;
- Diadic(addr(xavg),addr(xavg),addr(xavg2),d);
- MatDif(addr(xxavg),addr(xavg2),addr(xxvar),d);
-
- VecDif(addr(xyavg),addr(xavgyavg),addr(num),d);
- {$ifdef DIAGNOSTIC}
- writeln('Y AVG: ',yavg:11);
- write(crlf,'XY AVG:'); for i:=1 to d do write(xyavg[i]:11,' ');
- write(crlf,'XAVYAV:'); for i:=1 to d do write(xavgyavg[i]:11,' ');
- write(crlf,'NUM: '); for i:=1 to d do write(num[i]:11,' ');
- write(crlf,'DEN: '); for i:=1 to d do begin
- for j:=1 to d do write(xxvar[i,j]:11,' '); write(crlf,' ');
- end; {$endif}
- Solve(addr(xxvar),addr(num),addr(aw),d);
- Prod(addr(xxvar),addr(aw),addr(wv),d); {wv:=σ²(x) · a}
- dy:=sqrt(yvar - dot(addr(wv),addr(aw),d)); {δy²= σ²(y) - a²σ²(x)}
- dyf:=dy/sqrt(n-d-1);
- { for i:=1 to d do write(dyf/sqrt(xxavg[d*pred(i)+i]-xavg2[d*pred(i)+i]):9:5); writeln; (This is my own measure.) }
- Inverse(addr(xxvar),addr(xinv),d);
- end;
-
- procedure Report;
- var i :integer;
- begin
- clrscr;
- write('INDEPENDENT: col ',ycol,' DEPENDENT:'); for i:=1 to d do write(' ',xcol[i]);
- writeln(crlf,n,' point regression with ',d,' variables. ',n-d-1,' degrees of freedom.');
- writeln('R² = ',Dot(addr(aw),addr(num),d)/yvar:9:5);
- writeln('δY (RMS deviance from line) = ',dy:9:5,',');
- writeln('Compare σ(Y) = ',sqrt(yvar):9:5);
- if (vertical) then begin
- writeln(crlf,' COL COEFFICIENT STD ERROR STD DEVIATION PROP. OF VARIANCE');
- writeln( '-------- ----------- --------- ------------- -----------------');
- writeln( ' const ',yavg-dot(addr(xavg),addr(aw),d):13:7,dy:15:7);
- for i:=1 to d do
- writeln(xcol[i]:5,aw[i]:16:7,dyf*sqrt(xinv[d*pred(i)+i]):15:7,sqrt(xxvar[pred(i)*d+i]):18:5,
- xxvar[pred(i)*d+i]*sqr(aw[i])/yvar:21:5);
- end
- else begin
- writeln('B = ',yavg-dot(addr(xavg),addr(aw),d):9:5,' (Std error=δY)');
- writeln('Coefficient vector A:');
- for i:=1 to d do write(aw[i]:9:5); writeln;
- writeln('Std errors of coefficients:');
- for i:=1 to d do write(dyf*sqrt(xinv[d*pred(i)+i]):9:5); writeln;
- writeln('Std Deviations σ(X):');
- for i:=1 to d do write(sqrt(xxvar[pred(i)*d+i]):9:5); writeln;
- writeln('Proportion of Y''s variance attributable to:');
- for i:=1 to d do write(xxvar[pred(i)*d+i]*sqr(aw[i])/yvar:9:5); writeln;
- end;
- end;
-
-
- procedure Parse(xstr :string);
- var i,j,k,ta :integer;
- substr :string[80];
- v :float;
- begin
- for i:=succ(length(xstr)) to maxstr do xstr[i]:=' ';
- i:=0; k:=1;
- while (i<length(xstr)) do begin
- substr:=''; j:=1;
- repeat inc(i) until (xstr[i] in ['0'..'9']);
- repeat substr[j]:=xstr[i]; inc(i); inc(j); until not (xstr[i] in ['0'..'9']);
- substr[0]:=char(pred(j));
- val(substr,v,ta);
- xcol[k]:=round(v); inc(k);
- end;
- d:=pred(k);
- write('There are ',d,' variables in cols ');
- for k:=1 to d do write(xcol[k],' ');
- end;
-
- procedure Input;
- var ki :char;
- xstr :string[80];
- i :integer;
- ok :boolean;
- begin
- clrscr;
- writeln('ASCII FILE -> Regression',crlf);
- {$I-} repeat
- write('What is the name of the file containing the data? ');
- readln(filename); assign(f,filename); reset(f);
- until (ioresult=0);
- {$I+} write('Total number of ');
- textattr:=112; write('columns'); textattr:=7;
- write(' in file: '); readln(cols);
- repeat
- write('Which column contains the ');
- textattr:=112; write('dependent'); textattr:=7;
- write(' variable? '); readln(ycol);
- ok:=((ycol<=cols) and (ycol>0));
- until ok;
- repeat
- write('Which columns contain the ');
- textattr:=112; write('independent'); textattr:=7;
- write(' variables? '); readln(xstr);
- Parse(xstr);
- ok:=true;
- for i:=1 to d do ok:=(ok and ((xcol[i]<=cols) and (xcol[i]>0)));
- if ok then begin
- write(crlf,' Correct? '); readln(ki);
- end
- else begin ki:='N'; writeln; end;
- until upcase(ki) in ['Y','y'];
- textattr:=112; write('H'); textattr:=7;
- write('orizontal or ');
- textattr:=112; write('V'); textattr:=7;
- write('ertical output form? ');
- repeat ki:=upcase(readkey) until ki in ['H','V']; writeln(ki);
- if (ki='V') then vertical:=true else vertical:=false;
- {window(1,8,80,25); clrscr; window(1,1,80,25); gotoxy(1,8);}
- write('Computing...');
- end;
-
- begin
- Input;
- Regression;
- Report;
- end.