home *** CD-ROM | disk | FTP | other *** search
- /* ------------------------------ fftg.c ------------------------------------ */
- /* */
- /* Author: Eyal Lebedinsky */
- /* Date: May 1990 */
- /* Version: 9 June 1991 */
- /* */
- /* Program generator for integer DFFT. This program was developed for a very */
- /* specific need and no attempt was made to make it general. Still, the basic */
- /* algorithm is a standard one from the literature. For maximum performance */
- /* of the integer calculation a special mechanism was employed to track shift */
- /* operations that could be delayed. Coefficients are kept as 16 bits with 1 */
- /* sign bit, 1 integer bit and 14 fractional bits. So a typicall multiply */
- /* will do 16bit*16bits=32bits, then shift down one bit and use the HIGH 16 */
- /* bits as the result. Before adding two numbers they must be scaled down one */
- /* bit to avoid overflow. Adding more numbers calls for more scaling. The */
- /* delayed scaling allows the intermediate array to hold numbers that are */
- /* scaled to a varying degree until a clash occurs. Well, this space is too */
- /* short to elaborate any further and if you did similar kind of work then */
- /* you should know what I am talking about. */
- /* */
- /* usage: fftgc File Size EpName */
- /* File - output file name. */
- /* Size - sample order: 8 for 256 points, 10 for 1024... */
- /* EpName - name of the generated function ('_' added) */
- /* Now assemble the output file and then use as: */
- /* */
- /* #define M 8 */
- /* #define N (1 << M) */
- /* short x[N+1]; [input/output array] */
- /* short qf[(N/2)+1]; [power spectrum] */
- /* extern void far fft () */
- /* */
- /* [then call it as: fft();] */
- /* */
- /* This is a modified version of Sorensen et al, IEEE ASSP-35 no 6. */
- /* */
- /* - uses 3+3 complex mult. */
- /* - no pre scrambling. descrambling done when power spectrum calculated. */
- /* - moved to 0-origin as proper for c. */
- /* - trig values in table. */
- /* - the case for sin==cos treated separately. */
- /* - uses delayed scaling down. */
- /* - does length 256 fft with 642 mults. */
- /* */
- /* */
- /* This program is released into the public domain. */
- /* */
- /*----------------------------------------------------------------------------*/
-
- #include <stdio.h>
- #include <math.h>
-
- #define SC15 32768 /* scaling factor */
-
- static short mm[1025]; /* fp(16, 0) */
- static short ad[1025]; /* fp(16, 0) */
- static int cntx[9];
-
- static short x[1025]; /* fp(16,0) */
- static short qf[513]; /* fp(16,0) */
-
- static char *file_name, *ep_name;
-
- extern void start_fft (), end_fft ();
- extern void fft1 (), fft2 (), fft3 (), fft4 (), fft5 (), fft7 (), fft8 ();
-
- static int mad; /* maximum adjust at this level */
-
- static void
- smad (i, j) /* set proper adjust */
- int i, j;
- {
- while (ad[i] < (mad-j)) {
- fprintf (stderr, "x[%u] >>= %d (%d-%d)\n", i, 1, mad, j);
- fft1 (mm[i]);
- ++ad[i];
- cntx[0] += 1;
- cntx[1] += 1;
- cntx[2] += 1;
- cntx[5] += 1;
- }
- }
-
- main (argc, argv)
- int argc;
- char *argv[];
- {
- int n, m, j, i, k, is, id, n1, n2, n4, n8, ind,
- i0, i1, i2, i3, i4, i5, i6, i7, i8,
- cc1, ss1, cc3, ss3;
- float e, a, a3;
-
- if (argc < 4) {
- printf ("usage: fftg File PowerOf2 EpName [stderrFile]\n");
- exit (0);
- }
-
- file_name = argv[1];
- sscanf (argv[2], "%u", &m);
- ep_name = argv[3];
- if (argc > 4)
- freopen (argv[4], "wt", stderr);
- start_fft (file_name, ep_name);
-
- /* ---------- Initialise ------------------------------------------------ */
-
- n = 1 << m;
-
- for (i = 0; i < 9; ++i)
- cntx[i] = 0;
-
- /* ---------- Digit reverse counter -------------------------------------- */
-
- for (i = 1; i <= n; ++i) { /* '-1' for shifting to 0-origin */
- mm[i] = i-1;
- ad[i] = 0;
- }
-
- j = 1;
- n1 = n - 1;
- for (i = 1; i <= n1; ++i) {
- if (i < j) {
- mm[i] = j-1; /* '-1' for shifting to 0-origin */
- mm[j] = i-1; /* '-1' for shifting to 0-origin */
- }
-
- k = n / 2;
-
- while (k < j) {
- j = j - k;
- k = k / 2;
- }
-
- j = j + k;
- }
-
- /* ---------- Length two butterflies ------------------------------------- */
-
- mad = 0;
-
- is = 1;
- id = 4;
-
- do {
- for (i0 = is; i0 <= n; i0 += id) {
- i1 = i0 + 1;
-
- smad (i0, 0);
- smad (i1, 0);
- fft2 (mm[i0], mm[i1]);
- ++ad[i0];
- ++ad[i1];
- cntx[0] += 2;
- cntx[1] += 2;
- cntx[2] += 2;
- cntx[3] += 2;
- }
- is = 2 * id - 1;
- id = 4 * id;
- } while (is < n);
-
- /* ---------- L shaped butterflies --------------------------------------- */
-
- n2 = 2;
- for (k = 2; k <= m; ++k) {
- ++mad;
- n2 = n2 * 2;
- n4 = n2 / 4;
- n8 = n2 / 8;
- e = 6.2831853071719586 / n2;
- is = 0;
- id = n2 * 2;
-
- do {
- cc1 = (SC15/2) * sqrt (0.5);
- for (i = is; i <= n - 1; i += id) {
- i1 = i + 1;
- i2 = i1 + n4;
- i3 = i2 + n4;
- i4 = i3 + n4;
-
- smad (i1, 0);
- smad (i3, 1);
- smad (i4, 1);
- fft3 (mm[i1], mm[i3], mm[i4]);
- ad[i1] += 1;
- ad[i3] += 2;
- ad[i4] += 2;
- cntx[0] += 3;
- cntx[1] += 3;
- cntx[2] += 5;
- cntx[3] += 4;
-
- if (n4 != 1) {
- i1 = i1 + n8;
- i2 = i2 + n8;
- i3 = i3 + n8;
- i4 = i4 + n8;
-
- smad (i1, 1);
- smad (i2, 0);
- smad (i3, 1);
- smad (i4, 1);
- fft4 (mm[i1], mm[i2], mm[i3], mm[i4], cc1);
- ad[i1] += 2;
- ad[i2] += 1;
- ad[i3] += 2;
- ad[i4] += 2;
- cntx[0] += 4;
- cntx[1] += 4;
- cntx[2] += 2;
- cntx[3] += 6;
- cntx[4] += 2;
- }
- }
- is = 2 * id - n2;
- id = 4 * id;
- } while (is < n);
-
- a = e;
- for (j = 2; j <= n8; ++j) {
- a3 = 3 * a;
- cc1 = (SC15/2) * cos (a);
- ss1 = (SC15/2) * sin (a);
- cc3 = (SC15/2) * cos (a3);
- ss3 = (SC15/2) * sin (a3);
- /* a = j * e;*/
- is = 0;
- id = 2 * n2;
-
- do {
- for (i = is; i <= n - 1; i += id) {
- i1 = i + j;
- i2 = i1 + n4;
- i3 = i2 + n4;
- i4 = i3 + n4;
- i5 = i + n4 - j + 2;
- i6 = i5 + n4;
- i7 = i6 + n4;
- i8 = i7 + n4;
-
- smad (i1, 0);
- smad (i2, 0);
- smad (i3, 2);
- smad (i4, 2);
- smad (i5, 0);
- smad (i6, 0);
- smad (i7, 1);
- smad (i8, 1);
- if (ind = (ad[i3] < (mad-1))) {
- ++ad[i3];
- ++ad[i4];
- cntx[2] += 1;
- }
- fft5 (mm[i1], mm[i2], mm[i3], mm[i4],
- mm[i5], mm[i6], mm[i7], mm[i8],
- ss1-cc1, -ss1-cc1, cc1,
- ss3-cc3, -ss3-cc3, cc3, ind);
- ad[i1] += 1;
- ad[i2] += 1;
- ad[i3] += 2;
- ad[i4] += 2;
- ad[i5] += 1;
- ad[i6] += 1;
- ad[i7] += 2;
- ad[i8] += 2;
- cntx[0] += 8;
- cntx[1] += 8;
- cntx[2] += 4;
- cntx[3] += 18;
- cntx[4] += 6;
- }
- is = 2 * id - n2;
- id = 4 * id;
- } while (is < n);
- a += e;
- }
- }
-
- ++mad;
- for (i = 1; i <= n; ++i)
- smad (i, 0);
-
- fft7 (0, mm[1]);
-
- for (i = 1; i < n/2; ++i)
- fft8 (i, mm[i+1], mm[n-i+1]);
-
- fft7 ((n/2), mm[n/2+1]);
-
- end_fft (file_name, ep_name);
-
- for (i = 0; i < 9; ++i)
- fprintf (stderr, " %5u", i);
- fprintf (stderr, "\n Load Store Shift Add Mult Adj\n");
- for (i = 0; i < 9; ++i)
- fprintf (stderr, " %5u", cntx[i]);
- fprintf (stderr, "\n");
-
- exit (0);
- }
-