home *** CD-ROM | disk | FTP | other *** search
Text File | 1992-05-09 | 29.4 KB | 1,088 lines |
- Newsgroups: comp.sources.unix
- From: dbell@pdact.pd.necisa.oz.au (David I. Bell)
- Subject: v26i037: CALC - An arbitrary precision C-like calculator, Part11/21
- Sender: unix-sources-moderator@pa.dec.com
- Approved: vixie@pa.dec.com
-
- Submitted-By: dbell@pdact.pd.necisa.oz.au (David I. Bell)
- Posting-Number: Volume 26, Issue 37
- Archive-Name: calc/part11
-
- #! /bin/sh
- # This is a shell archive. Remove anything before this line, then unpack
- # it by saving it into a file and typing "sh file". To overwrite existing
- # files, type "sh file -c". You can also feed this as standard input via
- # unshar, or by typing "sh <file", e.g.. If this archive is complete, you
- # will see the following message at the end:
- # "End of archive 11 (of 21)."
- # Contents: zmul.c
- # Wrapped by dbell@elm on Tue Feb 25 15:21:09 1992
- PATH=/bin:/usr/bin:/usr/ucb ; export PATH
- if test -f 'zmul.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'zmul.c'\"
- else
- echo shar: Extracting \"'zmul.c'\" \(27389 characters\)
- sed "s/^X//" >'zmul.c' <<'END_OF_FILE'
- X/*
- X * Copyright (c) 1992 David I. Bell
- X * Permission is granted to use, distribute, or modify this source,
- X * provided that this copyright notice remains intact.
- X *
- X * Faster than usual multiplying and squaring routines.
- X * The algorithm used is the reasonably simple one from Knuth, volume 2,
- X * section 4.3.3. These recursive routines are of speed O(N^1.585)
- X * instead of O(N^2). The usual multiplication and (almost usual) squaring
- X * algorithms are used for small numbers. On a 386 with its compiler, the
- X * two algorithms are equal in speed at about 100 decimal digits.
- X */
- X
- X#include <stdio.h>
- X#include "math.h"
- X
- X
- XLEN _mul2_ = MUL_ALG2; /* size of number to use multiply algorithm 2 */
- XLEN _sq2_ = SQ_ALG2; /* size of number to use square algorithm 2 */
- X
- X
- X#if 0
- Xstatic char *abortmsg = "Calculation aborted";
- Xstatic char *memmsg = "Not enough memory";
- X#endif
- X
- Xstatic HALF *tempbuf; /* temporary buffer for multiply and square */
- X
- Xstatic LEN domul proto((HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans));
- Xstatic LEN dosquare proto((HALF *vp, LEN size, HALF *ans));
- X
- X
- X/*
- X * Multiply two numbers using the following formula recursively:
- X * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
- X * where S is a power of 2^16, and so multiplies by it are shifts, and
- X * A,B,C,D are the left and right halfs of the numbers to be multiplied.
- X */
- Xvoid
- Xzmul(z1, z2, res)
- X ZVALUE z1, z2; /* numbers to multiply */
- X ZVALUE *res; /* result of multiplication */
- X{
- X LEN len; /* size of array */
- X
- X if (iszero(z1) || iszero(z2)) {
- X *res = _zero_;
- X return;
- X }
- X if (isunit(z1)) {
- X zcopy(z2, res);
- X res->sign = (z1.sign != z2.sign);
- X return;
- X }
- X if (isunit(z2)) {
- X zcopy(z1, res);
- X res->sign = (z1.sign != z2.sign);
- X return;
- X }
- X
- X /*
- X * Allocate a temporary buffer for the recursion levels to use.
- X * An array needs to be allocated large enough for all of the
- X * temporary results to fit in. This size is about twice the size
- X * of the largest original number, since each recursion level uses
- X * the size of its given number, and whose size is 1/2 the size of
- X * the previous level. The sum of the infinite series is 2.
- X * Add some extra words because of rounding when dividing by 2
- X * and also because of the extra word that each multiply needs.
- X */
- X len = z1.len;
- X if (len < z2.len)
- X len = z2.len;
- X len = 2 * len + 64;
- X tempbuf = zalloctemp(len);
- X
- X res->sign = (z1.sign != z2.sign);
- X res->v = alloc(z1.len + z2.len + 1);
- X res->len = domul(z1.v, z1.len, z2.v, z2.len, res->v);
- X}
- X
- X
- X/*
- X * Recursive routine to multiply two numbers by splitting them up into
- X * two numbers of half the size, and using the results of multiplying the
- X * subpieces. The result is placed in the indicated array, which must be
- X * large enough for the result plus one extra word (size1 + size2 + 1).
- X * Returns the actual size of the result with leading zeroes stripped.
- X * This also uses a temporary array which must be twice as large as
- X * one more than the size of the number at the top level recursive call.
- X */
- Xstatic LEN
- Xdomul(v1, size1, v2, size2, ans)
- X HALF *v1; /* first number */
- X LEN size1; /* size of first number */
- X HALF *v2; /* second number */
- X LEN size2; /* size of second number */
- X HALF *ans; /* location for result */
- X{
- X LEN shift; /* amount numbers are shifted by */
- X LEN sizeA; /* size of left half of first number */
- X LEN sizeB; /* size of right half of first number */
- X LEN sizeC; /* size of left half of second number */
- X LEN sizeD; /* size of right half of second number */
- X LEN sizeAB; /* size of subtraction of A and B */
- X LEN sizeDC; /* size of subtraction of D and C */
- X LEN sizeABDC; /* size of product of above two results */
- X LEN subsize; /* size of difference of halfs */
- X LEN copysize; /* size of number left to copy */
- X LEN sizetotal; /* total size of product */
- X LEN len; /* temporary length */
- X HALF *baseA; /* base of left half of first number */
- X HALF *baseB; /* base of right half of first number */
- X HALF *baseC; /* base of left half of second number */
- X HALF *baseD; /* base of right half of second number */
- X HALF *baseAB; /* base of result of subtraction of A and B */
- X HALF *baseDC; /* base of result of subtraction of D and C */
- X HALF *baseABDC; /* base of product of above two results */
- X HALF *baseAC; /* base of product of A and C */
- X HALF *baseBD; /* base of product of B and D */
- X FULL carry; /* carry digit for small multiplications */
- X FULL carryACBD; /* carry from addition of A*C and B*D */
- X FULL digit; /* single digit multiplying by */
- X HALF *temp; /* base for temporary calculations */
- X BOOL neg; /* whether imtermediate term is negative */
- X register HALF *hd, *h1, *h2; /* for inner loops */
- X SIUNION sival; /* for addition of digits */
- X
- X /*
- X * Trim the numbers of leading zeroes and initialize the
- X * estimated size of the result.
- X */
- X hd = &v1[size1 - 1];
- X while ((*hd == 0) && (size1 > 1)) {
- X hd--;
- X size1--;
- X }
- X hd = &v2[size2 - 1];
- X while ((*hd == 0) && (size2 > 1)) {
- X hd--;
- X size2--;
- X }
- X sizetotal = size1 + size2;
- X
- X /*
- X * First check for zero answer.
- X */
- X if (((size1 == 1) && (*v1 == 0)) || ((size2 == 1) && (*v2 == 0))) {
- X *ans = 0;
- X return 1;
- X }
- X
- X /*
- X * Exchange the two numbers if necessary to make the number of
- X * digits of the first number be greater than or equal to the
- X * second number.
- X */
- X if (size1 < size2) {
- X len = size1; size1 = size2; size2 = len;
- X hd = v1; v1 = v2; v2 = hd;
- X }
- X
- X /*
- X * If the smaller number has only a few digits, then calculate
- X * the result in the normal manner in order to avoid the overhead
- X * of the recursion for small numbers. The number of digits where
- X * the algorithm changes is settable from 2 to maxint.
- X */
- X if (size2 < _mul2_) {
- X /*
- X * First clear the top part of the result, and then multiply
- X * by the lowest digit to get the first partial sum. Later
- X * products will then add into this result.
- X */
- X hd = &ans[size1];
- X len = size2;
- X while (len--)
- X *hd++ = 0;
- X
- X digit = *v2++;
- X h1 = v1;
- X hd = ans;
- X carry = 0;
- X len = size1;
- X while (len >= 4) { /* expand the loop some */
- X len -= 4;
- X sival.ivalue = ((FULL) *h1++) * digit + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X sival.ivalue = ((FULL) *h1++) * digit + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X sival.ivalue = ((FULL) *h1++) * digit + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X sival.ivalue = ((FULL) *h1++) * digit + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X while (len--) {
- X sival.ivalue = ((FULL) *h1++) * digit + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X *hd = (HALF)carry;
- X
- X /*
- X * Now multiply by the remaining digits of the second number,
- X * adding each product into the final result.
- X */
- X h2 = ans;
- X while (--size2 > 0) {
- X digit = *v2++;
- X h1 = v1;
- X hd = ++h2;
- X if (digit == 0)
- X continue;
- X carry = 0;
- X len = size1;
- X while (len >= 4) { /* expand the loop some */
- X len -= 4;
- X sival.ivalue = ((FULL) *h1++) * digit
- X + ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X sival.ivalue = ((FULL) *h1++) * digit
- X + ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X sival.ivalue = ((FULL) *h1++) * digit
- X + ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X sival.ivalue = ((FULL) *h1++) * digit
- X + ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X while (len--) {
- X sival.ivalue = ((FULL) *h1++) * digit
- X + ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X while (carry) {
- X sival.ivalue = ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X }
- X
- X /*
- X * Now return the true size of the number.
- X */
- X len = sizetotal;
- X hd = &ans[len - 1];
- X while ((*hd == 0) && (len > 1)) {
- X hd--;
- X len--;
- X }
- X return len;
- X }
- X
- X /*
- X * Need to multiply by a large number.
- X * Allocate temporary space for calculations, and calculate the
- X * value for the shift. The shift value is 1/2 the size of the
- X * larger (first) number (rounded up). The amount of temporary
- X * space needed is twice the size of the shift, plus one more word
- X * for the multiply to use.
- X */
- X shift = (size1 + 1) / 2;
- X temp = tempbuf;
- X tempbuf += (2 * shift) + 1;
- X
- X /*
- X * Determine the sizes and locations of all the numbers.
- X * The value of sizeC can be negative, and this is checked later.
- X * The value of sizeD is limited by the full size of the number.
- X */
- X baseA = v1 + shift;
- X baseB = v1;
- X baseC = v2 + shift;
- X baseD = v2;
- X baseAB = ans;
- X baseDC = ans + shift;
- X baseAC = ans + shift * 2;
- X baseBD = ans;
- X
- X sizeA = size1 - shift;
- X sizeC = size2 - shift;
- X
- X sizeB = shift;
- X hd = &baseB[shift - 1];
- X while ((*hd == 0) && (sizeB > 1)) {
- X hd--;
- X sizeB--;
- X }
- X
- X sizeD = shift;
- X if (sizeD > size2)
- X sizeD = size2;
- X hd = &baseD[sizeD - 1];
- X while ((*hd == 0) && (sizeD > 1)) {
- X hd--;
- X sizeD--;
- X }
- X
- X /*
- X * If the smaller number has a high half of zero, then calculate
- X * the result by breaking up the first number into two numbers
- X * and combining the results using the obvious formula:
- X * (A*S+B) * D = (A*D)*S + B*D
- X */
- X if (sizeC <= 0) {
- X len = domul(baseB, sizeB, baseD, sizeD, ans);
- X hd = &ans[len];
- X len = sizetotal - len;
- X while (len--)
- X *hd++ = 0;
- X
- X /*
- X * Add the second number into the first number, shifted
- X * over at the correct position.
- X */
- X len = domul(baseA, sizeA, baseD, sizeD, temp);
- X h1 = temp;
- X hd = ans + shift;
- X carry = 0;
- X while (len--) {
- X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X while (carry) {
- X sival.ivalue = ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X
- X /*
- X * Determine the final size of the number and return it.
- X */
- X len = sizetotal;
- X hd = &ans[len - 1];
- X while ((*hd == 0) && (len > 1)) {
- X hd--;
- X len--;
- X }
- X tempbuf = temp;
- X return len;
- X }
- X
- X /*
- X * Now we know that the high halfs of the numbers are nonzero,
- X * so we can use the complete formula.
- X * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D.
- X * The steps are done in the following order:
- X * A-B
- X * D-C
- X * (A-B)*(D-C)
- X * S^2*A*C + B*D
- X * (S^2+S)*A*C + (S+1)*B*D (*)
- X * (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
- X *
- X * Note: step (*) above can produce a result which is larger than
- X * the final product will be, and this is where the extra word
- X * needed in the product comes from. After the final subtraction is
- X * done, the result fits in the expected size. Using the extra word
- X * is easier than suppressing the carries and borrows everywhere.
- X *
- X * Begin by forming the product (A-B)*(D-C) into a temporary
- X * location that we save until the final step. Do each subtraction
- X * at positions 0 and S. Be very careful about the relative sizes
- X * of the numbers since this result can be negative. For the first
- X * step calculate the absolute difference of A and B into a temporary
- X * location at position 0 of the result. Negate the sign if A is
- X * smaller than B.
- X */
- X neg = FALSE;
- X if (sizeA == sizeB) {
- X len = sizeA;
- X h1 = &baseA[len - 1];
- X h2 = &baseB[len - 1];
- X while ((len > 1) && (*h1 == *h2)) {
- X len--;
- X h1--;
- X h2--;
- X }
- X }
- X if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2)))
- X {
- X h1 = baseA;
- X h2 = baseB;
- X sizeAB = sizeA;
- X subsize = sizeB;
- X } else {
- X neg = !neg;
- X h1 = baseB;
- X h2 = baseA;
- X sizeAB = sizeB;
- X subsize = sizeA;
- X }
- X copysize = sizeAB - subsize;
- X
- X hd = baseAB;
- X carry = 0;
- X while (subsize--) {
- X sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
- X *hd++ = BASE1 - sival.silow;
- X carry = sival.sihigh;
- X }
- X while (copysize--) {
- X sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
- X *hd++ = BASE1 - sival.silow;
- X carry = sival.sihigh;
- X }
- X
- X hd = &baseAB[sizeAB - 1];
- X while ((*hd == 0) && (sizeAB > 1)) {
- X hd--;
- X sizeAB--;
- X }
- X
- X /*
- X * This completes the calculation of abs(A-B). For the next step
- X * calculate the absolute difference of D and C into a temporary
- X * location at position S of the result. Negate the sign if C is
- X * larger than D.
- X */
- X if (sizeC == sizeD) {
- X len = sizeC;
- X h1 = &baseC[len - 1];
- X h2 = &baseD[len - 1];
- X while ((len > 1) && (*h1 == *h2)) {
- X len--;
- X h1--;
- X h2--;
- X }
- X }
- X if ((sizeC > sizeD) || ((sizeC == sizeD) && (*h1 > *h2)))
- X {
- X neg = !neg;
- X h1 = baseC;
- X h2 = baseD;
- X sizeDC = sizeC;
- X subsize = sizeD;
- X } else {
- X h1 = baseD;
- X h2 = baseC;
- X sizeDC = sizeD;
- X subsize = sizeC;
- X }
- X copysize = sizeDC - subsize;
- X
- X hd = baseDC;
- X carry = 0;
- X while (subsize--) {
- X sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
- X *hd++ = BASE1 - sival.silow;
- X carry = sival.sihigh;
- X }
- X while (copysize--) {
- X sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
- X *hd++ = BASE1 - sival.silow;
- X carry = sival.sihigh;
- X }
- X hd = &baseDC[sizeDC - 1];
- X while ((*hd == 0) && (sizeDC > 1)) {
- X hd--;
- X sizeDC--;
- X }
- X
- X /*
- X * This completes the calculation of abs(D-C). Now multiply
- X * together abs(A-B) and abs(D-C) into a temporary location,
- X * which is preserved until the final steps.
- X */
- X baseABDC = temp;
- X sizeABDC = domul(baseAB, sizeAB, baseDC, sizeDC, baseABDC);
- X
- X /*
- X * Now calculate B*D and A*C into one of their two final locations.
- X * Make sure the high order digits of the products are zeroed since
- X * this initializes the final result. Be careful about this zeroing
- X * since the size of the high order words might be smaller than
- X * the shift size. Do B*D first since the multiplies use one more
- X * word than the size of the product. Also zero the final extra
- X * word in the result for possible carries to use.
- X */
- X len = domul(baseB, sizeB, baseD, sizeD, baseBD);
- X hd = &baseBD[len];
- X len = shift * 2 - len;
- X while (len--)
- X *hd++ = 0;
- X
- X len = domul(baseA, sizeA, baseC, sizeC, baseAC);
- X hd = &baseAC[len];
- X len = sizetotal - shift * 2 - len + 1;
- X while (len--)
- X *hd++ = 0;
- X
- X /*
- X * Now add in A*C and B*D into themselves at the other shifted
- X * position that they need. This addition is tricky in order to
- X * make sure that the two additions cannot interfere with each other.
- X * Therefore we first add in the top half of B*D and the lower half
- X * of A*C. The sources and destinations of these two additions
- X * overlap, and so the same answer results from the two additions,
- X * thus only two pointers suffice for both additions. Keep the
- X * final carry from these additions for later use since we cannot
- X * afford to change the top half of A*C yet.
- X */
- X h1 = baseBD + shift;
- X h2 = baseAC;
- X carryACBD = 0;
- X len = shift;
- X while (len--) {
- X sival.ivalue = ((FULL) *h1) + ((FULL) *h2) + carryACBD;
- X *h1++ = sival.silow;
- X *h2++ = sival.silow;
- X carryACBD = sival.sihigh;
- X }
- X
- X /*
- X * Now add in the bottom half of B*D and the top half of A*C.
- X * These additions are straightforward, except that A*C should
- X * be done first because of possible carries from B*D, and the
- X * top half of A*C might not exist. Add in one of the carries
- X * from the previous addition while we are at it.
- X */
- X h1 = baseAC + shift;
- X hd = baseAC;
- X carry = carryACBD;
- X len = sizetotal - 3 * shift;
- X while (len--) {
- X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X while (carry) {
- X sival.ivalue = ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X
- X h1 = baseBD;
- X hd = baseBD + shift;
- X carry = 0;
- X len = shift;
- X while (len--) {
- X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X while (carry) {
- X sival.ivalue = ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X
- X /*
- X * Now finally add in the other delayed carry from the
- X * above addition.
- X */
- X hd = baseAC + shift;
- X while (carryACBD) {
- X sival.ivalue = ((FULL) *hd) + carryACBD;
- X *hd++ = sival.silow;
- X carryACBD = sival.sihigh;
- X }
- X
- X /*
- X * Now finally add or subtract (A-B)*(D-C) into the final result at
- X * the correct position (S), according to whether it is positive or
- X * negative. When subtracting, the answer cannot go negative.
- X */
- X h1 = baseABDC;
- X hd = ans + shift;
- X carry = 0;
- X len = sizeABDC;
- X if (neg) {
- X while (len--) {
- X sival.ivalue = BASE1 - ((FULL) *hd) +
- X ((FULL) *h1++) + carry;
- X *hd++ = BASE1 - sival.silow;
- X carry = sival.sihigh;
- X }
- X while (carry) {
- X sival.ivalue = BASE1 - ((FULL) *hd) + carry;
- X *hd++ = BASE1 - sival.silow;
- X carry = sival.sihigh;
- X }
- X } else {
- X while (len--) {
- X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X while (carry) {
- X sival.ivalue = ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X }
- X
- X /*
- X * Finally determine the size of the final result and return that.
- X */
- X len = sizetotal;
- X hd = &ans[len - 1];
- X while ((*hd == 0) && (len > 1)) {
- X hd--;
- X len--;
- X }
- X tempbuf = temp;
- X return len;
- X}
- X
- X
- X/*
- X * Square a number by using the following formula recursively:
- X * (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2
- X * where S is a power of 2^16, and so multiplies by it are shifts,
- X * and A and B are the left and right halfs of the number to square.
- X */
- Xvoid
- Xzsquare(z, res)
- X ZVALUE z, *res;
- X{
- X LEN len;
- X
- X if (iszero(z)) {
- X *res = _zero_;
- X return;
- X }
- X if (isunit(z)) {
- X *res = _one_;
- X return;
- X }
- X
- X /*
- X * Allocate a temporary array if necessary for the recursion to use.
- X * The array needs to be allocated large enough for all of the
- X * temporary results to fit in. This size is about 3 times the
- X * size of the original number, since each recursion level uses 3/2
- X * of the size of its given number, and whose size is 1/2 the size
- X * of the previous level. The sum of the infinite series is 3.
- X * Allocate some extra words for rounding up the sizes.
- X */
- X len = 3 * z.len + 32;
- X tempbuf = zalloctemp(len);
- X
- X res->sign = 0;
- X res->v = alloc(z.len * 2);
- X res->len = dosquare(z.v, z.len, res->v);
- X}
- X
- X
- X/*
- X * Recursive routine to square a number by splitting it up into two numbers
- X * of half the size, and using the results of squaring the subpieces.
- X * The result is placed in the indicated array, which must be large
- X * enough for the result (size * 2). Returns the size of the result.
- X * This uses a temporary array which must be 3 times as large as the
- X * size of the number at the top level recursive call.
- X */
- Xstatic LEN
- Xdosquare(vp, size, ans)
- X HALF *vp; /* value to be squared */
- X LEN size; /* length of value to square */
- X HALF *ans; /* location for result */
- X{
- X LEN shift; /* amount numbers are shifted by */
- X LEN sizeA; /* size of left half of number to square */
- X LEN sizeB; /* size of right half of number to square */
- X LEN sizeAA; /* size of square of left half */
- X LEN sizeBB; /* size of square of right half */
- X LEN sizeAABB; /* size of sum of squares of A and B */
- X LEN sizeAB; /* size of difference of A and B */
- X LEN sizeABAB; /* size of square of difference of A and B */
- X LEN subsize; /* size of difference of halfs */
- X LEN copysize; /* size of number left to copy */
- X LEN sumsize; /* size of sum */
- X LEN sizetotal; /* total size of square */
- X LEN len; /* temporary length */
- X LEN len1; /* another temporary length */
- X FULL carry; /* carry digit for small multiplications */
- X FULL digit; /* single digit multiplying by */
- X HALF *temp; /* base for temporary calculations */
- X HALF *baseA; /* base of left half of number */
- X HALF *baseB; /* base of right half of number */
- X HALF *baseAA; /* base of square of left half of number */
- X HALF *baseBB; /* base of square of right half of number */
- X HALF *baseAABB; /* base of sum of squares of A and B */
- X HALF *baseAB; /* base of difference of A and B */
- X HALF *baseABAB; /* base of square of difference of A and B */
- X register HALF *hd, *h1, *h2, *h3; /* for inner loops */
- X SIUNION sival; /* for addition of digits */
- X
- X /*
- X * First trim the number of leading zeroes.
- X */
- X hd = &vp[size - 1];
- X while ((*hd == 0) && (size > 1)) {
- X size--;
- X hd--;
- X }
- X sizetotal = size + size;
- X
- X /*
- X * If the number has only a small number of digits, then use the
- X * (almost) normal multiplication method. Multiply each halfword
- X * only by those halfwards further on in the number. Missed terms
- X * will then be the same pairs of products repeated, and the squares
- X * of each halfword. The first case is handled by doubling the
- X * result. The second case is handled explicitly. The number of
- X * digits where the algorithm changes is settable from 2 to maxint.
- X */
- X if (size < _sq2_) {
- X hd = ans;
- X len = sizetotal;
- X while (len--)
- X *hd++ = 0;
- X
- X h2 = vp;
- X hd = ans + 1;
- X for (len = size; len--; hd += 2) {
- X digit = (FULL) *h2++;
- X if (digit == 0)
- X continue;
- X h3 = h2;
- X h1 = hd;
- X carry = 0;
- X len1 = len;
- X while (len1 >= 4) { /* expand the loop some */
- X len1 -= 4;
- X sival.ivalue = (digit * ((FULL) *h3++))
- X + ((FULL) *h1) + carry;
- X *h1++ = sival.silow;
- X sival.ivalue = (digit * ((FULL) *h3++))
- X + ((FULL) *h1) + ((FULL) sival.sihigh);
- X *h1++ = sival.silow;
- X sival.ivalue = (digit * ((FULL) *h3++))
- X + ((FULL) *h1) + ((FULL) sival.sihigh);
- X *h1++ = sival.silow;
- X sival.ivalue = (digit * ((FULL) *h3++))
- X + ((FULL) *h1) + ((FULL) sival.sihigh);
- X *h1++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X while (len1--) {
- X sival.ivalue = (digit * ((FULL) *h3++))
- X + ((FULL) *h1) + carry;
- X *h1++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X while (carry) {
- X sival.ivalue = ((FULL) *h1) + carry;
- X *h1++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X }
- X
- X /*
- X * Now double the result.
- X * There is no final carry to worry about because we
- X * handle all digits of the result which must fit.
- X */
- X carry = 0;
- X hd = ans;
- X len = sizetotal;
- X while (len--) {
- X digit = ((FULL) *hd);
- X sival.ivalue = digit + digit + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X
- X /*
- X * Now add in the squares of each halfword.
- X */
- X carry = 0;
- X hd = ans;
- X h3 = vp;
- X len = size;
- X while (len--) {
- X digit = ((FULL) *h3++);
- X sival.ivalue = digit * digit + ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X sival.ivalue = ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X while (carry) {
- X sival.ivalue = ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X
- X /*
- X * Finally return the size of the result.
- X */
- X len = sizetotal;
- X hd = &ans[len - 1];
- X while ((*hd == 0) && (len > 1)) {
- X len--;
- X hd--;
- X }
- X return len;
- X }
- X
- X /*
- X * The number to be squared is large.
- X * Allocate temporary space and determine the sizes and
- X * positions of the values to be calculated.
- X */
- X temp = tempbuf;
- X tempbuf += (3 * (size + 1) / 2);
- X
- X sizeA = size / 2;
- X sizeB = size - sizeA;
- X shift = sizeB;
- X baseA = vp + sizeB;
- X baseB = vp;
- X baseAA = &ans[shift * 2];
- X baseBB = ans;
- X baseAABB = temp;
- X baseAB = temp;
- X baseABAB = &temp[shift];
- X
- X /*
- X * Trim the second number of leading zeroes.
- X */
- X hd = &baseB[sizeB - 1];
- X while ((*hd == 0) && (sizeB > 1)) {
- X sizeB--;
- X hd--;
- X }
- X
- X /*
- X * Now to proceed to calculate the result using the formula.
- X * (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2.
- X * The steps are done in the following order:
- X * S^2*A^2 + B^2
- X * A^2 + B^2
- X * (S^2+S)*A^2 + (S+1)*B^2
- X * (A-B)^2
- X * (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2.
- X *
- X * Begin by forming the squares of two the halfs concatenated
- X * together in the final result location. Make sure that the
- X * highest words of the results are zero.
- X */
- X sizeBB = dosquare(baseB, sizeB, baseBB);
- X hd = &baseBB[sizeBB];
- X len = shift * 2 - sizeBB;
- X while (len--)
- X *hd++ = 0;
- X
- X sizeAA = dosquare(baseA, sizeA, baseAA);
- X hd = &baseAA[sizeAA];
- X len = sizetotal - shift * 2 - sizeAA;
- X while (len--)
- X *hd++ = 0;
- X
- X /*
- X * Sum the two squares into a temporary location.
- X */
- X if (sizeAA >= sizeBB) {
- X h1 = baseAA;
- X h2 = baseBB;
- X sizeAABB = sizeAA;
- X sumsize = sizeBB;
- X } else {
- X h1 = baseBB;
- X h2 = baseAA;
- X sizeAABB = sizeBB;
- X sumsize = sizeAA;
- X }
- X copysize = sizeAABB - sumsize;
- X
- X hd = baseAABB;
- X carry = 0;
- X while (sumsize--) {
- X sival.ivalue = ((FULL) *h1++) + ((FULL) *h2++) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X while (copysize--) {
- X sival.ivalue = ((FULL) *h1++) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X if (carry) {
- X *hd = (HALF)carry;
- X sizeAABB++;
- X }
- X
- X /*
- X * Add the sum back into the previously calculated squares
- X * shifted over to the proper location.
- X */
- X h1 = baseAABB;
- X hd = ans + shift;
- X carry = 0;
- X len = sizeAABB;
- X while (len--) {
- X sival.ivalue = ((FULL) *hd) + ((FULL) *h1++) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X while (carry) {
- X sival.ivalue = ((FULL) *hd) + carry;
- X *hd++ = sival.silow;
- X carry = sival.sihigh;
- X }
- X
- X /*
- X * Calculate the absolute value of the difference of the two halfs
- X * into a temporary location.
- X */
- X if (sizeA == sizeB) {
- X len = sizeA;
- X h1 = &baseA[len - 1];
- X h2 = &baseB[len - 1];
- X while ((len > 1) && (*h1 == *h2)) {
- X len--;
- X h1--;
- X h2--;
- X }
- X }
- X if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2)))
- X {
- X h1 = baseA;
- X h2 = baseB;
- X sizeAB = sizeA;
- X subsize = sizeB;
- X } else {
- X h1 = baseB;
- X h2 = baseA;
- X sizeAB = sizeB;
- X subsize = sizeA;
- X }
- X copysize = sizeAB - subsize;
- X
- X hd = baseAB;
- X carry = 0;
- X while (subsize--) {
- X sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
- X *hd++ = BASE1 - sival.silow;
- X carry = sival.sihigh;
- X }
- X while (copysize--) {
- X sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
- X *hd++ = BASE1 - sival.silow;
- X carry = sival.sihigh;
- X }
- X
- X hd = &baseAB[sizeAB - 1];
- X while ((*hd == 0) && (sizeAB > 1)) {
- X sizeAB--;
- X hd--;
- X }
- X
- X /*
- X * Now square the number into another temporary location,
- X * and subtract that from the final result.
- X */
- X sizeABAB = dosquare(baseAB, sizeAB, baseABAB);
- X
- X h1 = baseABAB;
- X hd = ans + shift;
- X carry = 0;
- X while (sizeABAB--) {
- X sival.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++) + carry;
- X *hd++ = BASE1 - sival.silow;
- X carry = sival.sihigh;
- X }
- X while (carry) {
- X sival.ivalue = BASE1 - ((FULL) *hd) + carry;
- X *hd++ = BASE1 - sival.silow;
- X carry = sival.sihigh;
- X }
- X
- X /*
- X * Return the size of the result.
- X */
- X len = sizetotal;
- X hd = &ans[len - 1];
- X while ((*hd == 0) && (len > 1)) {
- X len--;
- X hd--;
- X }
- X tempbuf = temp;
- X return len;
- X}
- X
- X
- X/*
- X * Return a pointer to a buffer to be used for holding a temporary number.
- X * The buffer will be at least as large as the specified number of HALFs,
- X * and remains valid until the next call to this routine. The buffer cannot
- X * be freed by the caller. There is only one temporary buffer, and so as to
- X * avoid possible conflicts this is only used by the lowest level routines
- X * such as divide, multiply, and square.
- X */
- XHALF *
- Xzalloctemp(len)
- X LEN len; /* required number of HALFs in buffer */
- X{
- X HALF *hp;
- X static LEN buflen; /* current length of temp buffer */
- X static HALF *bufptr; /* pointer to current temp buffer */
- X
- X if (len <= buflen)
- X return bufptr;
- X
- X /*
- X * We need to grow the temporary buffer.
- X * First free any existing buffer, and then allocate the new one.
- X * While we are at it, make the new buffer bigger than necessary
- X * in order to reduce the number of reallocations.
- X */
- X len += 100;
- X if (buflen) {
- X buflen = 0;
- X free(bufptr);
- X }
- X hp = (HALF *) malloc(len * sizeof(HALF));
- X if (hp == NULL)
- X error("No memory for temp buffer");
- X bufptr = hp;
- X buflen = len;
- X return hp;
- X}
- X
- X/* END CODE */
- END_OF_FILE
- if test 27389 -ne `wc -c <'zmul.c'`; then
- echo shar: \"'zmul.c'\" unpacked with wrong size!
- fi
- # end of 'zmul.c'
- fi
- echo shar: End of archive 11 \(of 21\).
- cp /dev/null ark11isdone
- MISSING=""
- for I in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 ; do
- if test ! -f ark${I}isdone ; then
- MISSING="${MISSING} ${I}"
- fi
- done
- if test "${MISSING}" = "" ; then
- echo You have unpacked all 21 archives.
- rm -f ark[1-9]isdone ark[1-9][0-9]isdone
- else
- echo You still need to unpack the following archives:
- echo " " ${MISSING}
- fi
- ## End of shell archive.
- exit 0
-