Source Code 1994 March

home *** CD-ROM | disk | FTP | other *** search

/ Source Code 1994 March / Source_Code_CD-ROM_Walnut_Creek_March_1994.iso / compsrcs / unix / volume26 / calc / part11 < prev next >

Wrap

Text File | 1992-05-09 | 29.4 KB | 1,088 lines

Newsgroups: comp.sources.unix From: dbell@pdact.pd.necisa.oz.au (David I. Bell) Subject: v26i037: CALC - An arbitrary precision C-like calculator, Part11/21 Sender: unix-sources-moderator@pa.dec.com Approved: vixie@pa.dec.com Submitted-By: dbell@pdact.pd.necisa.oz.au (David I. Bell) Posting-Number: Volume 26, Issue 37 Archive-Name: calc/part11 #! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh <file", e.g.. If this archive is complete, you # will see the following message at the end: # "End of archive 11 (of 21)." # Contents: zmul.c # Wrapped by dbell@elm on Tue Feb 25 15:21:09 1992 PATH=/bin:/usr/bin:/usr/ucb ; export PATH if test -f 'zmul.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'zmul.c'\" else echo shar: Extracting \"'zmul.c'\" $27389 characters$ sed "s/^X//" >'zmul.c' <<'END_OF_FILE' X/* X * Copyright (c) 1992 David I. Bell X * Permission is granted to use, distribute, or modify this source, X * provided that this copyright notice remains intact. X * X * Faster than usual multiplying and squaring routines. X * The algorithm used is the reasonably simple one from Knuth, volume 2, X * section 4.3.3. These recursive routines are of speed O(N^1.585) X * instead of O(N^2). The usual multiplication and (almost usual) squaring X * algorithms are used for small numbers. On a 386 with its compiler, the X * two algorithms are equal in speed at about 100 decimal digits. X */ X X#include <stdio.h> X#include "math.h" X X XLEN _mul2_ = MUL_ALG2; /* size of number to use multiply algorithm 2 */ XLEN _sq2_ = SQ_ALG2; /* size of number to use square algorithm 2 */ X X X#if 0 Xstatic char *abortmsg = "Calculation aborted"; Xstatic char *memmsg = "Not enough memory"; X#endif X Xstatic HALF *tempbuf; /* temporary buffer for multiply and square */ X Xstatic LEN domul proto((HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans)); Xstatic LEN dosquare proto((HALF *vp, LEN size, HALF *ans)); X X X/* X * Multiply two numbers using the following formula recursively: X * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D X * where S is a power of 2^16, and so multiplies by it are shifts, and X * A,B,C,D are the left and right halfs of the numbers to be multiplied. X */ Xvoid Xzmul(z1, z2, res) X ZVALUE z1, z2; /* numbers to multiply */ X ZVALUE *res; /* result of multiplication */ X{ X LEN len; /* size of array */ X X if (iszero(z1) || iszero(z2)) { X *res = _zero_; X return; X } X if (isunit(z1)) { X zcopy(z2, res); X res->sign = (z1.sign != z2.sign); X return; X } X if (isunit(z2)) { X zcopy(z1, res); X res->sign = (z1.sign != z2.sign); X return; X } X X /* X * Allocate a temporary buffer for the recursion levels to use. X * An array needs to be allocated large enough for all of the X * temporary results to fit in. This size is about twice the size X * of the largest original number, since each recursion level uses X * the size of its given number, and whose size is 1/2 the size of X * the previous level. The sum of the infinite series is 2. X * Add some extra words because of rounding when dividing by 2 X * and also because of the extra word that each multiply needs. X */ X len = z1.len; X if (len < z2.len) X len = z2.len; X len = 2 * len + 64; X tempbuf = zalloctemp(len); X X res->sign = (z1.sign != z2.sign); X res->v = alloc(z1.len + z2.len + 1); X res->len = domul(z1.v, z1.len, z2.v, z2.len, res->v); X} X X X/* X * Recursive routine to multiply two numbers by splitting them up into X * two numbers of half the size, and using the results of multiplying the X * subpieces. The result is placed in the indicated array, which must be X * large enough for the result plus one extra word (size1 + size2 + 1). X * Returns the actual size of the result with leading zeroes stripped. X * This also uses a temporary array which must be twice as large as X * one more than the size of the number at the top level recursive call. X */ Xstatic LEN Xdomul(v1, size1, v2, size2, ans) X HALF *v1; /* first number */ X LEN size1; /* size of first number */ X HALF *v2; /* second number */ X LEN size2; /* size of second number */ X HALF *ans; /* location for result */ X{ X LEN shift; /* amount numbers are shifted by */ X LEN sizeA; /* size of left half of first number */ X LEN sizeB; /* size of right half of first number */ X LEN sizeC; /* size of left half of second number */ X LEN sizeD; /* size of right half of second number */ X LEN sizeAB; /* size of subtraction of A and B */ X LEN sizeDC; /* size of subtraction of D and C */ X LEN sizeABDC; /* size of product of above two results */ X LEN subsize; /* size of difference of halfs */ X LEN copysize; /* size of number left to copy */ X LEN sizetotal; /* total size of product */ X LEN len; /* temporary length */ X HALF *baseA; /* base of left half of first number */ X HALF *baseB; /* base of right half of first number */ X HALF *baseC; /* base of left half of second number */ X HALF *baseD; /* base of right half of second number */ X HALF *baseAB; /* base of result of subtraction of A and B */ X HALF *baseDC; /* base of result of subtraction of D and C */ X HALF *baseABDC; /* base of product of above two results */ X HALF *baseAC; /* base of product of A and C */ X HALF *baseBD; /* base of product of B and D */ X FULL carry; /* carry digit for small multiplications */ X FULL carryACBD; /* carry from addition of A*C and B*D */ X FULL digit; /* single digit multiplying by */ X HALF *temp; /* base for temporary calculations */ X BOOL neg; /* whether imtermediate term is negative */ X register HALF *hd, *h1, *h2; /* for inner loops */ X SIUNION sival; /* for addition of digits */ X X /* X * Trim the numbers of leading zeroes and initialize the X * estimated size of the result. X */ X hd = &v1[size1 - 1]; X while ((*hd == 0) && (size1 > 1)) { X hd--; X size1--; X } X hd = &v2[size2 - 1]; X while ((*hd == 0) && (size2 > 1)) { X hd--; X size2--; X } X sizetotal = size1 + size2; X X /* X * First check for zero answer. X */ X if (((size1 == 1) && (*v1 == 0)) || ((size2 == 1) && (*v2 == 0))) { X *ans = 0; X return 1; X } X X /* X * Exchange the two numbers if necessary to make the number of X * digits of the first number be greater than or equal to the X * second number. X */ X if (size1 < size2) { X len = size1; size1 = size2; size2 = len; X hd = v1; v1 = v2; v2 = hd; X } X X /* X * If the smaller number has only a few digits, then calculate X * the result in the normal manner in order to avoid the overhead X * of the recursion for small numbers. The number of digits where X * the algorithm changes is settable from 2 to maxint. X */ X if (size2 < _mul2_) { X /* X * First clear the top part of the result, and then multiply X * by the lowest digit to get the first partial sum. Later X * products will then add into this result. X */ X hd = &ans[size1]; X len = size2; X while (len--) X *hd++ = 0; X X digit = *v2++; X h1 = v1; X hd = ans; X carry = 0; X len = size1; X while (len >= 4) { /* expand the loop some */ X len -= 4; X sival.ivalue = ((FULL) *h1++) * digit + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *h1++) * digit + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *h1++) * digit + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *h1++) * digit + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (len--) { X sival.ivalue = ((FULL) *h1++) * digit + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X *hd = (HALF)carry; X X /* X * Now multiply by the remaining digits of the second number, X * adding each product into the final result. X */ X h2 = ans; X while (--size2 > 0) { X digit = *v2++; X h1 = v1; X hd = ++h2; X if (digit == 0) X continue; X carry = 0; X len = size1; X while (len >= 4) { /* expand the loop some */ X len -= 4; X sival.ivalue = ((FULL) *h1++) * digit X + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *h1++) * digit X + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *h1++) * digit X + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *h1++) * digit X + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (len--) { X sival.ivalue = ((FULL) *h1++) * digit X + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X } X X /* X * Now return the true size of the number. X */ X len = sizetotal; X hd = &ans[len - 1]; X while ((*hd == 0) && (len > 1)) { X hd--; X len--; X } X return len; X } X X /* X * Need to multiply by a large number. X * Allocate temporary space for calculations, and calculate the X * value for the shift. The shift value is 1/2 the size of the X * larger (first) number (rounded up). The amount of temporary X * space needed is twice the size of the shift, plus one more word X * for the multiply to use. X */ X shift = (size1 + 1) / 2; X temp = tempbuf; X tempbuf += (2 * shift) + 1; X X /* X * Determine the sizes and locations of all the numbers. X * The value of sizeC can be negative, and this is checked later. X * The value of sizeD is limited by the full size of the number. X */ X baseA = v1 + shift; X baseB = v1; X baseC = v2 + shift; X baseD = v2; X baseAB = ans; X baseDC = ans + shift; X baseAC = ans + shift * 2; X baseBD = ans; X X sizeA = size1 - shift; X sizeC = size2 - shift; X X sizeB = shift; X hd = &baseB[shift - 1]; X while ((*hd == 0) && (sizeB > 1)) { X hd--; X sizeB--; X } X X sizeD = shift; X if (sizeD > size2) X sizeD = size2; X hd = &baseD[sizeD - 1]; X while ((*hd == 0) && (sizeD > 1)) { X hd--; X sizeD--; X } X X /* X * If the smaller number has a high half of zero, then calculate X * the result by breaking up the first number into two numbers X * and combining the results using the obvious formula: X * (A*S+B) * D = (A*D)*S + B*D X */ X if (sizeC <= 0) { X len = domul(baseB, sizeB, baseD, sizeD, ans); X hd = &ans[len]; X len = sizetotal - len; X while (len--) X *hd++ = 0; X X /* X * Add the second number into the first number, shifted X * over at the correct position. X */ X len = domul(baseA, sizeA, baseD, sizeD, temp); X h1 = temp; X hd = ans + shift; X carry = 0; X while (len--) { X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X X /* X * Determine the final size of the number and return it. X */ X len = sizetotal; X hd = &ans[len - 1]; X while ((*hd == 0) && (len > 1)) { X hd--; X len--; X } X tempbuf = temp; X return len; X } X X /* X * Now we know that the high halfs of the numbers are nonzero, X * so we can use the complete formula. X * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D. X * The steps are done in the following order: X * A-B X * D-C X * (A-B)*(D-C) X * S^2*A*C + B*D X * (S^2+S)*A*C + (S+1)*B*D (*) X * (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D X * X * Note: step (*) above can produce a result which is larger than X * the final product will be, and this is where the extra word X * needed in the product comes from. After the final subtraction is X * done, the result fits in the expected size. Using the extra word X * is easier than suppressing the carries and borrows everywhere. X * X * Begin by forming the product (A-B)*(D-C) into a temporary X * location that we save until the final step. Do each subtraction X * at positions 0 and S. Be very careful about the relative sizes X * of the numbers since this result can be negative. For the first X * step calculate the absolute difference of A and B into a temporary X * location at position 0 of the result. Negate the sign if A is X * smaller than B. X */ X neg = FALSE; X if (sizeA == sizeB) { X len = sizeA; X h1 = &baseA[len - 1]; X h2 = &baseB[len - 1]; X while ((len > 1) && (*h1 == *h2)) { X len--; X h1--; X h2--; X } X } X if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2))) X { X h1 = baseA; X h2 = baseB; X sizeAB = sizeA; X subsize = sizeB; X } else { X neg = !neg; X h1 = baseB; X h2 = baseA; X sizeAB = sizeB; X subsize = sizeA; X } X copysize = sizeAB - subsize; X X hd = baseAB; X carry = 0; X while (subsize--) { X sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X while (copysize--) { X sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X X hd = &baseAB[sizeAB - 1]; X while ((*hd == 0) && (sizeAB > 1)) { X hd--; X sizeAB--; X } X X /* X * This completes the calculation of abs(A-B). For the next step X * calculate the absolute difference of D and C into a temporary X * location at position S of the result. Negate the sign if C is X * larger than D. X */ X if (sizeC == sizeD) { X len = sizeC; X h1 = &baseC[len - 1]; X h2 = &baseD[len - 1]; X while ((len > 1) && (*h1 == *h2)) { X len--; X h1--; X h2--; X } X } X if ((sizeC > sizeD) || ((sizeC == sizeD) && (*h1 > *h2))) X { X neg = !neg; X h1 = baseC; X h2 = baseD; X sizeDC = sizeC; X subsize = sizeD; X } else { X h1 = baseD; X h2 = baseC; X sizeDC = sizeD; X subsize = sizeC; X } X copysize = sizeDC - subsize; X X hd = baseDC; X carry = 0; X while (subsize--) { X sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X while (copysize--) { X sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X hd = &baseDC[sizeDC - 1]; X while ((*hd == 0) && (sizeDC > 1)) { X hd--; X sizeDC--; X } X X /* X * This completes the calculation of abs(D-C). Now multiply X * together abs(A-B) and abs(D-C) into a temporary location, X * which is preserved until the final steps. X */ X baseABDC = temp; X sizeABDC = domul(baseAB, sizeAB, baseDC, sizeDC, baseABDC); X X /* X * Now calculate B*D and A*C into one of their two final locations. X * Make sure the high order digits of the products are zeroed since X * this initializes the final result. Be careful about this zeroing X * since the size of the high order words might be smaller than X * the shift size. Do B*D first since the multiplies use one more X * word than the size of the product. Also zero the final extra X * word in the result for possible carries to use. X */ X len = domul(baseB, sizeB, baseD, sizeD, baseBD); X hd = &baseBD[len]; X len = shift * 2 - len; X while (len--) X *hd++ = 0; X X len = domul(baseA, sizeA, baseC, sizeC, baseAC); X hd = &baseAC[len]; X len = sizetotal - shift * 2 - len + 1; X while (len--) X *hd++ = 0; X X /* X * Now add in A*C and B*D into themselves at the other shifted X * position that they need. This addition is tricky in order to X * make sure that the two additions cannot interfere with each other. X * Therefore we first add in the top half of B*D and the lower half X * of A*C. The sources and destinations of these two additions X * overlap, and so the same answer results from the two additions, X * thus only two pointers suffice for both additions. Keep the X * final carry from these additions for later use since we cannot X * afford to change the top half of A*C yet. X */ X h1 = baseBD + shift; X h2 = baseAC; X carryACBD = 0; X len = shift; X while (len--) { X sival.ivalue = ((FULL) *h1) + ((FULL) *h2) + carryACBD; X *h1++ = sival.silow; X *h2++ = sival.silow; X carryACBD = sival.sihigh; X } X X /* X * Now add in the bottom half of B*D and the top half of A*C. X * These additions are straightforward, except that A*C should X * be done first because of possible carries from B*D, and the X * top half of A*C might not exist. Add in one of the carries X * from the previous addition while we are at it. X */ X h1 = baseAC + shift; X hd = baseAC; X carry = carryACBD; X len = sizetotal - 3 * shift; X while (len--) { X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X X h1 = baseBD; X hd = baseBD + shift; X carry = 0; X len = shift; X while (len--) { X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X X /* X * Now finally add in the other delayed carry from the X * above addition. X */ X hd = baseAC + shift; X while (carryACBD) { X sival.ivalue = ((FULL) *hd) + carryACBD; X *hd++ = sival.silow; X carryACBD = sival.sihigh; X } X X /* X * Now finally add or subtract (A-B)*(D-C) into the final result at X * the correct position (S), according to whether it is positive or X * negative. When subtracting, the answer cannot go negative. X */ X h1 = baseABDC; X hd = ans + shift; X carry = 0; X len = sizeABDC; X if (neg) { X while (len--) { X sival.ivalue = BASE1 - ((FULL) *hd) + X ((FULL) *h1++) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = BASE1 - ((FULL) *hd) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X } else { X while (len--) { X sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X } X X /* X * Finally determine the size of the final result and return that. X */ X len = sizetotal; X hd = &ans[len - 1]; X while ((*hd == 0) && (len > 1)) { X hd--; X len--; X } X tempbuf = temp; X return len; X} X X X/* X * Square a number by using the following formula recursively: X * (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2 X * where S is a power of 2^16, and so multiplies by it are shifts, X * and A and B are the left and right halfs of the number to square. X */ Xvoid Xzsquare(z, res) X ZVALUE z, *res; X{ X LEN len; X X if (iszero(z)) { X *res = _zero_; X return; X } X if (isunit(z)) { X *res = _one_; X return; X } X X /* X * Allocate a temporary array if necessary for the recursion to use. X * The array needs to be allocated large enough for all of the X * temporary results to fit in. This size is about 3 times the X * size of the original number, since each recursion level uses 3/2 X * of the size of its given number, and whose size is 1/2 the size X * of the previous level. The sum of the infinite series is 3. X * Allocate some extra words for rounding up the sizes. X */ X len = 3 * z.len + 32; X tempbuf = zalloctemp(len); X X res->sign = 0; X res->v = alloc(z.len * 2); X res->len = dosquare(z.v, z.len, res->v); X} X X X/* X * Recursive routine to square a number by splitting it up into two numbers X * of half the size, and using the results of squaring the subpieces. X * The result is placed in the indicated array, which must be large X * enough for the result (size * 2). Returns the size of the result. X * This uses a temporary array which must be 3 times as large as the X * size of the number at the top level recursive call. X */ Xstatic LEN Xdosquare(vp, size, ans) X HALF *vp; /* value to be squared */ X LEN size; /* length of value to square */ X HALF *ans; /* location for result */ X{ X LEN shift; /* amount numbers are shifted by */ X LEN sizeA; /* size of left half of number to square */ X LEN sizeB; /* size of right half of number to square */ X LEN sizeAA; /* size of square of left half */ X LEN sizeBB; /* size of square of right half */ X LEN sizeAABB; /* size of sum of squares of A and B */ X LEN sizeAB; /* size of difference of A and B */ X LEN sizeABAB; /* size of square of difference of A and B */ X LEN subsize; /* size of difference of halfs */ X LEN copysize; /* size of number left to copy */ X LEN sumsize; /* size of sum */ X LEN sizetotal; /* total size of square */ X LEN len; /* temporary length */ X LEN len1; /* another temporary length */ X FULL carry; /* carry digit for small multiplications */ X FULL digit; /* single digit multiplying by */ X HALF *temp; /* base for temporary calculations */ X HALF *baseA; /* base of left half of number */ X HALF *baseB; /* base of right half of number */ X HALF *baseAA; /* base of square of left half of number */ X HALF *baseBB; /* base of square of right half of number */ X HALF *baseAABB; /* base of sum of squares of A and B */ X HALF *baseAB; /* base of difference of A and B */ X HALF *baseABAB; /* base of square of difference of A and B */ X register HALF *hd, *h1, *h2, *h3; /* for inner loops */ X SIUNION sival; /* for addition of digits */ X X /* X * First trim the number of leading zeroes. X */ X hd = &vp[size - 1]; X while ((*hd == 0) && (size > 1)) { X size--; X hd--; X } X sizetotal = size + size; X X /* X * If the number has only a small number of digits, then use the X * (almost) normal multiplication method. Multiply each halfword X * only by those halfwards further on in the number. Missed terms X * will then be the same pairs of products repeated, and the squares X * of each halfword. The first case is handled by doubling the X * result. The second case is handled explicitly. The number of X * digits where the algorithm changes is settable from 2 to maxint. X */ X if (size < _sq2_) { X hd = ans; X len = sizetotal; X while (len--) X *hd++ = 0; X X h2 = vp; X hd = ans + 1; X for (len = size; len--; hd += 2) { X digit = (FULL) *h2++; X if (digit == 0) X continue; X h3 = h2; X h1 = hd; X carry = 0; X len1 = len; X while (len1 >= 4) { /* expand the loop some */ X len1 -= 4; X sival.ivalue = (digit * ((FULL) *h3++)) X + ((FULL) *h1) + carry; X *h1++ = sival.silow; X sival.ivalue = (digit * ((FULL) *h3++)) X + ((FULL) *h1) + ((FULL) sival.sihigh); X *h1++ = sival.silow; X sival.ivalue = (digit * ((FULL) *h3++)) X + ((FULL) *h1) + ((FULL) sival.sihigh); X *h1++ = sival.silow; X sival.ivalue = (digit * ((FULL) *h3++)) X + ((FULL) *h1) + ((FULL) sival.sihigh); X *h1++ = sival.silow; X carry = sival.sihigh; X } X while (len1--) { X sival.ivalue = (digit * ((FULL) *h3++)) X + ((FULL) *h1) + carry; X *h1++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *h1) + carry; X *h1++ = sival.silow; X carry = sival.sihigh; X } X } X X /* X * Now double the result. X * There is no final carry to worry about because we X * handle all digits of the result which must fit. X */ X carry = 0; X hd = ans; X len = sizetotal; X while (len--) { X digit = ((FULL) *hd); X sival.ivalue = digit + digit + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X X /* X * Now add in the squares of each halfword. X */ X carry = 0; X hd = ans; X h3 = vp; X len = size; X while (len--) { X digit = ((FULL) *h3++); X sival.ivalue = digit * digit + ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X X /* X * Finally return the size of the result. X */ X len = sizetotal; X hd = &ans[len - 1]; X while ((*hd == 0) && (len > 1)) { X len--; X hd--; X } X return len; X } X X /* X * The number to be squared is large. X * Allocate temporary space and determine the sizes and X * positions of the values to be calculated. X */ X temp = tempbuf; X tempbuf += (3 * (size + 1) / 2); X X sizeA = size / 2; X sizeB = size - sizeA; X shift = sizeB; X baseA = vp + sizeB; X baseB = vp; X baseAA = &ans[shift * 2]; X baseBB = ans; X baseAABB = temp; X baseAB = temp; X baseABAB = &temp[shift]; X X /* X * Trim the second number of leading zeroes. X */ X hd = &baseB[sizeB - 1]; X while ((*hd == 0) && (sizeB > 1)) { X sizeB--; X hd--; X } X X /* X * Now to proceed to calculate the result using the formula. X * (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2. X * The steps are done in the following order: X * S^2*A^2 + B^2 X * A^2 + B^2 X * (S^2+S)*A^2 + (S+1)*B^2 X * (A-B)^2 X * (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2. X * X * Begin by forming the squares of two the halfs concatenated X * together in the final result location. Make sure that the X * highest words of the results are zero. X */ X sizeBB = dosquare(baseB, sizeB, baseBB); X hd = &baseBB[sizeBB]; X len = shift * 2 - sizeBB; X while (len--) X *hd++ = 0; X X sizeAA = dosquare(baseA, sizeA, baseAA); X hd = &baseAA[sizeAA]; X len = sizetotal - shift * 2 - sizeAA; X while (len--) X *hd++ = 0; X X /* X * Sum the two squares into a temporary location. X */ X if (sizeAA >= sizeBB) { X h1 = baseAA; X h2 = baseBB; X sizeAABB = sizeAA; X sumsize = sizeBB; X } else { X h1 = baseBB; X h2 = baseAA; X sizeAABB = sizeBB; X sumsize = sizeAA; X } X copysize = sizeAABB - sumsize; X X hd = baseAABB; X carry = 0; X while (sumsize--) { X sival.ivalue = ((FULL) *h1++) + ((FULL) *h2++) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (copysize--) { X sival.ivalue = ((FULL) *h1++) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X if (carry) { X *hd = (HALF)carry; X sizeAABB++; X } X X /* X * Add the sum back into the previously calculated squares X * shifted over to the proper location. X */ X h1 = baseAABB; X hd = ans + shift; X carry = 0; X len = sizeAABB; X while (len--) { X sival.ivalue = ((FULL) *hd) + ((FULL) *h1++) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = ((FULL) *hd) + carry; X *hd++ = sival.silow; X carry = sival.sihigh; X } X X /* X * Calculate the absolute value of the difference of the two halfs X * into a temporary location. X */ X if (sizeA == sizeB) { X len = sizeA; X h1 = &baseA[len - 1]; X h2 = &baseB[len - 1]; X while ((len > 1) && (*h1 == *h2)) { X len--; X h1--; X h2--; X } X } X if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2))) X { X h1 = baseA; X h2 = baseB; X sizeAB = sizeA; X subsize = sizeB; X } else { X h1 = baseB; X h2 = baseA; X sizeAB = sizeB; X subsize = sizeA; X } X copysize = sizeAB - subsize; X X hd = baseAB; X carry = 0; X while (subsize--) { X sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X while (copysize--) { X sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X X hd = &baseAB[sizeAB - 1]; X while ((*hd == 0) && (sizeAB > 1)) { X sizeAB--; X hd--; X } X X /* X * Now square the number into another temporary location, X * and subtract that from the final result. X */ X sizeABAB = dosquare(baseAB, sizeAB, baseABAB); X X h1 = baseABAB; X hd = ans + shift; X carry = 0; X while (sizeABAB--) { X sival.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X while (carry) { X sival.ivalue = BASE1 - ((FULL) *hd) + carry; X *hd++ = BASE1 - sival.silow; X carry = sival.sihigh; X } X X /* X * Return the size of the result. X */ X len = sizetotal; X hd = &ans[len - 1]; X while ((*hd == 0) && (len > 1)) { X len--; X hd--; X } X tempbuf = temp; X return len; X} X X X/* X * Return a pointer to a buffer to be used for holding a temporary number. X * The buffer will be at least as large as the specified number of HALFs, X * and remains valid until the next call to this routine. The buffer cannot X * be freed by the caller. There is only one temporary buffer, and so as to X * avoid possible conflicts this is only used by the lowest level routines X * such as divide, multiply, and square. X */ XHALF * Xzalloctemp(len) X LEN len; /* required number of HALFs in buffer */ X{ X HALF *hp; X static LEN buflen; /* current length of temp buffer */ X static HALF *bufptr; /* pointer to current temp buffer */ X X if (len <= buflen) X return bufptr; X X /* X * We need to grow the temporary buffer. X * First free any existing buffer, and then allocate the new one. X * While we are at it, make the new buffer bigger than necessary X * in order to reduce the number of reallocations. X */ X len += 100; X if (buflen) { X buflen = 0; X free(bufptr); X } X hp = (HALF *) malloc(len * sizeof(HALF)); X if (hp == NULL) X error("No memory for temp buffer"); X bufptr = hp; X buflen = len; X return hp; X} X X/* END CODE */ END_OF_FILE if test 27389 -ne `wc -c <'zmul.c'`; then echo shar: \"'zmul.c'\" unpacked with wrong size! fi # end of 'zmul.c' fi echo shar: End of archive 11 $of 21$. cp /dev/null ark11isdone MISSING="" for I in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 ; do if test ! -f ark${I}isdone ; then MISSING="${MISSING} ${I}" fi done if test "${MISSING}" = "" ; then echo You have unpacked all 21 archives. rm -f ark[1-9]isdone ark[1-9][0-9]isdone else echo You still need to unpack the following archives: echo " " ${MISSING} fi ## End of shell archive. exit 0