home *** CD-ROM | disk | FTP | other *** search
- /* hAWK_Sort.c : builtin sort capability */
- /* Copyright © 1986, 1988, 1989 1991 the Free Software Foundation, Inc.
- * This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language, modified for the Macintosh (also called hAWK).
- * GAWK is free software; you can redistribute or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 1, or any later version.
- * GAWK is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- * You should have received a copy of the GNU General Public License
- * along with GAWK; see the file "COPYING hAWK". If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- * Written for THINK C 4 on the Macintosh by Ken Earle (Dynabyte) Aug 1991.
- */
- #include "AWK.H"
-
- enum {ASCII_SORT, RASCII_SORT, DICT_SORT, RDICT_SORT,
- NUM_SORT, RNUM_SORT, roentiyehs = 999};
- #define NUMSORTS 6
-
- /* from ARRAY.C */
- extern void assoc_clear(NODE *symbol);
- extern struct search *assoc_scan_sort(NODE *symbol);
- extern struct search *assoc_next_sort(struct search *lookat);
-
- /* hAWK_Sort.c functions */
- short b_get_three(NODE *tree, NODE **res1, NODE **res2, NODE **res3);
- NODE *do_sort(NODE *tree);
- static long create_index_array(NODE *arr, NODE *ind, short sort_type);
- void InitCompFuncArray(void);
- void SortArray(long numIndexes, short sort_type);
- void qs(long lb, long ub);
- long Rearrange(long lb, long ub);
- void SwapSorters(long i, long j);
- /* comparison functions */
- short ASCIIComp(long i, long j);
- short RASCIIComp(long i, long j);
- short DictComp(long i, long j);
- short RDictComp(long i, long j);
- short NumComp(long i, long j);
- short RNumComp(long i, long j);
- short PtrLenPtrLenCompare(Ptr spatPtr, short patLen, Ptr stargetPtr, short targetLen);
-
-
- short b_get_three(NODE *tree, NODE **res1, NODE **res2, NODE **res3)
- {
- if (!tree) {
- return 0;
- }
- *res1 = tree->lnode;
- if (!tree->rnode)
- return 1;
- tree = tree->rnode;
- *res2 = tree->lnode;
- if (!tree->rnode)
- return 2;
- tree = tree->rnode;
- *res3 = tree_eval(tree->lnode);
- return 3;
- }
-
- NODE *do_sort(NODE *tree)
- {
- NODE *t1, *t2, *t3; /* array, array, string */
- char *order_c;
- char sort_char,
- *s;
- NODE *arr, *ind;
- short num_args, sort_type, in_reverse = 0;
-
- if ((num_args = b_get_three(tree, &t1, &t2, &t3)) < 3)
- {
- if (num_args == 2)
- sort_type = ASCII_SORT;
- else
- fatal("sort requires at least two arguments");
- }
- else
- {
- order_c = force_string(t3)->stptr;
- if (*order_c == 'r' || *order_c == 'R')
- {
- in_reverse = 1;
- sort_char = *(order_c + 1);
- }
- else
- sort_char = *order_c;
- switch (sort_char)
- {
- case 'n':
- case 'N':
- sort_type = NUM_SORT;
- break;
- case 'd':
- case 'D':
- sort_type = DICT_SORT;
- break;
- case 'a':
- case 'A':
- default:
- sort_type = ASCII_SORT;
- break;
- }
- if (in_reverse)
- ++sort_type;
- free_temp(t3);
- }
-
- arr = t1;
- if (t1->type == Node_param_list)
- arr = stack_ptr[t1->param_cnt];
- if (arr->type != Node_var && arr->type != Node_var_array)
- fatal("first argument of sort is not a variable");
-
- ind = t2;
- if (t2->type == Node_param_list)
- ind = stack_ptr[t2->param_cnt];
- if (ind->type != Node_var && ind->type != Node_var_array)
- fatal("second argument of sort is not a variable");
- assoc_clear(ind);
- return tmp_number((AWKNUM)
- create_index_array(arr, ind, sort_type));
- }
-
- static NODE **stat_sorter;
-
- /* loop over arr, copying index and element pointers; sort this
- temp array on values of elements, then use this order to create
- numerically-indexed ind array via
- *assoc_lookup(ind, tmp_number((AWKNUM) (number)))
- = make_string(ptr to index string, len of string);
- 1 hiccup results of assoc_scan of arr[] into temporary
- handled array of NODE pointers
- 2 sort array of NODE pointers by element strings
- 3 walk sorted NODE pointers and create the array ind[] with
- indexes 1: up and values equal to arr[] indexes
- 4 return number of elements in ind[]
-
- Prelim version: walk array to precalculate size of sorter, then walk
- it again and fill in the buckets. Not a horrible waste of time.
- */
- static long create_index_array(NODE *arr, NODE *ind, short sort_type)
- {
- struct search *l;
- long j, k, numIndexes = 0;
-
- for (l = assoc_scan_sort(arr); l; l = assoc_next_sort((struct search *)l))
- {
- ++numIndexes;
- }
- if (numIndexes == 0)
- return(0);
- emalloc(stat_sorter, NODE **, (sizeof(NODE *) *
- numIndexes), "create_index_array");
- for (j = 0, l = assoc_scan_sort(arr); l; l = assoc_next_sort((struct search *)l), ++j)
- {
- if (sort_type >= NUM_SORT)
- force_number(l->retval->ahvalue);
- else
- force_string(l->retval->ahvalue);
- *(stat_sorter + j) = l->retval;
- }
- SortArray(numIndexes, sort_type);
- for (j = 0, k = 1; j < numIndexes; ++j, ++k)
- {
- *assoc_lookup(ind, tmp_number((AWKNUM) (k)))
- = make_string((*(stat_sorter+j))->ahname->stptr,
- (*(stat_sorter+j))->ahname->stlen);
- }
- free(stat_sorter);
- return(numIndexes);
- }
-
-
- typedef short (*CompareFunction)(long lb, long ub);
- static CompareFunction compFuncArray[NUMSORTS];
- static CompareFunction compFunc;
-
-
- void InitCompFuncArray()
- {
- compFuncArray[0] = ASCIIComp;
- compFuncArray[1] = RASCIIComp;
- compFuncArray[2] = DictComp;
- compFuncArray[3] = RDictComp;
- compFuncArray[4] = NumComp;
- compFuncArray[5] = RNumComp;
- }
-
- void SortArray(long numIndexes, short sort_type)
- {
- if (!(compFuncArray[0]))
- InitCompFuncArray();
- compFunc = compFuncArray[sort_type];
- if (numIndexes)
- qs(0L, numIndexes - 1L);
- }
-
- void qs(long lb, long ub)
- {
- long j;
-
- if (lb < ub)
- {
- if (j = Rearrange(lb, ub))
- qs(lb, j - 1L);
- qs(j + 1L, ub);
- }
- }
-
- long Rearrange(long lb, long ub)
- {
- do
- {
- while (ub > lb && compFunc(ub, lb) >= 0)
- ub--;
- if (ub != lb)
- {
- SwapSorters(ub, lb);
- while (lb < ub && compFunc(lb, ub) <= 0)
- lb++;
- if (lb != ub)
- SwapSorters(lb, ub);
- }
- } while (lb != ub);
- return(lb);
- }
-
- void SwapSorters(long i, long j)
- {
- NODE *temp = *(stat_sorter + i);
-
- *(stat_sorter + i) = *(stat_sorter + j);
- *(stat_sorter + j) = temp;
- }
-
- /* Sort compare functions - ASCII, dictionary, numeric */
-
- short ASCIIComp(long i, long j)
- {
- return(strcmp((*(stat_sorter + i))->ahvalue->stptr,
- (*(stat_sorter + j))->ahvalue->stptr));
- }
-
- short RASCIIComp(long i, long j)
- {
- return(strcmp((*(stat_sorter + j))->ahvalue->stptr,
- (*(stat_sorter + i))->ahvalue->stptr));
- }
-
- short DictComp(long i, long j)
- {
- return(PtrLenPtrLenCompare((*(stat_sorter + i))->ahvalue->stptr,
- (*(stat_sorter + i))->ahvalue->stlen,
- (*(stat_sorter + j))->ahvalue->stptr,
- (*(stat_sorter + j))->ahvalue->stlen));
- }
-
- short RDictComp(long i, long j)
- {
- return(PtrLenPtrLenCompare((*(stat_sorter + j))->ahvalue->stptr,
- (*(stat_sorter + j))->ahvalue->stlen,
- (*(stat_sorter + i))->ahvalue->stptr,
- (*(stat_sorter + i))->ahvalue->stlen));
- }
-
- short NumComp(long i, long j)
- {
- if ((*(stat_sorter + i))->ahvalue->numbr <
- (*(stat_sorter + j))->ahvalue->numbr)
- return(-1);
- else if ((*(stat_sorter + i))->ahvalue->numbr ==
- (*(stat_sorter + j))->ahvalue->numbr)
- return(0);
- else
- return(1);
- }
-
- short RNumComp(long i, long j)
- {
- if ((*(stat_sorter + i))->ahvalue->numbr <
- (*(stat_sorter + j))->ahvalue->numbr)
- return(1);
- else if ((*(stat_sorter + i))->ahvalue->numbr ==
- (*(stat_sorter + j))->ahvalue->numbr)
- return(0);
- else
- return(-1);
- }
-
- typedef Byte *BPtr;
- short PtrLenPtrLenCompare(Ptr spatPtr, short patLen, Ptr stargetPtr, short targetLen)
- {
- BPtr patPtr = (BPtr)spatPtr,
- patEndPtr = patPtr + patLen,
- curPtr = (BPtr)stargetPtr,
- curEndPtr = curPtr + targetLen;
- short i, j;
-
- while (patPtr < patEndPtr && curPtr < curEndPtr && *patPtr == *curPtr)
- {
- ++patPtr;
- ++curPtr;
- }
-
- if (patPtr == patEndPtr && curPtr == curEndPtr) /* exact match */
- return(0);
- if (patPtr == patEndPtr && curPtr != curEndPtr) /* pattern too short */
- return(-1);
- if (patPtr != patEndPtr && curPtr == curEndPtr) /* starget too short */
- return(1);
-
- /* true miscompare within string */
- i = (short)*patPtr;
- j = (short)*curPtr;
- /* sequence: nonalpha, AaBbCc...Zz */
- /* 'A' = 65, 'a' = 97; move 'U' to 'U'*2 + 256, 'u' to 'u'*2 + 193 */
- /* 'A' -> 386, 'B' -> 388, 'a' -> 387, 'b' -> 389 etc */
- if ('A' <= i && i <= 'Z')
- i = i*2 + 256;
- else if ('a' <= i && i <= 'z')
- i = i*2 + 193;
- if ('A' <= j && j <= 'Z')
- j = j*2 + 256;
- else if ('a' <= j && j <= 'z')
- j = j*2 + 193;
- return(i - j);
- }
-