home *** CD-ROM | disk | FTP | other *** search
- page 66,80
- title HASH_IT
- ;
- ; A hashing algorithm test driver by William DeGrandpre, 12/84
- ;
- ; Features:
- ; A symbol table which is loaded once from a disk file
- ; by block read and is accessed directly from the DTA.
- ; A short integer hash table.
- ; A table-zeroing procedure.
- ; Relative timing routines.
- ; A collision counter/reporter.
- ; Macro defined algorithm test structures
- ; Hashing algorithms, including:
- ; (1) The standard sum-of-ASCII-character values.
- ; (2) My exclusive-or procedure.
- ;
- ; Equates:
- str_len equ 6 ; length of ASCII output string
- nam_len equ 8 ; length of a symbol.
- rec_len equ 10 ; length of a record including cr-lf.
- nt_len equ 100 ; number of records (names) in the table.
- ht_len equ 101 ; bytes in integer hash table. Should be prime.
- no_iter equ 100 ; number of iterations for time test.
- dta_len equ 1024 ; table size allocation, at least rec_len*nt_len.
- ;
- ; Macros:
- ;
- sethsh macro
- ; This macro sets up addresses and values for the hashing
- ; procedures and defines register usage for test purposes:
- ; AX and DX are used for calculations
- ; BX indexes the hash table using [bx+di] addressing
- push cx ;save count of external loops
- mov cx,nt_len ;number of records to hash
- mov bp,ht_len ;divisor to get remainder offset
- lea di,hsh_tbl ;di points to hash table
- lea si,nam_tbl ;si points to name table
- ; Remember to pop CX before exiting hashing procedure
- endm
- ;
- stdtst macro ptitle, pprocd
- local lop,notm
- ; This macro provides a standard test protocol for testing
- ; all hashing procedures. It calls the procedure once to count
- ; collisions and no_iter times for timing purposes.
- ;
- lea dx,ptitle ;print procedure title
- call scr_out
- call ht_clr ;zero the hash table values
- call pprocd ;call once for collision count
- call ht_cnt ;count collisions
- mov fcount,dx ;store collision count
- sub cx,cx ;set system timer to zero
- sub dx,dx ;using BIOS interrupt for speed
- mov ftime,dx ;zero time in case not used
- mov ah,01
- int 1ah ;zero timer
- mov cx,no_iter ;set up for time test
- jcxz notm ;skip timing routine
- lop: call pprocd ;call the procedure
- loop lop ;until enough iterations to count
- mov ah,00 ;now get final time
- int 1ah ;read timer
- mov ftime,dx ;assume only low word changed
- notm: call ht_stat ;report final results
- ret
- endm
-
- staksg segment para stack 'stack'
- dw 64 dup(?)
- staksg ends
- ;
- datasg segment para 'data'
- nam_tbl db dta_len dup(?) ;DTA and symbol list
- fcbr label byte ;FCB
- fcbdr db 02 ;drive B
- fcbnm db "SYMBOLS " ;file name
- fcbext db "DOC" ;and extension
- fcbbl dw 0000 ;DOS use
- fcbrs dw 0000
- fcbdos dd ?
- dw ?
- dt ?
- db 00
- fcbrn dd 00000000
- ;
- ftime dw 0 ; save low word of timer here
- fcount dw 0 ; save final count here
- hsh_tbl db ht_len dup(?)
-
- col_msg db 0ah,0dh,"Number of collisions: "
- col_cnt db str_len dup(" ")
- db 0ah,0dh,"$"
- tim_msg db 0ah,0dh,"Time in counts: "
- tim_cnt db str_len dup(" ")
- db 0ah,0dh,"$"
- prg_ttl db 0dh,0ah,09h,"Hashing Algorithm Test",0dh,0ah,"$"
- asc_ttl db 0dh,0ah,"Test 1: ASCII Sum",0dh,0ah,"$"
- xor_ttl db 0dh,0ah,"Test 2: XOR symbol halves",0dh,0ah,"$"
- ; put any other titles here
- datasg ends
- ;
- codesg segment para 'code'
- hash_it proc far
- assume cs:codesg, ds:datasg, ss:staksg, es:datasg
- ; set up for return to DOS
- push ds
- sub ax,ax
- push ax
- mov ax,datasg
- mov ds,ax
- mov es,ax
- ; end setup, now print title
- lea dx,prg_ttl
- call scr_out
- call fill_tbl ;load symbol table
- call hsh_asc ;test ASCII sum hash method
- call hsh_xor ;test XOR hash method
- ; put any other test calls here
- ret
- hash_it endp
- ;
- scr_out proc near
- ; This procedure prints a message with address in DX
- ; on the screen using DOS interrupt 21h.
- push ax ;save affected register
- mov ah,09
- int 21h ;do the interrupt
- pop ax ;restore ax
- ret
- scr_out endp
- ;
- hsh_asc proc near
- ; This procedure tests the hashing method in which the
- ; ASCII values of each character in a symbol are summed, then
- ; divided by the table length, with the remainder used as
- ; the hash key. Easily implemented in high level languages.
- ; The stdtst macro does all the work.
- ;
- stdtst asc_ttl,asc_hsh
- hsh_asc endp
- ;
- asc_hsh proc near
- ; This procedure performs ASCII sum symbol hashing.
- ; The sethsh macro establishes addressing for the test.
- ;
- sethsh
- loopa: sub dx,dx ;zero dx for sum.
- push cx ;save outer loop count
- mov cx,nam_len ;number of bytes to sum
- loopb: mov al,[si] ;get byte into al
- cbw ;convert to integer
- add dx,ax ;and add to total for name
- inc si ;next byte
- loop loopb ;repeat until name bytes are summed
- mov ax,dx ;set up to divide
- sub dx,dx ;need unsigned dw dividend
- div bp ;divide by table length
- mov bx,dx ;remainder to index reg
- inc byte ptr [bx+di] ;increment hash table byte accessed
- inc si ;skip cr-lf at end of record
- inc si
- pop cx ;restore count of outer loopa
- loop loopa ;repeat until table is done
- pop cx ;restore external count
- ret
- asc_hsh endp
- ;
- hsh_xor proc near
- ; This procedure tests an hashing algorithm which exclusive-ors
- ; the first and second halves of an 8-byte symbol to create a
- ; double word which is divided by the table length. The remainder
- ; is then used as the hash key. Again the macro does the work.
- ;
- stdtst xor_ttl,xor_hsh
- hsh_xor endp
- ;
- xor_hsh proc near
- ; This procedure performs XOR hashing of one table
- ; The macro sethsh establishes addressing
- ;
- sethsh
- loopc: mov dx,word ptr [si] ;first bytes to high word
- mov ax,word ptr [si+2] ;second 2 to low
- xor dx,word ptr [si+4] ;xor high bytes
- xor ax,word ptr [si+6] ;then low with last 2
- xor ax,dx ;finally xor together
- sub dx,dx ;zero dx for dw division
- div bp ;divide by table length
- mov bx,dx ;remainder to index
- inc byte ptr [bx+di] ;inc accessed byte
- add si,rec_len ;move to next record
- loop loopc ; repeat until table is traversed
- pop cx ;restore external count
- ret
- xor_hsh endp
- ;
- fill_tbl proc near
- ; This procedure fills the symbol table from a sequential disk
- ; file. The table is filled once before the hashing algorithms are
- ; tested to exclude disk i/o from algorithm timing.
- ;
- ; open file
- lea dx,fcbr
- mov ah,0fh
- int 21h
- ; set record size and DTA address
- mov fcbrs,rec_len
- lea dx,nam_tbl
- mov ah,1ah
- int 21h
- ; read disk file in block read. Compatible with DOS 1.1 and up
- mov cx,nt_len
- lea dx,fcbr
- mov ah,27h
- int 21h
- ret
- fill_tbl endp
- ;
- ht_clr proc near
- ; This procedure zeroes the hash table. For this test, the
- ; hash table is simply an array of byte integers in which a
- ; count of the number of hits on that location is kept.
- ;
- sub al,al ;get a convenient zero
- lea bx,hsh_tbl ;get table address
- mov cx,ht_len ;set count to table length
- loopd: mov [bx],al ;zero byte in table
- inc bx ;set up for next byte
- loop loopd ;repeat until table is zeroed
- ret
- ht_clr endp
- ;
- ht_cnt proc near
- ; This procedure counts collisions in the hash table and
- ; returns the result in dx.
- ;
- lea bx,hsh_tbl ;get table address
- mov cx,ht_len ;and size
- sub ah,ah ;zero ah for unsigned math
- sub dx,dx ;zero dx for sum
- looph: mov al,[bx] ;move byte to al, sure its positive
- cmp ax,1 ;0 or 1 is no collision
- jbe nxtb
- dec ax ;allow for 1 hit
- add dx,ax ;add collisions to sum
- nxtb: inc bx ;move to next byte
- loop looph ;repeat until table traversed
- ret
- ht_cnt endp
- ;
- ht_stat proc near
- ; This procedure produces the collision count and time results
- ; of a hashing test.
- ; 1. Display collisions
- mov ax,fcount ;count in ax
- lea bx,col_cnt ;where to put ASCII string
- call asc_con ;convert count
- lea dx,col_msg ;and print it
- call scr_out
- ; 2. Display time
- mov ax,ftime ;done same as count
- lea bx,tim_cnt
- call asc_con
- lea dx,tim_msg
- call scr_out
- ret
- ht_stat endp
- ;
- asc_con proc near
- ; This procedure converts an unsigned integer in ax into a
- ; str_len byte string pointed to by bx.
- ;
- add bx,str_len-1 ;move to end of string space
- mov cx,str_len ;string length in cx
- mov si,10 ;converting to base 10
- loopm: sub dx,dx ;zero for unsigned divide
- div si
- add dl,'0' ;make it ASCII
- mov [bx],dl ;put character in buffer
- dec bx ;back up to next position
- loop loopm ;repeat until buffer is full
- ret
- asc_con endp
- ;
- codesg ends
- end hash_it
- c bx ;back up to next