home *** CD-ROM | disk | FTP | other *** search
- Newsgroups: alt.sources
- From: goer@ellis.uchicago.edu (Richard L. Goerwitz)
- Subject: kjv browser, part 5 of 11
- Message-ID: <1991Jul3.065108.28132@midway.uchicago.edu>
- Date: Wed, 3 Jul 1991 06:51:08 GMT
-
- ---- Cut Here and feed the following to sh ----
- #!/bin/sh
- # this is bibleref.05 (part 5 of a multipart archive)
- # do not concatenate these parts, unpack them in order with /bin/sh
- # file indexutl.icn continued
- #
- if test ! -r _shar_seq_.tmp; then
- echo 'Please unpack part 1 first!'
- exit 1
- fi
- (read Scheck
- if test "$Scheck" != 5; then
- echo Please unpack part "$Scheck" next!
- exit 1
- else
- exit 0
- fi
- ) < _shar_seq_.tmp || exit 1
- if test ! -f _shar_wnt_.tmp; then
- echo 'x - still skipping indexutl.icn'
- else
- echo 'x - continuing file indexutl.icn'
- sed 's/^X//' << 'SHAR_EOF' >> 'indexutl.icn' &&
- X#
- X############################################################################
- X
- X#
- X# All from the IPL.
- X#
- Xlink radcon, bincvt
- X
- X#
- X# FS = field separator, s_len = string length of fields, len =
- X# byte length of fields, no = number of fields, is_case_sensitive =
- X# whether to map index entries to lowercase, r_field = rollover
- X# field for limits file.
- X#
- Xrecord is(FS, s_len, len, no, is_case_sensitive, r_field)
- Xglobal _slash, _baselen, IS
- X
- X
- Xprocedure base_name(s)
- X
- X # If s == "/usr/local/man/man1/icon.1", base_name will return
- X # "icon.1". Somewhat like the Unix basename system command.
- X
- X # global _slash # _slash = \ for MS-DOS, / for Unix
- X s ? {
- X while tab(find(_slash)+1)
- X return tab(0)
- X }
- X
- Xend
- X
- X
- X
- Xprocedure dir_name(s)
- X
- X # If s == "/usr/local/man/man1/icon.1", dir_name will return
- X # "/usr/local/man/man1". Somewhat like the Unix dirname system
- X # command.
- X
- X local s2
- X # global _slash # _slash = \ for MS-DOS, / for Unix
- X
- X s2 := ""
- X s ? {
- X while s2 ||:= tab(find(_slash)+1)
- X return s2
- X }
- X
- Xend
- X
- X
- X
- Xprocedure create_fname(FNAME, EXT)
- X
- X #
- X # Discard path component. Cut basename down to a small enough
- X # size that the OS will be able to handle addition of the ex-
- X # tension, EXT.
- X #
- X
- X # global _slash, _baselen
- X
- X *EXT > 3 &
- X abort("get_index_fname","extension too long",7)
- X
- X return right(
- X stripchars(base_name(FNAME,_slash),'.'), _baselen, "x") ||
- X "." || EXT
- X
- Xend
- X
- X
- X
- Xprocedure stripchars(s,c)
- X
- X # Strip chars (c) from string (s). Return stripped s.
- X
- X local s2
- X
- X s2 := ""
- X s ? {
- X while s2 ||:= tab(upto(c))
- X do tab(many(c))
- X s2 ||:= tab(0)
- X }
- X return s2
- X
- Xend
- X
- X
- X
- Xprocedure abort(proc_name, message, error_code)
- X
- X if not (/proc_name := "") then
- X proc_name := trim(proc_name, ': ') || ": "
- X /error_code := 1
- X
- X write(&errout, proc_name, \message) # fail if there's no error msg,
- X exit(error_code) # then abort
- X
- Xend
- X
- X
- X
- Xprocedure write_int(f, i, size)
- X
- X # Write out an integer byte-by-byte.
- X #
- X # Important little routine. I know it looks inelegant and slow.
- X # Feel free to modify it for speed, and send me the results.
- X # Don't knock out the old code, though. You understood it when
- X # you read it, right? That's the idea :-).
- X
- X local marker, how_many
- X
- X marker := ""
- X how_many := 0
- X /size := (*exbase10(i,2) <= seq(0,8))
- X
- X # output bytes most significant first; then least significant
- X until (size -:= 8) <= -8 do {
- X how_many +:= 1
- X marker ||:= (f, char(iand(ishift(i, -size), 2r11111111)))
- X }
- X
- X writes(f, marker)
- X return how_many # number of characters written
- X
- Xend
- X
- X
- X
- Xprocedure read_int(f, size)
- X
- X local i, _shift
- X
- X # collect bytes, putting the first one read into the high
- X # end of an integer, and on down to the last read (into the
- X # low end)
- X i := _shift := 0
- X while (_shift +:= 8) <= size do
- X i +:= ishift(ord(reads(f)), size - _shift) | fail
- X return i
- X
- Xend
- X
- X
- X
- Xprocedure initialize_os_params()
- X
- X local os
- X # global _slash, _baselen
- X
- X if find("MS-DOS", os := &features) then {
- X _slash := "\\"; _baselen := 8
- X }
- X else if find("UNIX", os := &features) then {
- X _slash := "/"; _baselen := 10
- X }
- X else abort("initialize_os_params","os parameters undefined", 6)
- X
- X return os
- X
- Xend
- X
- X
- Xprocedure are_metas(str)
- X
- X local chr, tmp
- X
- X str ? {
- X
- X # String-initial metacharacters are meaningless.
- X tab(many('*+?|'))
- X
- X # Look for metacharacters and backslashes.
- X while tab(upto('\\*+()|?.$^[')) do {
- X
- X # If a backslash comes first, then the next character can't
- X # be a meta. Move past it, and try again.
- X if ="\\" then move(1) |
- X abort("are_metas","malformed \-escape sequence",19)
- X # Otherwise, we have a metacharacter. Return its position
- X # in str. Dereference just so as not to have a global var.
- X # on the loose.
- X else return .&pos
- X }
- X
- X }
- X
- X # If we've gotten this far without returning, then the string is
- X # clean of metacharacters, and (in boolean terms) the procedure
- X # are_metas() returns false.
- X fail
- X
- Xend
- X
- X
- X#
- X# digits_2_bitmap
- X#
- X# Converts a string representation of a set of bit-fields into an
- X# integer. I.e. 1:1:3 becomes binary 010111 (decimal 23). This
- X# integer is like a map, and is called, in text-processing circles,
- X# a bitmap (not to be confused with bit-mapped display techniques).
- X#
- Xprocedure digits_2_bitmap(s)
- X
- X # s = location string (e.g. 10:02:03:75)
- X # IS.s_len = the string length of fields in s (3 in the above example)
- X # IS.len = the number of bits needed to hold an integer
- X # representation of a single field
- X # IS.no = number of fields in s (4 in the above example)
- X #
- X # Fixed field lengths make things much simpler, but a whole
- X # helluva lot less economical. Be sure that (IS.len * IS.no) does
- X # not exceed the register width for your CPU if either a) your
- X # implementation has no limits on the size of integers, or b) you
- X # are really concerned about performance. Otherwise, never mind.
- X
- X local bitmap, field, no
- X
- X no := IS.no
- X bitmap := 0
- X
- X s ? {
- X if upto(~&digits) then {
- X # The bitmap is delineated by field-markers (e.g. 11;23).
- X tab(upto(&digits))
- X while field := tab(many(&digits)) do {
- X no -:= 1
- X tab(upto(&digits))
- X bitmap +:= ishift(field, no * IS.len)
- X }
- X } else {
- X # Yuck! An un-delineated bitmap (e.g. 23423).
- X while field := integer(move(IS.s_len)) do {
- X no -:= 1
- X tab(upto(&digits))
- X bitmap +:= ishift(field, no * IS.len)
- X }
- X }
- X # If we're not at the end of the line, then we've got a
- X # a problem with the portion of the input file passed
- X # to digits_2_bitmap as s (arg1).
- X pos(0) | abort("digits_2_bitmap",
- X "malformed position marker: "||s,
- X 11)
- X }
- X
- X # If the current no is not -1, then we have either too
- X # many or too few fields, i.e. someone wrote, say, 01:02:03 in
- X # a text which he or she declared as having four fields.
- X no = 0 | abort("digits_2_bitmap",
- X no || " fields in "||s||" (expected "||IS.no||")",
- X 12)
- X # write(&errout,"bitmap = ",radcon(bitmap,10,2)) # for debugging
- X return bitmap
- X
- Xend
- SHAR_EOF
- echo 'File indexutl.icn is complete' &&
- true || echo 'restore of indexutl.icn failed'
- rm -f _shar_wnt_.tmp
- fi
- # ============= retrops.icn ==============
- if test -f 'retrops.icn' -a X"$1" != X"-c"; then
- echo 'x - skipping retrops.icn (File already exists)'
- rm -f _shar_wnt_.tmp
- else
- > _shar_wnt_.tmp
- echo 'x - extracting retrops.icn (Text)'
- sed 's/^X//' << 'SHAR_EOF' > 'retrops.icn' &&
- X############################################################################
- X#
- X# Name: retrops.icn
- X#
- X# Title: logical operations for retrievals
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.6
- X#
- X############################################################################
- X#
- X# The following collection of procedures implements logical
- X# and/or/and_not operations for the retrieve text-retrieval package.
- X# Their general form is
- X#
- X# r_op(set1, set2, filename, field, range)
- X#
- X# where op = one of either and, or, or and_not. The field and range
- X# arguments are optional.
- X#
- X# To illustrate how these operations are performed, let me explain
- X# how one of the procedures below, r_and(), works. Let us assume we
- X# have retrieve()d bitmap sets for two patterns in a single indexed
- X# file. Call the sets set1 and set2. Call the file filename. These
- X# two sets are passed to r_and() as arguments one and two. R_and()
- X# takes the intersection of these two sets. The result is a
- X# collection of all bitmaps pointing to blocks in filename containing
- X# words matching *both* of the two patterns used to generate set1 and
- X# set2. R_and() returns this result to the calling procedure.
- X#
- X# Note that, by default, r_and() retrieves co-ocurrences of patterns
- X# within a single block. If the programmer wishes to find
- X# co-ocurrences within larger units, he or she may supply a field
- X# argument. Fields are fixed width bit-fields into which location
- X# markers for filename are divided, numbered from the largest and
- X# most general to the smallest and most specific. See the file
- X# makeind for a discussion of how they are handled. A range
- X# parameter may also be specified, which makes it possible to look
- X# for coocurrences in collections of more than one unit of the type
- X# specified in the field argument.
- X#
- X############################################################################
- X#
- X# Links: none
- X#
- X# See also: retrieve.icn, makeind.icn
- X#
- X############################################################################
- X
- X# The following globals contain stats for current file (here, arg 3).
- X# global filestats # declared in initfile.icn
- X# global IS # declared in indexutl.icn
- X
- Xprocedure r_or(set1, set2, filename, field, range)
- X # field and range are meaningless for this op
- X return set1 ++ set2
- Xend
- X
- Xprocedure r_and(set1, set2, filename, field, range)
- X # set intersection
- X return apply_op("**", set1, set2, filename, field, range)
- Xend
- X
- Xprocedure r_and_not(set1, set2, filename, field, range)
- X # simpler way of saying X and not Y, or Y and not X
- X return apply_op("--", set1, set2, filename, field, range)
- Xend
- X
- X
- Xprocedure apply_op(op, set1, set2, filename, field, range)
- X
- X local r_shift, tbl, elem, set1a, set2a, shifted_elem
- X
- X # globals:
- X #
- X # IS is a global record in which will be stored important stats for
- X # the file named in arg 4 (filename). We will be using two of IS's
- X # fields:
- X #
- X # IS.len = the number of bits needed to hold an integer
- X # representation of a single field in filename
- X # IS.no = number of fields for filename
- X #
- X # Filestats is a global table which contains various important stats
- X # for every file that's been accessed. These stats are kept in a
- X # record of type Fs
- X #
- X # record Fs(ind_filename, bmp_filename, lim_filename, IS, ofs_table)
- X #
- X # Fs is declared in initfile.icn; here all we need is Fs.IS,
- X # which we access via filestats[filename].IS. Initfile() sets up
- X # filestats for filename, but we shouldn't call it here. It has
- X # (or should already have) been called by retrieve().
- X #
- X
- X # Check for sloppy programming.
- X /filename & abort("apply_op", "you gotta call me with a filename", 43)
- X type(set1) == ("list"|"set") |
- X abort("apply_op","arg 1 must be a list/set",47)
- X type(set2) == ("list"|"set") |
- X abort("apply_op","arg 2 must be a list/set",48)
- X
- X # Initialize important variables.
- X #
- X if /filestats | /filestats[filename]
- X then abort("apply_op", "can't apply_op before retrieve()ing", 44)
- X IS := filestats[filename].IS # re-initialize IS for current file
- X
- X /field := IS.no; field := IS.no - field
- X /range := 0
- X IS.no >= field >= 0 | abort("apply_op", "field out of range", 40)
- X range >= 0 | abort("apply_op", "no negative ranges, please!", 41)
- X
- X if field = range = 0 then {
- X type(set1) ~== "set" & set1 := set(set1)
- X type(set2) ~== "set" & set2 := set(set2)
- X # no need to shift anything around
- X return op(set1, set2)
- X } else {
- X set1a := set()
- X if field = 0 then {
- X every elem := !set1 do {
- X every abs(elem - !set2) <= range do
- X insert(set1a, elem)
- X }
- X return set1a
- X } else {
- X # uh oh, we need to knock out some fields
- X tbl := table()
- X set1a := set(); set2a := set()
- X r_shift := -(field * IS.len)
- X every elem := !set1 do {
- X shifted_elem := ishift(elem,r_shift)
- X /tbl[elem] := set()
- X insert(tbl[shifted_elem], elem)
- X insert(set1a, shifted_elem)
- X }
- X every elem := !set2 do {
- X shifted_elem := ishift(elem,r_shift)
- X /tbl[elem] := set()
- X insert(tbl[shifted_elem], elem)
- X insert(set2a, shifted_elem)
- X }
- X set2 := set()
- X if range = 0 then {
- X set1 := op(set1a, set2a)
- X every insert(set2, !tbl[!set1])
- X }
- X else {
- X every elem := !set1a do {
- X every abs(elem - !set2a) <= range do
- X insert(set2, tbl[elem])
- X }
- X }
- X return set2
- X }
- X }
- X
- Xend
- SHAR_EOF
- true || echo 'restore of retrops.icn failed'
- rm -f _shar_wnt_.tmp
- fi
- # ============= whatnext.icn ==============
- if test -f 'whatnext.icn' -a X"$1" != X"-c"; then
- echo 'x - skipping whatnext.icn (File already exists)'
- rm -f _shar_wnt_.tmp
- else
- > _shar_wnt_.tmp
- echo 'x - extracting whatnext.icn (Text)'
- sed 's/^X//' << 'SHAR_EOF' > 'whatnext.icn' &&
- X###########################################################################
- X#
- X# Name: whatnext.icn
- X#
- X# Title: return next/previous bitmap in filename
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.5
- X#
- X###########################################################################
- X#
- X# Given a bitmap and a filename, NextBitmap() and PrevBitmap() return
- X# either the next or previous bitmap in filename. Fail if there is no
- X# next or previous bitmap. Syntax:
- X#
- X# {Next,Prev}Bitmap(bitmap, filename, start_no)
- X#
- X# start_no specifies the lowest possible field value. If null, defaults
- X# to 1. Normally start_no will be either 1 or 0.
- X#
- X############################################################################
- X#
- X# links: ./indexutl.icn ./initfile.icn
- X#
- X############################################################################
- X
- X# For error messages, debugging.
- Xlink radcon
- X
- X# Declared in indexutl.icn.
- X# record is(FS, s_len, len, no, is_case_sensitive)
- X# global IS
- X
- X# Declared in initfile.icn.
- X# global filestats
- X# record Fs(ind_filename, bmp_filename, lim_filename, IS, ofs_table)
- X
- X# Used here to store limits data in the static table limits_tbl.
- Xrecord ldata(limitslst, limitsset)
- X
- X
- Xprocedure PrevBitmap(bitmap, filename, start_no)
- X return PrevNextBitmap(bitmap, filename, "p", start_no)
- Xend
- X
- Xprocedure NextBitmap(bitmap, filename, start_no)
- X return PrevNextBitmap(bitmap, filename, "n", start_no)
- Xend
- X
- X
- Xprocedure PrevNextBitmap(bitmap, filename, direction, start_no)
- X
- X local limits_file, limit, bitmap_length, limitslst,
- X limitsset, field_mask, shift_bits_out, lowlimit, shift_back,
- X i, j, newbitmap
- X static limits_tbl
- X initial limits_tbl := table()
- X
- X # These verses are missing from the standard Hebrew Bible. For
- X # English, we can safely ignore them. See below ("return newbitmap")
- X # for an explanation of how to use a weirdos set.
- X # weirdos := set(["josh 14:8","deut 23:12"])
- X
- X # Check for sloppy programming.
- X /filename & abort("PrevNextBitmap", "you called me without a filename",54)
- X /start_no := 1 # default low value for fields is 1
- X
- X # If necessary, initialize limits stats for the current file.
- X #
- X if /limits_tbl[filename] then {
- X if /filestats | /filestats[filename] then
- X initfile(filename) # see initfile.icn
- X limits_file := open(filestats[filename].lim_filename) |
- X abort("PrevBitmap","can't open "||
- X filestats[filename].lim_filename ||
- X ", index with the -l [int] option", 60)
- X IS := filestats[filename].IS
- X # Figure out how many bits we need (has to be divisible by 8).
- X bitmap_length := ((IS.len * IS.no) <= seq(0,8))
- X limitslst := list(); limitsset := set()
- X shift_bits_out := -(((IS.no-IS.r_field)+ 1) * IS.len)
- X while limit := read_int(limits_file, bitmap_length) do {
- X lowlimit := ishift(limit, shift_bits_out)
- X # Shift back, filling remaining fields with start_no (usu. 1).
- X # Note that if IS.no - IS.r_field is 0, nothing will happen!
- X every shift_back := IS.len * (1 to (IS.no-IS.r_field)+ 1) do {
- X lowlimit := ior(ishift(lowlimit, shift_back), start_no)
- X }
- X every put(limitslst, lowlimit | limit)
- X insert(limitsset, limit)
- X }
- X close(limits_file)
- X insert(limits_tbl, filename, ldata(limitslst, limitsset))
- X }
- X
- X IS := filestats[filename].IS
- X limitslst := limits_tbl[filename].limitslst
- X limitsset := limits_tbl[filename].limitsset
- X #
- X # Used to mask off the least significant field of bitmap.
- X field_mask := 2^(IS.len)-1
- X #
- X # How many bits should we shift to the right? E.g. in biblical
- X # texts with morphological tags, we want to shift out the morpheme
- X # field, and deal only with book chapter:verse. The rollover
- X # field (IS.r_field) is the field on which the limits file is
- X # based. Subtract it from the total number of fields (IS.no), and
- X # then multiply it by the field width in bits (IS.len) and we get
- X # the amount to shift out in order to leave us with the rollover
- X # field in the least position.
- X #
- X shift_bits_out := -((IS.no-IS.r_field) * IS.len)
- X
- X if direction == "p" then {
- X #
- X # See if the rollover field has its lowest possible value. If
- X # so, then use the limits list to get a bitmap for the
- X # preceding section in filename.
- X #
- X if iand(ishift(bitmap, shift_bits_out), field_mask) = start_no then {
- X bitmap = limitslst[j := 1 to *limitslst] | fail
- X newbitmap := limitslst[j - 1] | fail
- X }
- X #
- X # If the rollover field doesn't have its lowest possible
- X # value, then it can simply be decremented; then the remaining
- X # fields must be reset to start_no.
- X #
- X else {
- X # Decrement appropriate field by one; direction is "p".
- X newbitmap := ishift(bitmap, shift_bits_out) - 1
- X # Shift back, filling remaining fields with start_no (usu. 1).
- X # Note that if IS.no - IS.r_field is 0, nothing will happen!
- X every shift_back := IS.len * (1 to (IS.no-IS.r_field)) do {
- X newbitmap := ior(ishift(newbitmap, shift_back), start_no)
- X }
- X }
- X return newbitmap
- X # This is how we'd handle things if we needed a weirdos set
- X # (needed, e.g., for the Hebrew Bible).
- X # if member(weirdos, newbitmap)
- X # then return WhatsNextPrevious(
- X # newbitmap, filename, direction, start_no)
- X # else return newbitmap
- X }
- X else if direction == "n" then {
- X #
- X # See if the field after the rollover field has its highest
- X # possible value. We can determine this by checking to see if
- X # bitmap is a member of the limits set. If so, then use the
- X # limits *list* to get a bitmap for the next section in filename.
- X #
- X if member(limitsset,bitmap) then {
- X bitmap = limitslst[i := 1 to *limitslst] | fail
- X newbitmap := limitslst[i + 1] | fail
- X }
- X #
- X # If the rollover field doesn't have its highest possible
- X # value, then it can simply be incremented; then the remaining
- X # fields must be reset to start_no.
- X #
- X else {
- X # Increment appropriate field by one; direction is "n".
- X newbitmap := ishift(bitmap, shift_bits_out) + 1
- X # Shift back, filling remaining fields with start_no (usu. 1).
- X every shift_back := IS.len * (1 to (IS.no-IS.r_field)) do {
- X newbitmap := ior(ishift(newbitmap, shift_back), start_no)
- SHAR_EOF
- true || echo 'restore of whatnext.icn failed'
- fi
- echo 'End of part 5'
- echo 'File whatnext.icn is continued in part 6'
- echo 6 > _shar_seq_.tmp
- exit 0
- --
-
- -Richard L. Goerwitz goer%sophist@uchicago.bitnet
- goer@sophist.uchicago.edu rutgers!oddjob!gide!sophist!goer
-