home *** CD-ROM | disk | FTP | other *** search
- Newsgroups: alt.sources
- From: goer@ellis.uchicago.edu (Richard L. Goerwitz)
- Subject: kjv browser, part 8 of 11
- Message-ID: <1991Jul3.065222.28343@midway.uchicago.edu>
- Date: Wed, 3 Jul 1991 06:52:22 GMT
-
- ---- Cut Here and feed the following to sh ----
- #!/bin/sh
- # this is bibleref.08 (part 8 of a multipart archive)
- # do not concatenate these parts, unpack them in order with /bin/sh
- # file findre.icn continued
- #
- # Ordering guard: _shar_seq_.tmp holds the number of the part that is
- # expected next (each part ends by writing the following number into it).
- # If the file is unreadable, no earlier part has been unpacked yet.
- if test ! -r _shar_seq_.tmp; then
- echo 'Please unpack part 1 first!'
- exit 1
- fi
- # Read the expected part number in a subshell; the subshell's exit
- # status (non-zero unless the number is 8) decides whether to go on.
- (read Scheck
- if test "$Scheck" != 8; then
- echo Please unpack part "$Scheck" next!
- exit 1
- else
- exit 0
- fi
- ) < _shar_seq_.tmp || exit 1
- if test ! -f _shar_wnt_.tmp; then
- echo 'x - still skipping findre.icn'
- else
- echo 'x - continuing file findre.icn'
- sed 's/^X//' << 'SHAR_EOF' >> 'findre.icn' &&
- X tmp := tab(many('*?+')) | &null
- X if upto('*?',\tmp)
- X then put(token_list,-ord("*"))
- X else put(token_list,-ord("+"))
- X }
- X "?" : {
- X tmp := tab(many('*?+')) | &null
- X if upto('*+',\tmp)
- X then put(token_list,-ord("*"))
- X else put(token_list,-ord("?"))
- X }
- X "(" : {
- X tab(many('*+?'))
- X put(token_list,-ord("("))
- X }
- X default: {
- X put(token_list,-ord(chr))
- X }
- X }
- X }
- X else {
- X case chr of {
- X # More egrep compatibility stuff.
- X "[" : {
- X b_loc := find("[") | *&subject+1
- X every next_one := find("]",,,b_loc)
- X \next_one ~= &pos | err_out(s,2,chr)
- X put(token_list,-ord(chr))
- X }
- X "]" : {
- X if &pos = (\next_one+1)
- X then put(token_list,-ord(chr)) &
- X next_one := &null
- X else put(token_list,ord(chr))
- X }
- X default: put(token_list,ord(chr))
- X }
- X }
- X }
- X }
- X
- X token_list := UnMetaBrackets(token_list)
- X
- X fixed_length_token_list := list(*token_list)
- X every i := 1 to *token_list
- X do fixed_length_token_list[i] := token_list[i]
- X return fixed_length_token_list
- X
- Xend
- X
- X
- X
- Xprocedure UnMetaBrackets(l)
- X
- X    # A bracket expression denotes a cset, so nothing between the
- X    # opening and closing bracket can act as a metacharacter.
- X    # Returns a copy of token list l in which every token lying
- X    # between a meta "[" and its meta "]" has been made positive
- X    # (i.e. literal).  The brackets themselves are left negative.
- X
- X    local tmplst, i, Lb, Rb
- X
- X    Lb := -ord("[")
- X    Rb := -ord("]")
- X    tmplst := []
- X
- X    i := 1
- X    while i <= *l do {
- X        # Every token is copied; only what follows a "[" is altered.
- X        put(tmplst, l[i])
- X        if l[i] = Lb then {
- X            # Copy the bracket contents as literals, then the "]" itself.
- X            until l[i +:= 1] = Rb do
- X                put(tmplst, abs(l[i]))
- X            put(tmplst, l[i])
- X        }
- X        i +:= 1
- X    }
- X    return tmplst
- X
- Xend
- X
- X
- X
- Xprocedure MakeFSTN(l,INI,FIN)
- X
- X # MakeFSTN recursively descends through the tree structure
- X # implied by the tokenized string, l, recording in (global)
- X # fstn_table a list of operations to be performed, and the
- X # initial and final states which apply to them.
- X #
- X # l - list of integer tokens; the comparisons below treat
- X # negative values (-ord(c)) as metacharacters and positive
- X # values as literal characters.
- X # INI - state that the transitions built for l start from; when
- X # omitted it becomes 1 and state_table, the state counter and
- X # biggest_nonmeta_str are reset (see below).
- X # FIN - state reached once all of l has been matched; defaults to 0.
- X #
- X # Each section below handles one construct, records o_a_s(...)
- X # entries under state_table[INI], recurses on whatever is left of
- X # l, and returns. Nothing useful is returned to the caller.
- X
- X local i, inter, inter2, tmp, Op, Arg
- X static Lp, Rp, Sl, Lb, Rb, Caret_inside, Dot, Dollar, Caret_outside
- X # global biggest_nonmeta_str, slash_present, parends_present
- X initial {
- X Lp := -ord("("); Rp := -ord(")")
- X Sl := -ord("|")
- X Lb := -ord("["); Rb := -ord("]"); Caret_inside := ord("^")
- X Dot := -ord("."); Dollar := -ord("$"); Caret_outside := -ord("^")
- X }
- X
- X # The conjunction only runs past /INI when INI was omitted, i.e.
- X # on the outermost call; recursive calls always pass INI.
- X /INI := 1 & state_table := table() &
- X NextState("new") & biggest_nonmeta_str := ""
- X /FIN := 0
- X
- X # I haven't bothered to test for empty lists everywhere.
- X if *l = 0 then {
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(zSucceed,&null,FIN))
- X return
- X }
- X
- X # HUNT DOWN THE SLASH (ALTERNATION OPERATOR)
- X # tab_bal generates every index at which the counts of Lp and Rp
- X # are equal, so only a slash outside all parentheses qualifies.
- X every i := 1 to *l do {
- X if l[i] = Sl & tab_bal(l,Lp,Rp) = i then {
- X if i = 1 then err_out(l,2,char(abs(l[i]))) else {
- X /slash_present := "yes"
- X inter := NextState()
- X inter2:= NextState()
- X MakeFSTN(l[1:i],inter2,FIN)
- X MakeFSTN(l[i+1:0],inter,FIN)
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(apply_FSTN,inter2,0))
- X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
- X return
- X }
- X }
- X }
- X
- X # HUNT DOWN PARENTHESES
- X if l[1] = Lp then {
- X i := tab_bal(l,Lp,Rp) | err_out(l,2,"(")
- X inter := NextState()
- X # 0 > l[i+1] succeeds only for a negative (meta) token and
- X # fails when the group is the last thing in l.
- X if any('*+?',char(abs(0 > l[i+1]))) then {
- X case l[i+1] of {
- X -ord("*") : {
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
- X MakeFSTN(l[2:i],INI,INI)
- X MakeFSTN(l[i+2:0],inter,FIN)
- X return
- X }
- X -ord("+") : {
- X inter2 := NextState()
- X /state_table[inter2] := []
- X MakeFSTN(l[2:i],INI,inter2)
- X put(state_table[inter2],o_a_s(apply_FSTN,inter,0))
- X MakeFSTN(l[2:i],inter2,inter2)
- X MakeFSTN(l[i+2:0],inter,FIN)
- X return
- X }
- X -ord("?") : {
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
- X MakeFSTN(l[2:i],INI,inter)
- X MakeFSTN(l[i+2:0],inter,FIN)
- X return
- X }
- X }
- X }
- X else {
- X MakeFSTN(l[2:i],INI,inter)
- X MakeFSTN(l[i+1:0],inter,FIN)
- X return
- X }
- X }
- X else { # I.E. l[1] NOT = Lp (left parenthesis as -ord("("))
- X # Split l just before the first parenthesis so that the
- X # recursive call sees it as l[1].
- X every i := 1 to *l do {
- X case l[i] of {
- X Lp : {
- X inter := NextState()
- X MakeFSTN(l[1:i],INI,inter)
- X /parends_present := "yes"
- X MakeFSTN(l[i:0],inter,FIN)
- X return
- X }
- X Rp : err_out(l,2,")")
- X }
- X }
- X }
- X
- X # NOW, HUNT DOWN BRACKETS
- X if l[1] = Lb then {
- X i := tab_bal(l,Lb,Rb) | err_out(l,2,"[")
- X inter := NextState()
- X # Build the cset from the tokens between the brackets; a
- X # leading (literal) caret complements it.
- X tmp := ""; every tmp ||:= char(l[2 to i-1])
- X if Caret_inside = l[2]
- X then tmp := ~cset(Expand(tmp[2:0]))
- X else tmp := cset(Expand(tmp))
- X if any('*+?',char(abs(0 > l[i+1]))) then {
- X case l[i+1] of {
- X -ord("*") : {
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
- X put(state_table[INI],o_a_s(any,tmp,INI))
- X MakeFSTN(l[i+2:0],inter,FIN)
- X return
- X }
- X -ord("+") : {
- X inter2 := NextState()
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(any,tmp,inter2))
- X /state_table[inter2] := []
- X put(state_table[inter2],o_a_s(apply_FSTN,inter,0))
- X put(state_table[inter2],o_a_s(any,tmp,inter2))
- X MakeFSTN(l[i+2:0],inter,FIN)
- X return
- X }
- X -ord("?") : {
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
- X put(state_table[INI],o_a_s(any,tmp,inter))
- X MakeFSTN(l[i+2:0],inter,FIN)
- X return
- X }
- X }
- X }
- X else {
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(any,tmp,inter))
- X MakeFSTN(l[i+1:0],inter,FIN)
- X return
- X }
- X }
- X else { # I.E. l[1] not = Lb
- X every i := 1 to *l do {
- X case l[i] of {
- X Lb : {
- X inter := NextState()
- X MakeFSTN(l[1:i],INI,inter)
- X MakeFSTN(l[i:0],inter,FIN)
- X return
- X }
- X Rb : err_out(l,2,"]")
- X }
- X }
- X }
- X
- X # FIND INITIAL SEQUENCES OF POSITIVE INTEGERS, CONCATENATE THEM
- X if i := match_positive_ints(l) then {
- X inter := NextState()
- X tmp := Ints2String(l[1:i])
- X # if a slash has been encountered already, forget optimizing
- X # in this way; if parends are present, too, then forget it,
- X # unless we are at the beginning or end of the input string
- X # NOTE(review): FIN defaults to 0 above, so "FIN = 2" does not
- X # obviously mean "end of the input string" - confirm the
- X # intended value against the code that consumes state_table.
- X if INI = 1 | FIN = 2 | /parends_present &
- X /slash_present & *tmp > *biggest_nonmeta_str
- X then biggest_nonmeta_str := tmp
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(match,tmp,inter))
- X MakeFSTN(l[i:0],inter,FIN)
- X return
- X }
- X
- X # OKAY, CLEAN UP ALL THE JUNK THAT'S LEFT
- X i := 0
- X while (i +:= 1) <= *l do {
- X # char(0 < l[i]) fails for any negative token not listed
- X # here, which makes the whole case fail and calls err_out.
- X case l[i] of {
- X Dot : { Op := any; Arg := &cset }
- X Dollar : { Op := pos; Arg := 0 }
- X Caret_outside: { Op := pos; Arg := 1 }
- X default : { Op := match; Arg := char(0 < l[i]) }
- X } | err_out(l,2,char(abs(l[i])))
- X inter := NextState()
- X if any('*+?',char(abs(0 > l[i+1]))) then {
- X case l[i+1] of {
- X -ord("*") : {
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
- X put(state_table[INI],o_a_s(Op,Arg,INI))
- X MakeFSTN(l[i+2:0],inter,FIN)
- X return
- X }
- X -ord("+") : {
- X inter2 := NextState()
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(Op,Arg,inter2))
- X /state_table[inter2] := []
- X put(state_table[inter2],o_a_s(apply_FSTN,inter,0))
- X put(state_table[inter2],o_a_s(Op,Arg,inter2))
- X MakeFSTN(l[i+2:0],inter,FIN)
- X return
- X }
- X -ord("?") : {
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
- X put(state_table[INI],o_a_s(Op,Arg,inter))
- X MakeFSTN(l[i+2:0],inter,FIN)
- X return
- X }
- X }
- X }
- X else {
- X /state_table[INI] := []
- X put(state_table[INI],o_a_s(Op,Arg,inter))
- X MakeFSTN(l[i+1:0],inter,FIN)
- X return
- X }
- X }
- X
- X # WE SHOULD NOW BE DONE INSERTING EVERYTHING INTO state_table
- X # IF WE GET TO HERE, WE'VE PARSED INCORRECTLY!
- X err_out(l,4)
- X
- Xend
- X
- X
- X
- Xprocedure NextState(new)
- X
- X    # State-number dispenser.  Any non-null argument restarts the
- X    # count at 1; otherwise the previous number plus one is handed
- X    # out.  The number produced is returned.
- X
- X    static nextstate
- X
- X    nextstate := (\new & 1) | (nextstate + 1)
- X    return nextstate
- X
- Xend
- X
- X
- X
- Xprocedure err_out(x,i,elem)
- X
- X    # Reports a parse error on &errout and terminates the program
- X    # with exit status i.  x is the thing being parsed (shown via
- X    # image()); elem, if given, is the offending element, otherwise
- X    # "(?)" is printed in its place.
- X
- X    local where
- X
- X    where := image(\elem) | "(?)"
- X    write(&errout, "Error number ", i, " parsing ", image(x), " at ", where, ".")
- X    exit(i)
- X
- Xend
- X
- X
- X
- Xprocedure zSucceed()
- X
- X    # Always succeeds, returning the current scanning position as a
- X    # plain value (not as the &pos variable itself).
- X
- X    local here
- X
- X    here := &pos
- X    return here
- X
- Xend
- X
- X
- X
- Xprocedure Expand(s)
- X
- X # Expand(s) returns a copy of s (the text found between a pair of
- X # brackets) in which every range c1-c2 with c1 lexically less than
- X # c2 has been replaced by all the characters from c1 through c2.
- X # A "-" that cannot be read as part of such a range is copied
- X # literally, except when it would be the last character examined,
- X # in which case err_out is called.
- X
- X local s2, c1, c2
- X
- X s2 := ""
- X s ? {
- X # A leading "^" and/or a leading "-" are copied through as is.
- X s2 ||:= ="^"
- X s2 ||:= ="-"
- X # Copy everything up to the character just before the next "-".
- X while s2 ||:= tab(find("-")-1) do {
- X # If any part of this test fails, the scanning position is
- X # backed up to where it was before c1 was read.
- X if (c1 := move(1), ="-",
- X c2 := move(1),
- X c1 << c2)
- X then every s2 ||:= char(ord(c1) to ord(c2))
- X # Not a usable range: copy the two characters at the current
- X # position, provided that does not land on the end of s.
- X else s2 ||:= 1(move(2), not(pos(0))) | err_out(s,2,"-")
- X }
- X s2 ||:= tab(0)
- X }
- X return s2
- X
- Xend
- X
- X
- X
- Xprocedure tab_bal(l,i1,i2)
- X
- X    # Generator.  Walks list l from the left and suspends every
- X    # index at which the number of i1 elements seen so far equals
- X    # the number of i2 elements seen so far.  Fails when l is
- X    # exhausted.
- X
- X    local i, depth
- X
- X    depth := 0        # occurrences of i1 minus occurrences of i2
- X    every i := 1 to *l do {
- X        if l[i] === i1 then depth +:= 1
- X        else if l[i] === i2 then depth -:= 1
- X        if depth = 0 then suspend i
- X    }
- X
- Xend
- X
- X
- Xprocedure match_positive_ints(l)
- X
- X    # Measures the run of non-negative integers at the front of l.
- X    # If l contains no negative integer at all, *l + 1 is returned.
- X    # Otherwise the element just before the first negative integer
- X    # is left out of the run (it is "followed by" a negative), and
- X    # the position after the shortened run is returned - but only
- X    # when the first negative sits at index 4 or later.  So
- X    # [55, 55, 55, -42, 55] gives 3, while [55, -42] fails.
- X
- X    local i
- X
- X    # Advance i to the first negative element, or to *l + 1.
- X    i := 1
- X    while l[i] >= 0 do
- X        i +:= 1
- X
- X    if i > *l then return i
- X    if i <= 3 then fail
- X    return i - 1
- X
- Xend
- X
- X
- Xprocedure Ints2String(l)
- X
- X    # Converts a list of character codes into the string made of
- X    # the corresponding characters, in list order.
- X
- X    local text, k
- X
- X    text := ""
- X    every k := 1 to *l do
- X        text ||:= char(l[k])
- X    return text
- X
- Xend
- X
- X
- Xprocedure StripChar(s,s2)
- X
- X    # Returns s with every occurrence of s2 removed, together with
- X    # any run of characters belonging to s2 that follows such an
- X    # occurrence.  If s2 is empty or does not occur in s, s itself
- X    # is returned unchanged.
- X
- X    local tmp
- X
- X    # An empty s2 would match at every position without consuming
- X    # anything, so treat it as "nothing to strip".
- X    if *s2 = 0 | not find(s2,s) then return s
- X
- X    tmp := ""
- X    s ? {
- X        # Search for the value of s2, not the literal text "s2".
- X        while tmp ||:= tab(find(s2)) do
- X            tab(many(cset(s2)))
- X        tmp ||:= tab(0)
- X    }
- X    return tmp
- X
- Xend
- SHAR_EOF
- echo 'File findre.icn is complete' &&
- true || echo 'restore of findre.icn failed'
- rm -f _shar_wnt_.tmp
- fi
- # ============= kjv2rtv.icn ==============
- if test -f 'kjv2rtv.icn' -a X"$1" != X"-c"; then
- echo 'x - skipping kjv2rtv.icn (File already exists)'
- rm -f _shar_wnt_.tmp
- else
- > _shar_wnt_.tmp
- echo 'x - extracting kjv2rtv.icn (Text)'
- sed 's/^X//' << 'SHAR_EOF' > 'kjv2rtv.icn' &&
- X############################################################################
- X#
- X# Name: kjv2rtv.icn
- X#
- X# Title: kjv2rtv (KJV -> retrieve format converter)
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.5
- X#
- X############################################################################
- X#
- X# Program for converting PD KJV biblical texts into retrieve format.
- X# Reads standard input. Writes reformatted text to standard output.
- X# Assumes the specific PC-SIG KJV format for input files. If you
- X# have a KJV text that has been "tampered" with, this program may not
- X# work correctly. And then again....
- X#
- X############################################################################
- X#
- X# Links: complete.icn ./convertr.icn ./name2num.icn
- X#
- X############################################################################
- X
- X
- Xprocedure main()
- X
- X    # Filter: reads KJV text from standard input and writes it to
- X    # standard output in retrieve format, i.e. a "::" line holding
- X    # the converted reference followed by a line holding the text
- X    # of that verse.
- X
- X    local line, bitmap, verse
- X
- X    # While you can read lines from stdin...
- X    while line := read() do {
- X
- X        # ...scan them for book ch:vs references, and output these in
- X        # retrieve format, along with corresponding text.
- X        line ? {
- X
- X            # Housekeeping.
- X            pos(0) & next          # skip past empty lines
- X            ="\x1F"                # tab past ASCII 31 (if present)
- X            tab(many('\t '))       # tab past whitespace (if present)
- X
- X            # If the line begins with a book ch:vs reference, then
- X            # write out the text of the preceding verse (if in fact
- X            # there *was* a preceding verse).  Finally, write out the
- X            # new book ch:vs reference (in retrieve format).
- X            # NOTE(review): as the file stands, REplace is asked to
- X            # replace " " with " ", which changes nothing; the
- X            # original probably replaced a double space - confirm.
- X            if bitmap := convertr(tab(find(" "))) then {
- X                write(REplace("" ~== trim(\verse, '\t \x0D'), " ", " "))
- X                write("::", bitmap)
- X                tab(many(' \t'))
- X                verse := trim(tab(0), '\t \x0D')
- X            } else {
- X                # Dump the (rest of) the line onto verse.  Until the
- X                # first reference has been seen verse is null, and
- X                # appending to it would be a run-time error, so text
- X                # that precedes the first reference is skipped.
- X                \verse ||:= " " || ("" ~== trim(tab(0), '\t \x0D'))
- X            }
- X        }
- X    }
- X    # Flush the "verse" buffer.
- X    write(REplace("" ~== trim(\verse, '\t \x0D'), " ", " "))
- X
- X    exit(0)
- X
- Xend
- X
- X
- X#
- X# From strings.icn in the IPL (written by Ralph Griswold).
- X#
- Xprocedure REplace(s1,s2,s3)
- X
- X    # Returns s1 with every occurrence of s2 replaced by s3 (one
- X    # left-to-right pass; replaced text is not rescanned).  Fails
- X    # if s2 is empty, because an empty string is found at every
- X    # position and the loop below would never advance.
- X
- X    local result, i
- X
- X    i := *s2
- X    if i = 0 then fail
- X
- X    result := ""
- X    s1 ? {
- X        while result ||:= tab(find(s2)) do {
- X            result ||:= s3
- X            move(i)          # step over the matched s2
- X        }
- X        return result || tab(0)
- X    }
- X
- Xend
- SHAR_EOF
- true || echo 'restore of kjv2rtv.icn failed'
- rm -f _shar_wnt_.tmp
- fi
- # ============= convertr.icn ==============
- if test -f 'convertr.icn' -a X"$1" != X"-c"; then
- echo 'x - skipping convertr.icn (File already exists)'
- rm -f _shar_wnt_.tmp
- else
- > _shar_wnt_.tmp
- echo 'x - extracting convertr.icn (Text)'
- sed 's/^X//' << 'SHAR_EOF' > 'convertr.icn' &&
- X############################################################################
- X#
- X# Name: convertr.icn
- X#
- X# Title: convert KJV book chap:verse reference to a
- X# writable bitmap suitable for a retrieve text-base
- X# file
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.3
- X#
- X############################################################################
- X#
- X# Links: complete.icn, ./name2num.icn
- X#
- X############################################################################
- X
- Xprocedure convertr(s)
- X
- X    # Converts a reference string s into "book:field:field", where
- X    # book is whatever name2num produces for the book name and the
- X    # fields are the runs of digits that follow it.  Fails if no
- X    # book name can be read or name2num fails.  Stops the program
- X    # unless exactly two digit fields follow the book name.
- X
- X    local bitmap, bookname, book_numeric, len, no
- X
- X    bitmap := 0
- X    bookname := ""
- X    len := 8
- X    no := 2                 # digit fields still expected
- X
- X    s ? {
- X
- X        # Book name: an optional leading digit 1-4, optional blanks,
- X        # then letters/digits.  Convert the name to its number.
- X        bookname ||:= tab(any('1234'))
- X        tab(many(' '))
- X        (bookname ||:= tab(many(&letters ++ &digits))) | fail
- X        (book_numeric := name2num(bookname)) | fail
- X        bitmap := book_numeric || ":"
- X
- X        # Tack each following digit field onto bitmap, counting
- X        # down as we go.
- X        while tab(upto(&digits)) do {
- X            no -:= 1
- X            bitmap ||:= tab(many(&digits)) || ":"
- X        }
- X        if no ~= 0 then
- X            stop("convertr: impossible reference ", image(&subject))
- X    }
- X
- X    # Too many or too few fields.  (The stop() above already ends
- X    # the program in that case.)
- X    if no ~= 0 then fail
- X
- X    return trim(bitmap, ':')
- X
- Xend
- SHAR_EOF
- true || echo 'restore of convertr.icn failed'
- rm -f _shar_wnt_.tmp
- fi
- # ============= makeind.icn ==============
- if test -f 'makeind.icn' -a X"$1" != X"-c"; then
- echo 'x - skipping makeind.icn (File already exists)'
- rm -f _shar_wnt_.tmp
- else
- > _shar_wnt_.tmp
- echo 'x - extracting makeind.icn (Text)'
- sed 's/^X//' << 'SHAR_EOF' > 'makeind.icn' &&
- X############################################################################
- X#
- X# Name: makeind.icn
- X#
- X# Title: makeind.icn
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.24
- X#
- X############################################################################
- X#
- X# This file, makeind.icn, compiles into an indexing program which
- X# creates a series of files offering the user rapid access to
- X# individual elements (usually words) within a text file. Access is
- X# gained through a set of basic retrieval utilities contained in the
- X# files retrieve.icn, bmp2text.icn, retrops.icn, and others included
- X# with this package. In order to be indexable, files must interleave
- X# string coded bitfield-style designators with text in the following
- X# manner:
- X#
- X# ::001:001:001
- X# This is text.
- X# ::001:001:002
- X# This is more text.
- X#
- X# The lines beginning with :: (a double colon) mark bitfield-style
- X# location-designators. Location designators are strings with digit
- X# fields of fixed number and length separated either by nothing (as
- X# in, say 001001002), or better yet by non-digits (e.g. 001:001:002).
- X# NOTE WELL: The bitmaps must come in ascending order. For example,
- X# if we assume three-field bitmaps, 002:001:014 would come before
- X# 003:001:013. If your file is not sorted properly, then use the
- X# utility sorttxt, provided as a part of this distribution.
- X#
- X# usage: makeind -f filename -m int -n int [-l int] [-s]
- X#
- X# When calling makeind, you must specify the filename to be indexed
- X# (-f filename), the maximum field value (-m max-value; e.g. if
- X# fields can go from 0 to 255, then -m 255 would be used), and the
- X# number of fields (-n field-number). The -s switch directs makeind
- X# to create a case-sensitive index. The default is case-insensitive.
- X# -l [int] tells makeind to create a .LIM file, which is only needed
- X# if you want to retrieve text by location marker, and not just via
- X# the index (for this, you'll need something to translate human-
- X# readable references into retrieve's native format).
- X#
- X# BUGS: This indexing routine is going to eat up a _tremendous_
- X# amount of memory when used on large files, since every token in the
- X# input file gets its own entry in wordtbl, and each entry gets a set
- X# as its corresponding value. If you don't have the memory, then you
- X# could use strings instead of sets (the insert routines will be just
- X# a tiny bit more complicated). Intermediate files could also be
- X# used. Drop me a line if you want help. Otherwise, make sure you
- X# have at *least* two megabytes core for every megabyte of text in
- X# the file you wish to index (or else a very, very good virtual
- X# memory management system).
- X#
- X# NOTE: The -S [field-sep] option is currently disabled because using
- X# it slows things down drastically. If you want to be able to
- X# specify what separator to use when breaking files down into
- X# individual words, consult ./gettokens.icn.
- X#
- X############################################################################
- X#
- X# Links: options.icn, codeobj.icn, ./indexutl.icn ./gettokens.icn
- X#
- X# See also: retrieve.icn, bmp2text.icn, expandrf.icn
- X#
- X############################################################################
- X
- X# IPL files to be linked in at compile time.
- Xlink options, codeobj
- X
- X# Global variable (for OS-dependencies).
- X# global IS # declared in indexutl.icn
- X
- X# Is is a record containing vital information on an indexed file, such
- X# as the field separator, the string-length of fields, etc. I've re-
- X# moved the record declaration from this file, and placed it in index-
- X# utl.icn.
- X# record is(FS, s_len, len, no, is_case_sensitive, r_field)
- X
- X#
- X# Main procedure.
- X#
- Xprocedure main(a)
- X
- X local usage, opt_table, fname, rollover_field, index_fname,
- X bitmap_fname, upto_field, bofname, bitmap_offset_table,
- X out_IS, limits_fname
- X # global IS # IS contains stats for file being indexed
- X
- X #
- X # Initialize global OS-related parameters, such as the directory
- X # separator (_slash) and the maximum permissible filename length
- X # minus four (to make room for extensions makeind tacks on).
- X #
- X initialize_os_params()
- X
- X #
- X # Read in and check command argument list. Insert FS and no
- X # parameters into (global) record IS. Calculate s_len, len, and
- X # bitmap_length parameters as well. Returns table of options
- SHAR_EOF
- true || echo 'restore of makeind.icn failed'
- fi
- echo 'End of part 8'
- echo 'File makeind.icn is continued in part 9'
- echo 9 > _shar_seq_.tmp
- exit 0
- --
-
- -Richard L. Goerwitz goer%sophist@uchicago.bitnet
- goer@sophist.uchicago.edu rutgers!oddjob!gide!sophist!goer
-