home *** CD-ROM | disk | FTP | other *** search
- Newsgroups: alt.sources
- From: goer@ellis.uchicago.edu (Richard L. Goerwitz)
- Subject: kjv browser, part 7 of 11
- Message-ID: <1991Jul3.065159.28268@midway.uchicago.edu>
- Date: Wed, 3 Jul 1991 06:51:59 GMT
-
- ---- Cut Here and feed the following to sh ----
- #!/bin/sh
- # this is bibleref.07 (part 7 of a multipart archive)
- # do not concatenate these parts, unpack them in order with /bin/sh
- # file iscreen.icn continued
- #
- # Refuse to run unless an earlier part has left the sequence file behind.
- if test ! -r _shar_seq_.tmp; then
- echo 'Please unpack part 1 first!'
- exit 1
- fi
- # Read the expected part number from the sequence file.  This archive is
- # part 7, so any other value means the parts are being unpacked out of
- # order.  The test runs in a subshell; its exit status decides whether
- # the outer script carries on.
- (read Scheck
- if test "$Scheck" != 7; then
- echo Please unpack part "$Scheck" next!
- exit 1
- else
- exit 0
- fi
- ) < _shar_seq_.tmp || exit 1
- if test ! -f _shar_wnt_.tmp; then
- echo 'x - still skipping iscreen.icn'
- else
- echo 'x - continuing file iscreen.icn'
- sed 's/^X//' << 'SHAR_EOF' >> 'iscreen.icn' &&
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.26
- X#
- X############################################################################
- X#
- X# This and future versions of iscreen are placed in the public domain - RLG
- X#
- X############################################################################
- X#
- X# This file contains some rudimentary screen functions for use with
- X# itlib.icn (termlib-like routines for Icon).
- X#
- X# clear() - clears the screen (tries several methods)
- X# emphasize() - initiates emphasized mode
- X# boldface() - initiates bold mode
- X# blink() - initiates blinking mode
- X# normal() - resets to normal mode
- X# message(s) - displays message s on 2nd-to-last line
- X# underline() - initiates underline mode
- X# status_line(s,s2,p) - draws status line s on the 3rd-to-last
- X# screen line; if s is too long for the terminal, s2 is used;
- X# if p is nonnull then it either centers, left-, or right-justi-
- X# fies, depending on the value, "c," "l," or "r."
- X# clear_emphasize() - horrible way of clearing the screen to all-
- X# emphasize mode; necessary for many terminals
- X#
- X############################################################################
- X#
- X# Requires: UNIX
- X#
- X# Links: itlib.icn (or your OS-specific port of itlib)
- X#
- X# See also: boldface.icn
- X#
- X############################################################################
- X
- X
- Xprocedure clear()
- X
- X    # Erases the display and leaves the cursor at the top left, falling
- X    # back on progressively cruder termcap capabilities.
- X    local row
- X
- X    normal()
- X
- X    # Preferred: the clear-screen string ("cl").  Without it, just home
- X    # the cursor, via cursor motion ("cm") or, failing that, "ho".
- X    iputs(getval("cl")) |
- X        iputs(igoto(getval("cm"),1,1) | getval("ho"))
- X
- X    # Clear to end of display ("cd"); when that works nothing is left to do.
- X    if iputs(getval("cd")) then
- X        return
- X
- X    # No "cd": wipe every line in turn with clear-to-end-of-line ("ce"),
- X    # then put the cursor back at the top.
- X    every row := 1 to getval("li") do {
- X        iputs(igoto(getval("cm"),1,row))
- X        iputs(getval("ce"))
- X    }
- X    iputs(igoto(getval("cm"),1,1))
- X    return
- X
- Xend
- X
- X
- X
- Xprocedure boldface()
- X
- X    # Puts the terminal into bold mode.  If the terminal has no bold
- X    # ("md") capability, the global boldface is rebound to emphasize,
- X    # so this call and every later one does emphasize() instead.
- X    static bold_on, erase_cookie
- X
- X    initial {
- X        if not (bold_on := getval("md")) then {
- X            # Swap one global procedure value for another.
- X            boldface := emphasize
- X            return emphasize()
- X        }
- X        # Backspaces ("le" or "bc", else "\b"), one per "mg" cookie cell.
- X        erase_cookie := repl(getval("le"|"bc") | "\b", getval("mg"))
- X    }
- X
- X    normal()
- X    iputs(\bold_on)
- X    iputs(\erase_cookie)
- X    return
- X
- Xend
- X
- X
- X
- Xprocedure blink()
- X
- X    # Puts the terminal into blinking mode.  If there is no blink ("mb")
- X    # capability, the global blink is rebound to emphasize, so this call
- X    # and every later one does emphasize() instead.
- X    static blink_on, erase_cookie
- X
- X    initial {
- X        if not (blink_on := getval("mb")) then {
- X            # Swap one global procedure value for another.
- X            blink := emphasize
- X            return emphasize()
- X        }
- X        # Backspaces ("le" or "bc", else "\b"), one per "mg" cookie cell.
- X        erase_cookie := repl(getval("le"|"bc") | "\b", getval("mg"))
- X    }
- X
- X    normal()
- X    iputs(\blink_on)
- X    iputs(\erase_cookie)
- X    return
- X
- Xend
- X
- X
- X
- Xprocedure emphasize()
- X
- X    # Puts the terminal into emphasized mode: standout ("so") when the
- X    # terminal has it, otherwise underline ("us").  If neither exists,
- X    # only normal() is performed.
- X    static emph_on, erase_cookie
- X
- X    initial {
- X        if not (emph_on := getval("so")) then {
- X            # No standout mode; settle for underlining if available.
- X            if emph_on := getval("us") then
- X                erase_cookie := repl(getval("le"|"bc") | "\b", getval("ug"))
- X        }
- X        else
- X            # Backspaces to back up over the "sg" cookie cells.
- X            erase_cookie := repl(getval("le"|"bc") | "\b", getval("sg"))
- X    }
- X
- X    normal()
- X    iputs(\emph_on)
- X    iputs(\erase_cookie)
- X    return
- X
- Xend
- X
- X
- X
- Xprocedure underline()
- X
- X    # Puts the terminal into underline mode ("us").  On terminals
- X    # without that capability only normal() is performed.
- X    static uline_on, erase_cookie
- X
- X    initial {
- X        if uline_on := getval("us") then
- X            # Backspaces ("le" or "bc", else "\b"), one per "ug" cookie cell.
- X            erase_cookie := repl(getval("le"|"bc") | "\b", getval("ug"))
- X    }
- X
- X    normal()
- X    iputs(\uline_on)
- X    iputs(\erase_cookie)
- X    return
- X
- Xend
- X
- X
- X
- Xprocedure normal(mode)
- X
- X    # Resets the terminal to normal mode by sending whichever of the
- X    # end-standout ("se"), end-underline ("ue") and end-all-attributes
- X    # ("me") strings the terminal has, each followed by the backspaces
- X    # needed to back up over its cookie cells.  A string identical to
- X    # one already selected is not sent twice.
- X    #
- X    # The mode argument is accepted but not used.  Always returns.
- X
- X    local emph_off, uline_off
- X    static UN_emph_str, emph_cookie_str,
- X        UN_underline_str, underline_cookie_str,
- X        UN_bold_str, bold_cookie_str
- X
- X    initial {
- X
- X        # Find out code to turn off emphasize (reverse video) mode.
- X        if UN_emph_str := getval("se") then
- X            # Figure out how many backspaces we need to erase cookies.
- X            emph_cookie_str := repl(getval("le"|"bc") | "\b", getval("sg"))
- X
- X        # A missing capability leaves its variable null, and a null
- X        # operand to ~== is a run-time error.  Compare against "" in
- X        # that case; the static itself stays null so that the \ tests
- X        # below still skip it.
- X        emph_off := \UN_emph_str | ""
- X
- X        # Figure out how to turn off underline mode (unless it is the
- X        # same string as the one that ends emphasize mode).
- X        if UN_underline_str := (emph_off ~== getval("ue")) then
- X            underline_cookie_str := repl(getval("le"|"bc")|"\b", getval("ug"))
- X
- X        uline_off := \UN_underline_str | ""
- X
- X        # Figure out how to turn off boldface mode (unless it duplicates
- X        # either of the strings above).
- X        if UN_bold_str :=
- X            (uline_off ~== (emph_off ~== getval("me"))) then
- X            # Figure out how many backspaces we need to erase cookies.
- X            bold_cookie_str := repl(getval("le"|"bc") | "\b", getval("mg"))
- X
- X    }
- X
- X    iputs(\UN_emph_str) &
- X        iputs(\emph_cookie_str)
- X
- X    iputs(\UN_underline_str) &
- X        iputs(\underline_cookie_str)
- X
- X    iputs(\UN_bold_str) &
- X        iputs(\bold_cookie_str)
- X
- X    return
- X
- Xend
- X
- X
- X
- Xprocedure status_line(s,s2,p)
- X
- X    # Writes a status line, in emphasized mode, on the terminal's
- X    # third-to-last line.
- X    #
- X    #   s  - the status text (default "")
- X    #   s2 - optional shorter alternative, used when s is wider than
- X    #        the terminal (default "")
- X    #   p  - placement: "c" centered (the default), "l" left justified,
- X    #        "r" right justified
- X    #
- X    # If neither s nor s2 fits, er() is called with code 4.  An unknown
- X    # p writes a message to standard error and exits with status 4.
- X
- X    local width
- X
- X    /s := ""; /s2 := ""; /p := "c"
- X    width := getval("co")
- X    if *s > width then {
- X        # s2 fits if it is no wider than the screen, the same limit
- X        # that is applied to s above.
- X        (*s2 <= width, s := s2) |
- X            er("status_line","Your terminal is too narrow.",4)
- X    }
- X
- X    case p of {
- X        "c" : s := center(s,width)
- X        "l" : s := left(s,width)
- X        "r" : s := right(s,width)
- X        default: {
- X            # stop() would print the 4 as part of the message; write
- X            # the message and use 4 as the exit status instead.
- X            write(&errout, "status_line: Unknown option ",
- X                  string(p) | image(p))
- X            exit(4)
- X        }
- X    }
- X
- X    iputs(igoto(getval("cm"), 1, getval("li")-2))
- X    emphasize(); writes(s)
- X    normal()
- X    return
- X
- Xend
- X
- X
- X
- Xprocedure message(s)
- X
- X # Display prompt s on the second-to-last line of the screen.
- X # I hate to use the last line, due to all the problems with
- X # automatic scrolling.
- X #
- X # s defaults to "".  The last line is cleared as well.  The cursor
- X # is left just after the text that was written.  Always returns.
- X
- X /s := ""
- X normal()
- X # Clear the last screen line...
- X iputs(igoto(getval("cm"), 1, getval("li")))
- X iputs(getval("ce"))
- X normal()
- X # ...then the one above it, where the message goes.
- X iputs(igoto(getval("cm"), 1, getval("li")-1))
- X iputs(getval("ce"))
- X # s[1:co] is the first co-1 characters; if s is too short for that
- X # subscript it fails, and s is written whole.
- X writes(s[1:getval("co")] | s)
- X return
- X
- Xend
- X
- X
- X
- Xprocedure clear_underline()
- X
- X    # Horrible way of clearing the screen to all underline mode, but
- X    # the only apparent way we can do it "portably" using the termcap
- X    # capability database.
- X    #
- X    # Fills every line but the last with blanks written in underline
- X    # mode, then homes the cursor.  Always returns (it used to fall
- X    # off the end and fail, unlike the other routines in this file).
- X
- X    local i
- X
- X    underline()
- X    iputs(igoto(getval("cm"),1,1))
- X    if getval("am") then {
- X        # Automatic margins: one long run of blanks wraps from line
- X        # to line by itself.
- X        underline()
- X        writes(repl(" ", (getval("li")-1) * getval("co")))
- X    }
- X    else {
- X        # No automatic margins: position on each line explicitly.
- X        every i := 1 to getval("li")-1 do {
- X            iputs(igoto(getval("cm"), 1, i))
- X            underline()
- X            writes(repl(" ",getval("co")))
- X        }
- X    }
- X    iputs(igoto(getval("cm"),1,1))
- X    return
- X
- Xend
- X
- X
- X
- Xprocedure clear_emphasize()
- X
- X    # Horrible way of clearing the screen to all reverse-video, but
- X    # the only apparent way we can do it "portably" using the termcap
- X    # capability database.
- X    #
- X    # Fills every line but the last with blanks written in emphasized
- X    # mode, then homes the cursor.  Always returns (it used to fall
- X    # off the end and fail, unlike the other routines in this file).
- X
- X    local i
- X
- X    emphasize()
- X    iputs(igoto(getval("cm"),1,1))
- X    if getval("am") then {
- X        # Automatic margins: one long run of blanks wraps from line
- X        # to line by itself.
- X        emphasize()
- X        writes(repl(" ", (getval("li")-1) * getval("co")))
- X    }
- X    else {
- X        # No automatic margins: position on each line explicitly.
- X        every i := 1 to getval("li")-1 do {
- X            iputs(igoto(getval("cm"), 1, i))
- X            emphasize()
- X            writes(repl(" ",getval("co")))
- X        }
- X    }
- X    iputs(igoto(getval("cm"),1,1))
- X    return
- X
- Xend
- SHAR_EOF
- echo 'File iscreen.icn is complete' &&
- true || echo 'restore of iscreen.icn failed'
- rm -f _shar_wnt_.tmp
- fi
- # ============= findre.icn ==============
- if test -f 'findre.icn' -a X"$1" != X"-c"; then
- echo 'x - skipping findre.icn (File already exists)'
- rm -f _shar_wnt_.tmp
- else
- > _shar_wnt_.tmp
- echo 'x - extracting findre.icn (Text)'
- sed 's/^X//' << 'SHAR_EOF' > 'findre.icn' &&
- X########################################################################
- X#
- X# Name: findre.icn
- X#
- X# Title: "Find" Regular Expression
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.15
- X#
- X########################################################################
- X#
- X# I place this and any later versions in the public domain - RLG.
- X#
- X########################################################################
- X#
- X# DESCRIPTION: findre() is like the Icon builtin function find(),
- X# except that it takes, as its first argument, a regular expression
- X# pretty much like the ones the Unix egrep command uses (the few
- X# minor differences are listed below). Its syntax is the same as
- X# find's (i.e. findre(s1,s2,i,j)), with the exception that a no-
- X# argument invocation wipes out all static structures utilized by
- X# findre, and then forces a garbage collection.
- X#
- X# (For those not familiar with regular expressions and the Unix egrep
- X# command: findre() offers a simple and compact wildcard-based search
- X# system. If you do a lot of searches through text files, or write
- X# programs which do searches based on user input, then findre is a
- X# utility you might want to look over.)
- X#
- X# IMPORTANT DIFFERENCES between find and findre: As noted above,
- X# findre() is just a find() function that takes a regular expression
- X# as its first argument. One major problem with this setup is that
- X# it leaves the user with no easy way to tab past a matched
- X# substring, as with
- X#
- X# s ? write(tab(find("hello")+5))
- X#
- X# In order to remedy this intrinsic deficiency, findre() sets the
- X# global variable __endpoint to the first position after any given
- X# match occurs. Use this variable with great care, preferably
- X# assigning its value to some other variable immediately after the
- X# match (for example, findre("hello [.?!]*",s) & tmp := __endpoint).
- X# Otherwise, you will certainly run into trouble. (See the example
- X# below for an illustration of how __endpoint is used).
- X#
- X# IMPORTANT DIFFERENCES between egrep and findre: findre utilizes
- X# the same basic language as egrep. The only big difference is that
- X# findre uses intrinsic Icon data structures and escaping conven-
- X# tions rather than those of any particular Unix variant. Be care-
- X# ful! If you put findre("\(hello\)",s) into your source file,
- X# findre will treat it just like findre("(hello)",s). If, however,
- X# you enter '\(hello\)' at run-time (via, say, findre(!&input,s)),
- X# what Icon receives will depend on your operating system (most
- X# likely, a trace will show "\\(hello\\)").
- X#
- X# BUGS: Space has essentially been conserved at the expense of time
- X# in the automata produced by findre(). The algorithm, in other
- X# words, will produce the equivalent of a pushdown automaton under
- X# certain circumstances, rather than strive (at the expense of space)
- X# for full determinism. I tried to make up a nfa -> dfa converter
- X# that would only create that portion of the dfa it needed to accept
- X# or reject a string, but the resulting automaton was actually quite
- X# slow (if anyone can think of a way to do this in Icon, and keep it
- X# small and fast, please let us all know about it). Note that under
- X# version 8 of Icon, findre takes up negligible storage space, due to
- X# the much improved hashing algorithm. I have not tested it under
- X# version 7, but I would expect it to use up quite a bit more space
- X# in that environment.
- X#
- X# IMPORTANT NOTE: Findre takes a shortest-possible-match approach
- X# to regular expressions. In other words, if you look for "a*",
- X# findre will not even bother looking for an "a." It will just match
- X# the empty string. Without this feature, findre would perform a bit
- X# more slowly. The problem with such an approach is that often the
- X# user will want to tab past the longest possible string of matched
- X# characters (say tab((findre("a*|b*"), __endpoint))). In circumstan-
- X# ces like this, please just use something like:
- X#
- X# s ? {
- X# tab(find("a")) & # or use Arb() from the IPL (patterns.icn)
- X# tab(many('a'))
- X# tab(many('b'))
- X# }
- X#
- X# or else use some combination of findre and the above.
- X#
- X########################################################################
- X#
- X# REGULAR EXPRESSION SYNTAX: Regular expression syntax is complex,
- X# and yet simple. It is simple in the sense that most of its power
- X# is concentrated in about a dozen easy-to-learn symbols. It is
- X# complex in the sense that, by combining these symbols with
- X# characters, you can represent very intricate patterns.
- X#
- X# I make no pretense here of offering a full explanation of regular
- X# expressions, their usage, and the deeper nuances of their syntax.
- X# As noted above, this should be gleaned from a Unix manual. For
- X# quick reference, however, I have included a brief summary of all
- X# the special symbols used, accompanied by an explanation of what
- X# they mean, and, in some cases, of how they are used (most of this
- X# is taken from the comments prepended to Jerry Nowlin's Icon-grep
- X# command, as posted a couple of years ago):
- X#
- X# ^ - matches if the following pattern is at the beginning
- X# of a line (i.e. ^# matches lines beginning with "#")
- X# $ - matches if the preceding pattern is at the end of a line
- X# . - matches any single character
- X# + - matches from 1 to any number of occurrences of the
- X# previous expression (i.e. a character, or set of paren-
- X# thesized/bracketed characters)
- X# * - matches from 0 to any number of occurrences of the previous
- X# expression
- X# \ - removes the special meaning of any special characters
- X# recognized by this program (i.e. if you want to match lines
- X# beginning with a "[", write ^\[, and not ^[)
- X# | - matches either the pattern before it, or the one after
- X# it (i.e. abc|cde matches either abc or cde)
- X# [] - matches any member of the enclosed character set, or,
- X# if ^ is the first character, any nonmember of the
- X# enclosed character set (i.e. [^ab] matches any character
- X# _except_ a and b).
- X# () - used for grouping (e.g. ^(abc|cde)$ matches lines consist-
- X# ing of either "abc" or "cde," while ^abc|cde$ matches
- X# lines either beginning with "abc" or ending in "cde")
- X#
- X#########################################################################
- X#
- X# EXAMPLE program:
- X#
- X# procedure main(a)
- X# while line := !&input do {
- X# token_list := tokenize_line(line,a[1])
- X# every write(!token_list)
- X# }
- X# end
- X#
- X# procedure tokenize_line(s,sep)
- X# tmp_lst := []
- X# s ? {
- X# while field := tab(findre(sep)|0) &
- X# mark := __endpoint
- X# do {
- X# put(tmp_lst,"" ~== field)
- X# if pos(0) then break
- X# else tab(mark)
- X# }
- X# }
- X# return tmp_lst
- X# end
- X#
- X# The above program would be compiled with findre (e.g. "icont
- X# test_prg.icn findre.icn") to produce a single executable which
- X# tokenizes each line of input based on a user-specified delimiter.
- X# Note how __endpoint is set soon after findre() succeeds. Note
- X# also how empty fields are excluded with "" ~==, etc. Finally, note
- X# that the temporary list, tmp_lst, is not needed. It is included
- X# here merely to illustrate one way in which tokens might be stored.
- X#
- X# Tokenizing is, of course, only one of many uses one might put
- X# findre to. It is very helpful in allowing the user to construct
- X# automata at run-time. If, say, you want to write a program that
- X# searches text files for patterns given by the user, findre would be
- X# a perfect utility to use. Findre in general permits more compact
- X# expression of patterns than one can obtain using intrinsic Icon
- X# scanning facilities. Its near complete compatibility with the Unix
- X# regexp library, moreover, makes for greater ease of porting,
- X# especially in cases where Icon is being used to prototype C code.
- X#
- X#########################################################################
- X
- X
- X# Globals shared by the findre procedures.  findre() reads state_table
- X# and biggest_nonmeta_str after calling MakeFSTN(), and sets __endpoint
- X# to the position just past each match it reports.  parends_present and
- X# slash_present are not referenced in this part of the file (presumably
- X# used by the tokenizer/FSTN builder -- confirm in the continuation).
- Xglobal state_table, parends_present, slash_present
- Xglobal biggest_nonmeta_str, __endpoint
- X# Field names match what apply_FSTN() reads from each state-table entry:
- X# it calls op(arg) and, on success, moves to state.
- Xrecord o_a_s(op,arg,state)
- X
- X
- Xprocedure findre(re, s, i, j)
- X
- X    # Regular-expression analogue of find(): generates the positions
- X    # in s, between i and j, at which re matches.  Before each position
- X    # is suspended, the global __endpoint is set to the position just
- X    # past that match.
- X    #
- X    #   re - the regular expression; if null, the cached tables are
- X    #        discarded, a garbage collection is forced, and the call
- X    #        returns
- X    #   s  - string to search (default &subject)
- X    #   i  - start position (default &pos if s is defaulted, else 1)
- X    #   j  - end position (default: the end of s)
- X    #
- X    # Fails when there are no (more) matches.
- X
- X    local p, x, nonmeta_len, tokenized_re, tmp
- X    static FSTN_table, STRING_table
- X    initial {
- X        FSTN_table := table()
- X        STRING_table := table()
- X    }
- X
- X    if /re then {
- X        FSTN_table := table()
- X        STRING_table := table()
- X        collect()  # do it *now*
- X        return
- X    }
- X
- X    # Same defaults as find().  &pos is never null, so it must only be
- X    # consulted when s itself is defaulted; an explicit s starts at 1.
- X    if /s then {
- X        s := &subject
- X        /i := &pos
- X    }
- X    else /i := 1
- X    # Convert nonpositive position specifications to positive ones.
- X    if i < 1 then
- X        i := *s + (i+1)
- X    if \j then {
- X        if j < 1 then
- X            j := *s + (j+1)
- X    }
- X    else j := *s+1
- X
- X    if /FSTN_table[re] then {
- X        # If we haven't seen this re before, then...
- X        if \STRING_table[re] then {
- X            # ...if it's in the STRING_table, use plain find()
- X            every p := find(STRING_table[re],s,i,j)
- X            do { __endpoint := p + *STRING_table[re]; suspend p }
- X            fail
- X        }
- X        else {
- X            # However, if it's not in the string table, we have to
- X            # tokenize it and check for metacharacters.  If it has
- X            # metas, we create an FSTN, and put that into FSTN_table;
- X            # otherwise, we just put it into the STRING_table.
- X            tokenized_re := tokenize(re)
- X            if 0 > !tokenized_re then {
- X                # if at least one element is < 0, re has metas
- X                MakeFSTN(tokenized_re) | err_out(re,2)
- X                # both biggest_nonmeta_str and state_table are global
- X                /FSTN_table[re] := [.biggest_nonmeta_str, copy(state_table)]
- X            }
- X            else {
- X                # re has no metas; put the input string into STRING_table
- X                # for future reference, and execute find() at once
- X                tmp := ""; every tmp ||:= char(!tokenized_re)
- X                insert(STRING_table,re,tmp)
- X                every p := find(STRING_table[re],s,i,j)
- X                do { __endpoint := p + *STRING_table[re]; suspend p }
- X                fail
- X            }
- X        }
- X    }
- X
- X
- X    if nonmeta_len := (1 < *FSTN_table[re][1]) then {
- X        # If the biggest non-meta string in the original re
- X        # was more than 1, then put in a check for it...
- X        s[1:j] ? {
- X            tab(x := i to j - nonmeta_len) &
- X            (find(FSTN_table[re][1]) | fail) \ 1 &
- X            (__endpoint := apply_FSTN(&null,FSTN_table[re][2])) &
- X            (suspend x)
- X        }
- X    }
- X    else {
- X        #...otherwise it's not worth worrying about the biggest nonmeta str
- X        s[1:j] ? {
- X            tab(x := i to j) &
- X            (__endpoint := apply_FSTN(&null,FSTN_table[re][2])) &
- X            (suspend x)
- X        }
- X    }
- X
- Xend
- X
- X
- X
- Xprocedure apply_FSTN(ini,tbl)
- X
- X # Runs the state table against the current scanning subject, starting
- X # at &pos.  Returns the position reached when the final state (0) is
- X # entered; otherwise restores &pos and fails.
- X #
- X #   ini - state to start in; null on the outermost call (see below)
- X #   tbl - state table; only consulted when ini is null
- X
- X local biggest_pos, POS, tmp, fin
- X static s_tbl
- X
- X # & binds more loosely than :=, so this is a three-part conjunction.
- X # When ini is null (the call made by findre) it becomes 1 and tbl is
- X # saved in the static s_tbl; when ini is supplied, /ini fails and the
- X # table saved earlier stays in use.  biggest_pos is assigned here but
- X # not read anywhere in this procedure.
- X /ini := 1 & s_tbl := tbl & biggest_pos := 1
- X if ini = 0 then {
- X return &pos
- X }
- X POS := &pos
- X fin := 0
- X
- X repeat {
- X # Try the entries for state ini in turn until one whose op(arg)
- X # yields a position that tab() accepts.
- X if tmp := !s_tbl[ini] &
- X tab(tmp.op(tmp.arg))
- X then {
- X if tmp.state = fin
- X then return &pos
- X else ini := tmp.state
- X }
- X # No entry applies: undo any movement and fail the whole call.
- X else (&pos := POS, fail)
- X }
- X
- Xend
- X
- X
- X
- Xprocedure tokenize(s)
- X
- X local token_list, chr, tmp, b_loc, next_one, fixed_length_token_list, i
- X
- X token_list := list()
- X s ? {
- X tab(many('*+?|'))
- X while chr := move(1) do {
- X if chr == "\\"
- X # it can't be a metacharacter; remove the \ and "put"
- X # the integer value of the next chr into token_list
- X then put(token_list,ord(move(1))) | err_out(s,2,chr)
- X else if any('*+()|?.$^',chr)
- X then {
- X # Yuck! Egrep compatibility stuff.
- X case chr of {
- X "*" : {
- X tab(many('*+?'))
- X put(token_list,-ord("*"))
- X }
- X "+" : {
- SHAR_EOF
- true || echo 'restore of findre.icn failed'
- fi
- echo 'End of part 7'
- echo 'File findre.icn is continued in part 8'
- echo 8 > _shar_seq_.tmp
- exit 0
- --
-
- -Richard L. Goerwitz goer%sophist@uchicago.bitnet
- goer@sophist.uchicago.edu rutgers!oddjob!gide!sophist!goer
-