home *** CD-ROM | disk | FTP | other *** search
- ###########################################################################
- #
- # File: dif.icn
- #
- # Subject: Procedure to check for differences
- #
- # Author: Robert J. Alexander
- #
- # Date: May 15, 1990
- #
- ###########################################################################
- #
- # The procedure dif() is a generator that produces a sequence of
- # differences between an arbitrary number of input streams. Each result
- # is returned as a list of diff_recs, one for each input stream, with
- # each diff_rec containing a list of items that differ and their position
- # in the input stream. The diff_rec type is declared as:
- #
- # record diff_rec(pos,diffs)
- #
- # Dif fails if there are no differences, i.e. it produces an empty
- # result sequence.
- #
- # For example, if two input streams are:
- #
- # a b c d e f g h
- # a b d e f i j
- #
- # the output sequence would be:
- #
- # [diff_rec(3,[c]),diff_rec(3,[])]
- # [diff_rec(7,[g,h]),diff_rec(6,[i,j])
- #
- # The arguments to dif(stream,compare,eof,group) are:
- #
- # stream A list of data objects that represent input streams
- # from which dif will extract its input "records".
- # The elements can be of several different types which
- # result in different actions, as follows:
- #
- # Type Action
- # =========== =============================
- # file file is "read" to get records
- #
- # co-expression co-expression is activated to
- # get records
- #
- # list records are "gotten" (get()) from
- # the list
- #
- # diff_proc a record type defined in "dif" to
- # allow a procedure (or procedures)
- # suppled by dif's caller to be called
- # to get records. Diff_proc has two
- # fields, the procedure to call and the
- # argument to call it with. Its
- # definition looks like this:
- #
- # record diff_proc(proc,arg)
- #
- #
- # Optional arguments:
- #
- # compare Item comparison procedure -- succeeds if
- # "equal", otherwise fails (default is the
- # identity "===" comparison). The comparison
- # must allow for the fact that the eof object
- # (see next) might be an argument, and a pair of
- # eofs must compare equal.
- #
- # eof An object that is distinguishable from other
- # objects in the stream. Default is &null.
- #
- # group A procedure that is called with the current number
- # of unmatched items as its argument. It must
- # return the number of matching items required
- # for file synchronization to occur. Default is
- # the formula Trunc((2.0 * Log(M)) + 2.0) where
- # M is the number of unmatched items.
- #
- ############################################################################
-
-
- record diff_rec(pos,diffs)
- record diff_proc(proc,arg)
- record diff_file(stream,queue)
-
-
- procedure dif(stream,compare,eof,group)
- local f,linenbr,line,difflist,gf,i,j,k,l,m,n,x,test,
- result,synclist,nsyncs,syncpoint
- #
- # Provide default arguments and initialize data.
- #
- /compare := proc("===",2)
- /group := groupfactor
- f := []
- every put(f,diff_file(!stream,[]))
- linenbr := list(*stream,0)
- line := list(*stream)
- test := list(*stream)
- difflist := list(*stream)
- every !difflist := []
- #
- # Loop to process all records of all input streams.
- #
- repeat {
- #
- # This is the "idle loop" where we spin until we find a discrepancy
- # among the data streams. A line is read from each stream, with a
- # check for eof on all streams. Then the line from the first
- # stream is compared to the lines from all the others.
- #
- repeat {
- every i := 1 to *stream do
- line[i] := diffread(f[i]) | eof
- if not (every x := !line do
- (x === eof) | break) then break break
- every !linenbr +:= 1
- if (every x := !line[2:0] do
- compare(x,line[1]) | break) then break
- }
- #
- # Aha! We have found a difference. Create a difference list,
- # one entry per stream, primed with the differing line we just found.
- #
- every i := 1 to *stream do
- difflist[i] := [line[i]]
- repeat {
- #
- # Add a new input line from each stream to the difference list.
- # Then build lists of the subset of different lines we need to
- # actually compare.
- #
- every i := 1 to *stream do
- put(difflist[i],diffread(f[i]) | eof)
- gf := group(*difflist[1])
- every i := 1 to *stream do
- test[i] := difflist[i][-gf:0]
- #
- # Create a "synchronization matrix", with a row and column for
- # each input stream. The entries will be initially &null, then
- # will be set to the synchronization position if sync is
- # achieved between the two streams. Another list is created to
- # keep track of how many syncs have been achieved for each stream.
- #
- j := *difflist[1] - gf + 1
- synclist := list(*stream)
- every !synclist := list(*stream)
- every k := 1 to *stream do
- synclist[k][k] := j
- nsyncs := list(*stream,1)
- #
- # Loop through positions to start comparing lines. This set of
- # nested loops will be exited when a stream achieves sync with
- # all other streams.
- #
- every i := 1 to j do {
- #
- # Loop through all streams.
- #
- every k := 1 to *stream do {
- #
- # Loop through all streams.
- #
- every l := 1 to *stream do {
- if /synclist[k][l] then { # avoid unnecessary comparisons
- #
- # Compare items of the test list to the differences list
- # at all possible positions. If they compare, store the
- # current position in the sync matrix and bump the count
- # of streams sync'd to this stream. If all streams are in
- # sync, exit all loops but the outer one.
- #
- m := i - 1
- if not every n := 1 to gf do {
- if not compare(test[k][n],difflist[l][m +:= 1]) then break
- } then {
- synclist[k][l] := i # store current position
- if (nsyncs[k] +:= 1) = *stream then break break break break
- }
- }
- }
- }
- }
- }
- #
- # Prepare an output set. Since we have read the input streams past
- # the point of synchronization, we must queue those lines before their
- # input streams.
- #
- synclist := synclist[k]
- result := list(*stream)
- every i := 1 to *stream do {
- j := synclist[i]
- while difflist[i][j -:= 1] === eof # trim past eof
- result[i] := diff_rec(linenbr[i],difflist[i][1:j + 1])
- f[i].queue := difflist[i][synclist[i] + gf:0] ||| f[i].queue
- linenbr[i] +:= synclist[i] + gf - 2
- difflist[i] := []
- }
- suspend result
- }
- end
-
- #
- # diffread() -- Read a line from an input stream.
- #
- procedure diffread(f)
- local x
- return get(f.queue) | case type(x := f.stream) of {
- "file" | "window": read(x)
- "co-expression": @x
- "diff_proc": x.proc(x.arg)
- "list": get(x)
- }
- end
-
- #
- # groupfactor() -- Determine how many like lines we need to close
- # off a group of differences. This is the default routine -- the
- # caller may provide his own.
- #
- procedure groupfactor(m) # Compute: Trunc((2.0 * Log(m)) + 2.0)
- m := string(m)
- return 2 * *m + if m <<= "316227766"[1+:*m] then 0 else 1
- end
-
-