home *** CD-ROM | disk | FTP | other *** search
- -- Count frequencies of words in standard input.
- -- Uses a binary tree to speed lookups and produce
- -- an alphabetic listing.
-
- -- usage: ex freq < file1 > file2
-
- constant EOF = -1
- constant TRUE = 1
-
- constant STANDARD_IN = 0,
- STANDARD_OUT = 1
-
- constant NAME = 1,
- COUNT = 2,
- LEFT = 3,
- RIGHT = 4
-
- without type_check
-
- type subtree(sequence t)
- -- A detailed recursive type check.
- -- If you choose to always run with type checking
- -- you might prefer a simpler, faster check.
- -- e.g. replace "subtree" by "sequence" below
- if length(t) = 0 then
- return TRUE
- else
- return length(t) = 4 and
- sequence(t[NAME]) and
- integer(t[COUNT]) and
- subtree(t[LEFT]) and
- subtree(t[RIGHT])
- end if
- end type
-
- function next_word()
- -- read standard input to get next alphabetic string
- integer c
- sequence word
-
- word = ""
- while TRUE do
- c = getc(STANDARD_IN)
- if (c >= 'A' and c <= 'Z') or
- (c >= 'a' and c <= 'z') then
- word = word & c
- elsif length(word) > 0 then
- return word
- elsif c = EOF then
- return 0
- end if
- end while
- end function
-
- function insert(subtree node, sequence word)
- -- Insert new word into tree or increment existing word count by 1.
- -- Return modified node.
- -- This is quite fast since internally Euphoria is copying
- -- pointers to subtrees, *not* all the data as it might appear.
- integer x
-
- if length(node) = 0 then
- -- couldn't find it - create new node
- node = {word, 1, {}, {}}
- else
- x = compare(word, node[NAME])
- if x = 0 then
- node[COUNT] = node[COUNT] + 1 -- found it, increment count
- elsif x < 0 then
- node[LEFT] = insert(node[LEFT], word) -- insert it on the left
- else
- node[RIGHT] = insert(node[RIGHT], word)-- insert it on the right
- end if
- end if
- return node
- end function
-
- procedure printTree(subtree node)
- -- recursively print tree information
- -- in alphabetical order
- if length(node) = 0 then
- return
- end if
- printTree(node[LEFT])
- printf(STANDARD_OUT, "%s:%d\n", {node[NAME], node[COUNT]})
- printTree(node[RIGHT])
- end procedure
-
- -- root of the whole tree
- subtree root
- root = {}
-
- procedure word_count()
- -- build a binary tree containing words in standard input
- object word
-
- while TRUE do
- word = next_word()
- if atom(word) then
- exit
- end if
- root = insert(root, word)
- end while
- printTree(root)
- end procedure
-
- word_count()
-
-