: Use /bin/sh
#
# $Id: tryaffix.X,v 1.6 1994/01/25 07:12:18 geoff Exp $
#
# Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. All modifications to the source code must be clearly marked as
#    such.  Binary redistributions based on modified source code
#    must be clearly marked as modified versions in the documentation
#    and/or other materials provided with the distribution.
# 4. All advertising materials mentioning features or use of this software
#    must display the following acknowledgment:
#      This product includes software developed by Geoff Kuenning and
#      other unpaid contributors.
# 5. The name of Geoff Kuenning may not be used to endorse or promote
#    products derived from this software without specific prior
#    written permission.
#
# THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#	Try out affixes to see if they produce valid roots
#
#	Usage:
#
#	tryaffix [-p | -s] [-c] dict-file affix[+addition] ...
#
#	The -p and -s flags specify whether prefixes or suffixes
#	are being tried; if neither is specified, suffixes are assumed.
#
#	If the -c flag is given, statistics on the various affixes are given:
#	a count of words it potentially applies to, and an estimate of the
#	number of dictionary bytes the flag would save.  The estimate will
#	be high if the flag generates words that are currently generated
#	by other flags.
#
#	The dictionary file, dict-file, must already be expanded and sorted,
#	and things will work best if uppercase has been folded to lower with
#	'tr'.
#
#	The "affixes" are things to be stripped from the dictionary
#	file to produce trial roots:  for English, "con" and "ing"
#	are examples.  The "additions" are letters that would have
#	been stripped off the root before adding the affix.  For
#	example, the affix "ing" strips "e" for words ending in "e"
#	(as in "like --> liking") so we might run:
#
#	tryaffix dict-file ing ing+e
#
#	to cover both cases.
#
# $Log: tryaffix.X,v $
# Revision 1.6  1994/01/25  07:12:18  geoff
# Get rid of all old RCS log lines in preparation for the 3.1 release.
#
#
# Configuration.  SORTTMP holds extra options for sort(1): "-T" plus a
# scratch directory taken from $TMPDIR, or /usr/tmp when TMPDIR is unset.
# NOTE(review): the trailing "!!SORTTMP!!" marker looks like a hook for a
# build step that rewrites this line -- confirm before editing it.
SORTTMP="-T ${TMPDIR-/usr/tmp}" # !!SORTTMP!!
USAGE='tryaffix [-p | -s] [-c] dict-file affix[+addition] ...'

# Defaults: no statistics, suffix mode.  $pre and $suf are the regular
# expression anchors later placed around each affix: suffix mode anchors
# the affix to the end of the word, prefix mode to the beginning.
counts=no pre= suf='$'

# Consume leading option flags.  Parsing stops at the first argument that
# does not start with "-" (or when the arguments run out); an unknown flag
# prints the usage line on stderr and exits with status 1.
until [ $# -eq 0 ]
do
    case "$1" in
	-c) counts=yes ;;
	-p) pre='^' suf= ;;
	-s) pre= suf='$' ;;
	-*) echo "$USAGE" 1>&2; exit 1 ;;
	*)  break ;;
    esac
    shift
done
# Validate the positional arguments left after option parsing: the first
# must be a readable dictionary file and at least one affix specification
# must follow it.  Every failure prints the usage line on stderr and exits
# with status 1.
if [ $# -eq 0 ]
then
    # Check before shifting: "shift" with no positional parameters is an
    # error, and a shell may abort on it before any usage text is shown.
    echo "$USAGE" 1>&2
    exit 1
fi
dict="$1"
shift
if [ ! -r "$dict" ]
then
    echo "Can't read $dict" 1>&2
    echo "$USAGE" 1>&2
    exit 1
elif [ $# -eq 0 ]
then
    # A dictionary was given but there are no affixes to try.
    echo "$USAGE" 1>&2
    exit 1
fi
# tryaffix_roots spec
#
# Print, one per line, every trial root produced by one affix
# specification that is itself a word in the dictionary.  "spec" is either
# "affix" or "affix+addition".
#
# Reads these globals:
#   dict     path of the sorted dictionary file
#   pre/suf  regular-expression anchors placed before/after the affix
#   SORTTMP  extra options for sort(1); may be empty or unset
# Sets the globals affix, addition and sedscript as scratch variables.
#
# The affix is inserted verbatim into a sed regular expression and the
# addition into its replacement text, so characters that are special to
# sed (such as "/" or "&") must be escaped by the caller.
tryaffix_roots()
{
    case "$1" in
	*+*)
	    # Split at the last "+": the text before it is the affix, the
	    # text after it is put back onto the stripped root.  The
	    # leading "X" keeps expr from mistaking the operand for an
	    # option or operator.
	    affix=`expr "X$1" : 'X\(.*\)+'`
	    addition=`expr "X$1" : 'X.*+\(.*\)'`
	    sedscript="s/$pre$affix$suf/$addition/p"
	    ;;
	*)
	    sedscript="s/$pre$1$suf//p"
	    ;;
    esac
    # comm needs both inputs sorted, but stripping a suffix can reorder
    # words ("beeing", "being" become "bee", "be"), so re-sort the trial
    # roots first.  The dictionary must be sorted in the same collating
    # order that sort uses in the current locale.
    # $SORTTMP is deliberately unquoted: it holds an option plus its
    # argument and must split into separate words.
    sed -n "$sedscript" "$dict" | sort ${SORTTMP-} | comm -12 - "$dict"
}

# Try each affix specification in turn.  Without -c the matching roots are
# listed under a banner line; with -c a single line is printed giving the
# specification followed by the line and byte counts of its root list.
while [ $# -ne 0 ]
do
    if [ "$counts" = no ]
    then
	echo ===== "$1" =====
	tryaffix_roots "$1"
    else
	# The unquoted substitution collapses wc's column padding.
	echo "$1" `tryaffix_roots "$1" | wc -lc`
    fi
    shift
done