home *** CD-ROM | disk | FTP | other *** search
- /*
- IXAttributeReader.h
- Copyright 1991, NeXT Computer, Inc.
- */
-
- #import <objc/Object.h>
- #import <objc/hashtable.h>
-
- @class List;
-
- // This protocol is adopted by subclasses that wish to perform special case
- // folding or lexeme extraction. The IXJapaneseLexer in the Kanji version of
- // the library is a case in point, since the Kanji encodings involve shifting
- // between one- and two-byte character representations.
-
- @protocol IXLexemeExtraction
- // Adopted by subclasses that need special case folding or lexeme extraction.
- - (unsigned)foldCase:(char *)string inLength:(unsigned)length; // NOTE(review): presumably folds `string` in place and returns the resulting length -- confirm against the implementation
- - (unsigned)getLexeme:(char *)string inLength:(unsigned)length
- fromStream:(NXStream *)stream; // NOTE(review): presumably stores the next lexeme read from `stream` into `string` (capacity `length`) and returns its length -- confirm
-
- @end
-
- @interface IXAttributeReader: Object // Object subclass that analyzes streams (see analyzeStream: below)
- {
- NXHashTable *stopWords; // these words are removed from the output
- const char *punctuation; // characters that delimit words
- unsigned char *charMapping; // table for mapping characters; NOTE(review): presumably indexed by character code -- confirm
- struct {
- unsigned caseFolding:1; // fold upper case to lower case
- unsigned pluralFolding:1; // fold plural to singular form
- unsigned stemsReduced:1; // reduce words to their stems
- unsigned tokenUniquing:1; // unique tokens to pack output
- } booleanOptions; // one-bit option flags
- }
-
- // Analyzes a stream, returning the result in Attribute Reader Format.
- - (NXStream *)analyzeStream:(NXStream *)stream; // NOTE(review): ownership of the returned stream is not stated here -- confirm who closes it
- // The two methods below take the same (string, length) arguments as foldCase:inLength: in IXLexemeExtraction.
- - (unsigned)foldPlural:(char *)string inLength:(unsigned)length; // NOTE(review): presumably applied when pluralFolding is set; return value presumably the new length -- confirm
- - (unsigned)reduceStem:(char *)string inLength:(unsigned)length; // NOTE(review): presumably applied when stemsReduced is set; return value presumably the new length -- confirm
-
- @end
-
- @interface IXAttributeReader(Configuration)
- // Configuration accessors. Methods declared without a return type return `id` (the Objective-C default).
- - (BOOL)isCaseFolded; // true if case folding is enabled
- - setCaseFolded:(BOOL)flag; // enables or disables case folding
-
- - (BOOL)arePluralsFolded; // true if plural folding is enabled
- - setPluralsFolded:(BOOL)flag; // enables or disables plural folding
-
- - (BOOL)areStemsReduced; // true if stem removal is enabled
- - setStemsReduced:(BOOL)flag; // enables or disables stem removal
- // NOTE(review): the getter below returns non-const char * although the `punctuation` ivar is const char * -- confirm whether callers may modify or must free the result.
- - (char *)punctuation; // returns the currently defined token delimiters
- - setPunctuation:(const char *)string; // sets the token delimiters
-
- - (char *)stopWords; // returns a newline-delimited stop word string; NOTE(review): ownership of the returned buffer is not stated -- confirm
- - setStopWords:(const char *)string; // sets the stop words
-
- - readStopWords:(NXStream *)stream; // reads stop words from a stream
- - writeStopWords:(NXStream *)stream; // writes stop words to a stream
-
- - readStopWordsFromFile:(const char *)filename; // reads stop words from a file
- - writeStopWordsToFile:(const char *)filename; // writes stop words to a file
-
- @end
-
- // The following protocol is obsolete, and may not be defined in future
- // releases. The methods are now declared by the classes that implement them.
-
- @protocol IXAttributeReading
- // Declares the same analyzeStream: selector that the IXAttributeReader class interface declares directly.
- - (NXStream *)analyzeStream:(NXStream *)stream; // takes a stream and returns a stream
-
- @end
-
-