home *** CD-ROM | disk | FTP | other *** search
Java Source | 1998-03-20 | 13.8 KB | 269 lines |
- /*
- * @(#)CharacterBreakData.java 1.9 98/03/18
- *
- * (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
- * (C) Copyright IBM Corp. 1996 - All Rights Reserved
- *
- * Portions copyright (c) 1996-1998 Sun Microsystems, Inc. All Rights Reserved.
- *
- * The original version of this source code and documentation is copyrighted
- * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
- * materials are provided under terms of a License Agreement between Taligent
- * and Sun. This technology is protected by multiple US and International
- * patents. This notice and attribution to Taligent may not be removed.
- * Taligent is a registered trademark of Taligent, Inc.
- *
- * Permission to use, copy, modify, and distribute this software
- * and its documentation for NON-COMMERCIAL purposes and without
- * fee is hereby granted provided that this copyright notice
- * appears in all copies. Please refer to the file "copyright.html"
- * for further important copyright and licensing information.
- *
- * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
- * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
- * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
- * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
- * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
- * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
- *
- */
-
- package java.text;
-
- /**
- * The CharacterBreakData contains data used by SimpleTextBoundary
- * to determine character breaks.
- * @see #BreakIterator
- */
- final class CharacterBreakData extends TextBoundaryData
- {
- private static final byte accent_diacritic = 0;
- private static final byte baseForm = 1;
- private static final byte baseCR = 2;
- private static final byte baseLF = 3;
- private static final byte choseong = 4; // Korean initial consonant
- private static final byte jungseong = 5; // Korean vowel
- private static final byte jongseong = 6; // Korean final consonant
- private static final byte EOS = 7;
- private static final int COL_COUNT = 8;
-
- private static final byte SI = (byte)0x80;
- private static final byte STOP = (byte) 0;
- private static final byte SI_STOP = (byte)SI + STOP;
-
- private static final byte kCharacterForwardData[] =
- {
- // acct base cr lf
- // cho jung jong EOS
- STOP, STOP, STOP, STOP,
- STOP, STOP, STOP, STOP,
-
- // 1
- (byte)(SI+2), (byte)(SI+2), (byte)(SI+3), (byte)(SI+7),
- (byte)(SI+4), (byte)(SI+5), (byte)(SI+6), SI_STOP,
-
- // 2
- (byte)(SI+2), SI_STOP, SI_STOP, SI_STOP,
- SI_STOP, SI_STOP, SI_STOP, SI_STOP,
-
- // 3
- SI_STOP, SI_STOP, SI_STOP, (byte)(SI+7),
- SI_STOP, SI_STOP, SI_STOP, SI_STOP,
-
- // 4
- (byte)(SI+2), SI_STOP, SI_STOP, SI_STOP,
- (byte)(SI+4), (byte)(SI+5), (byte)(SI+6), SI_STOP,
-
- // 5
- (byte)(SI+2), SI_STOP, SI_STOP, SI_STOP,
- SI_STOP, (byte)(SI+5), (byte)(SI+6), SI_STOP,
-
- // 6
- (byte)(SI+2), SI_STOP, SI_STOP, SI_STOP,
- SI_STOP, SI_STOP, (byte)(SI+6), SI_STOP,
-
- // 7
- SI_STOP, SI_STOP, SI_STOP, SI_STOP,
- SI_STOP, SI_STOP, SI_STOP, SI_STOP
- };
- private static final WordBreakTable kCharacterForwardTable =
- new WordBreakTable(COL_COUNT, kCharacterForwardData);
- private static final byte kCharacterBackwardData[] =
- {
- // acct base cr lf
- // cho jung jong EOS
- STOP, STOP, STOP, STOP,
- STOP, STOP, STOP, STOP,
-
- // 1
- (byte)(SI+1), SI_STOP, SI_STOP, (byte)(SI+1),
- SI_STOP, (byte)(SI+1), (byte)(SI+1), SI_STOP
- };
-
- private static final WordBreakTable kCharacterBackwardTable =
- new WordBreakTable(COL_COUNT, kCharacterBackwardData);
- private static final int kRawMapping[] =
- {
- baseForm, //UNASSIGNED = 0,
- baseForm, //UPPERCASE_LETTER = 1,
- baseForm, //LOWERCASE_LETTER = 2,
- baseForm, //TITLECASE_LETTER = 3,
- baseForm, //MODIFIER_LETTER = 4,
- baseForm, //OTHER_LETTER = 5,
- accent_diacritic, //NON_SPACING_MARK = 6,
- accent_diacritic, //ENCLOSING_MARK = 7,
- baseForm, //COMBINING_SPACING_MARK = 8,
- baseForm, //DECIMAL_DIGIT_NUMBER = 9,
- baseForm, //LETTER_NUMBER = 10,
- baseForm, //OTHER_NUMBER = 11,
- baseForm, //SPACE_SEPARATOR = 12,
- baseForm, //LINE_SEPARATOR = 13,
- baseForm, //PARAGRAPH_SEPARATOR = 14,
- baseForm, //CONTROL = 15,
- baseForm, //FORMAT = 16,
- baseForm, //???? = 17,
- baseForm, //PRIVATE_USE = 18,
- baseForm, //SURROGATE = 19,
- baseForm, //DASH_PUNCTUATION = 20,
- baseForm, //START_PUNCTUATION = 21,
- baseForm, //END_PUNCTUATION = 22,
- baseForm, //CONNECTOR_PUNCTUATION = 23,
- baseForm, //OTHER_PUNCTUATION = 24,
- baseForm, //MATH_SYMBOL = 25,
- baseForm, //CURRENCY_SYMBOL = 26,
- baseForm, //MODIFIER_SYMBOL = 27,
- baseForm, //OTHER_SYMBOL = 28;
- };
-
- private static final SpecialMapping kExceptionChar[] = //{};
- {
- new SpecialMapping(ASCII_LINEFEED, baseLF),
- new SpecialMapping(ASCII_CARRIAGE_RETURN, baseCR),
- new SpecialMapping(HANGUL_CHOSEONG_LOW, HANGUL_CHOSEONG_HIGH, choseong),
- new SpecialMapping(HANGUL_JUNGSEONG_LOW, HANGUL_JUNGSEONG_HIGH, jungseong),
- new SpecialMapping(HANGUL_JONGSEONG_LOW, HANGUL_JONGSEONG_HIGH, jongseong),
- new SpecialMapping(PUNCTUATION_LINE_SEPARATOR, PUNCTUATION_PARAGRAPH_SEPARATOR, baseLF),
- new SpecialMapping(END_OF_STRING, EOS)
- };
-
- private static final boolean CharacterExceptionFlags[] = {
- false, // kNonCharacter = 0,
- false, // kUppercaseLetter = 1,
- false, // kLowercaseLetter = 2,
- false, // kTitlecaseLetter = 3,
- false, // kModifierLetter = 4,
- true, // kOtherLetter = 5,
- false, // kNonSpacingMark = 6,
- false, // kEnclosingMark = 7,
- false, // kCombiningSpacingMark = 8,
- false, // kDecimalNumber = 9,
- false, // kLetterNumber = 10,
- false, // kOtherNumber = 11,
- false, // kSpaceSeparator = 12,
- true, // kLineSeparator = 13,
- true, // kParagraphSeparator = 14,
- true, // kControlCharacter = 15,
- false, // kFormatCharacter = 16,
- false, // UNDEFINED = 17,
- false, // kPrivateUseCharacter = 18,
- false, // kSurrogate = 19,
- false, // kDashPunctuation = 20,
- false, // kOpenPunctuation = 21,
- false, // kClosePunctuation = 22,
- false, // kConnectorPunctuation = 23,
- false, // kOtherPunctuation = 24,
- false, // kMathSymbol = 25,
- false, // kCurrencySymbol = 26,
- false, // kModifierSymbol = 27,
- false // kOtherSymbol = 28
- };
-
- private static final int kCharacterAsciiValues[] = {
- // null soh stx etx eot enq ask bell
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // bs ht lf vt ff cr so si
- baseForm, baseForm, baseLF, baseForm, baseForm, baseCR, baseForm, baseForm,
- // dle dc1 dc2 dc3 dc4 nak syn etb
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // can em sub esc fs gs rs us
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // sp ! " # $ % & '
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ( ) * + , - . /
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // 0 1 2 3 4 5 6 7
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // 8 9 : ; < = > ?
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // @ A B C D E F G
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // H I J K L M N O
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // P Q R S T U V W
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // X Y Z [ \ ] ^ _
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ` a b c d e f g
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // h i j k l m n o
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // p q r s t u v w
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // x y z { | } ~ del
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // nbsp í ó ú ñ Ñ ª
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ¿ ⌐ ¬ ½ ¼ ¡ « »
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ░ ▒ ▓ │ ┤ ╡ ╢ ╖
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ╕ ╣ ║ ╗ ╝ ╜ ╛ ┐
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // └ ┴ ┬ ├ ─ ┼ ╞ ╟
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ╚ ╔ ╩ ╦ ╠ ═ ╬ ╧
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ╨ ╤ ╥ ╙ ╘ ╒ ╓ ╫
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ╪ ┘ ┌ █ ▄ ▌ ▐ ▀
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // α ß Γ π Σ σ µ τ
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // Φ Θ Ω δ ∞ φ ε ∩
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ≡ ± ≥ ≤ ⌠ ⌡ ÷ ≈
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
- // ° ∙ · √ ⁿ ² ■
- baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm
- };
-
- private static final UnicodeClassMapping kCharacterMap
- = new UnicodeClassMapping(kRawMapping, kExceptionChar, CharacterExceptionFlags,
- kCharacterAsciiValues);
-
-
- public WordBreakTable forward()
- {
- return kCharacterForwardTable;
- }
-
- public WordBreakTable backward()
- {
- return kCharacterBackwardTable;
- }
-
- public UnicodeClassMapping map()
- {
- return kCharacterMap;
- }
- }
-
-