Java 1.2 How-To

home *** CD-ROM | disk | FTP | other *** search

/ Java 1.2 How-To / JavaHowTo.iso / 3rdParty / jbuilder / unsupported / JDK1.2beta3 / SOURCE / SRC.ZIP / java / awt / font / BidiInfo.java < prev next >

Wrap

Java Source | 1998-03-20 | 32.0 KB | 1,012 lines

/* * @(#)BidiInfo.java 1.7 98/03/18 * * Copyright 1997, 1998 by Sun Microsystems, Inc., * 901 San Antonio Road, Palo Alto, California, 94303, U.S.A. * All rights reserved. * * This software is the confidential and proprietary information * of Sun Microsystems, Inc. ("Confidential Information"). You * shall not disclose such Confidential Information and shall use * it only in accordance with the terms of the license agreement * you entered into with Sun. */ /* * (C) Copyright Taligent, Inc. 1996 - 1997, All Rights Reserved * (C) Copyright IBM Corp. 1996 - 1998, All Rights Reserved * * The original version of this source code and documentation is * copyrighted and owned by Taligent, Inc., a wholly-owned subsidiary * of IBM. These materials are provided under terms of a License * Agreement between Taligent and Sun. This technology is protected * by multiple US and International patents. * * This notice and attribution to Taligent may not be removed. * Taligent is a registered trademark of Taligent, Inc. * */ package java.awt.font; // !!! TODO to do too... // 1) arabic characters before segment separators affect interpretation of numbers after them. // so I need to handle segment separators, or pass in extra information // 2) return information on embeddings and overrides independent of resolved levels for clients // to use, i.e. cursor placement. // 3) support all bidi classes and overriding of them via leveldirs (except bs?) if warranted. // probably it is not, and I can have leveldirs only carry embedding/override info. Since // we go to the text to see if a character is arabic, the user can't completely control the // result anyway. Alternatively, given this additional information in leveldirs I could // dispense with. class BidiInfo extends Object { /* Should these all be public so clients can construct a full dir array, or only L and R? */ public static final byte L = 0; /* left to right (strong) */ public static final byte R = 1; /* right to left (strong) */ public static final byte EN = 2; /* european number (weak) */ public static final byte ES = 3; /* european number separator (weak) */ public static final byte ET = 4; /* european number terminator (weak) */ public static final byte AN = 5; /* arabic number (weak) */ public static final byte CS = 6; /* common number separator (weak) */ public static final byte N = 7; /* other neutrals, block separator, segment separator, whitespace */ private static final byte X = 8; // internal code for ignored explicit codes public static final int BASEDIRECTION_DEFAULT = -1; /* These belong in some unicode char naming class. */ private static final char LRE = 0x202A; /* left to right embedding */ private static final char RLE = 0x202B; /* right to left embedding */ private static final char PDF = 0x202C; /* pop directional formatting */ private static final char LRO = 0x202D; /* left to right override */ private static final char RLO = 0x202E; /* right to left override */ private static final char MIN_EXPLICIT_CODE = LRE; // bounds of range of explicit formatting codes private static final char MAX_EXPLICIT_CODE = RLO; private static final char LRM = 0x200E; /* left to right mark */ private static final char RLM = 0x200F; /* right to left mark */ private static final char ALS = 0x0600; /* Arabic Letters start */ private static final char ALE = 0x06EF; /* Arabic Letters end */ private static final char NUMLEVELS = 16; /* number of valid nesting levels. */ private int length; private char[] chars; // the characters private byte[] dirs; // the directional formatting codes private byte[] levels; // the nesting levels private byte[] embeddings; // the embeddings private byte baseLevel; // the base nesting level (line direction) // Return true if the character is arabic, used in resolving weak types. private boolean isArabic(char c) { // !!! TEST VERSION return c >= 'A' && c <= 'M'; } private boolean isBlockSeparator(char c) { return c == '\t' || c == '\n' || c == '\r' || c == '\u2029'; // !!! not LS U+2028 } // Return true if the character is whitespace. We'll rely on this instead of the // internal direction array, so we can remove the distinction between WS and ON // when implementing the algorithm. private boolean isWhiteSpace(char c) { // !!! TEST VERSION return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\u2028' || c == '\u2029'; } public static byte getDirectionClass(char c) { // !!! TEST VERSION byte dir; if (((c >= 'a') && (c <= 'z')) || c == LRO || c == LRE || c == LRM) dir = L; else if (((c >= 'A') && (c <= 'Z')) || c == RLO || c == RLE || c == RLM) dir = R; else if ((c >= '0') && (c <= '4')) dir = EN; else if ((c == '.') || (c == '/')) dir = ES; else if ((c == '$') || (c == '+')) dir = ET; else if ((c >= '5') && (c <= '9')) dir = AN; else if ((c == ',') || (c == ':')) dir = CS; else // explicit formatting codes, block and segment separators, whitespace all map to N dir = N; return dir; } /* Pretty inefficient, might rebuild into a binary search when we really implement it. public static byte getDirectionClass(char c) { char dir; if ((c == 0x0026) || (c == 0x0040) || ((c >= 0x0040) && (c <= 0x005A)) || ((c >= 0x0061) && (c <= 0x007A)) || ((c >= 0x00C0) && (c <= 0x00D6)) || ((c >= 0x00D8) && (c <= 0x00F6)) || ((c >= 0x00F8) && (c <= 0x058F)) || ((c >= 0x0900) && (c <= 0x0E3A)) || ((c >= 0x0E40) && (c <= 0x11FF)) || ((c >= 0x1E00) && (c <= 0x1FFF)) || ((c >= 0x20D0) && (c <= 0x20FF)) || ((c >= 0x2160) && (c <= 0x2182)) || ((c >= 0x3040) && (c <= 0x9FFF)) || (c == LRM) || (c == LRE) || (c == LRO)) dir = L; else if (((c >= 0x0590) && (c <= 0x065F)) || ((c >= 0x066D) && (c <= 0x06EF)) || (c == RLM) || (c == RLE) || (c == RLO)) dir = R; else if (((c >= 0x0030) && (c <= 0x0039)) || ((c >= 0x06F0) && (c <= 0x06F9)) || (c == 0x00B2) || (c == 0x00B3) || (c == 0x00B9) || (c == 0x2070) || ((c >= 0x2074) && (c <= 0x2079)) || ((c >= 0x2080) && (c <= 0x2089))) dir = EN; else if ((c == 0x002E) || (c == 0x002F) || (c == 0x2007)) dir = ES; else if (((c >= 0x0023) && (c <= 0x0025)) || ((c >= 0x00A2) && (c <= 0x00A5)) || (c == 0x0E3F) || (c == 0x066A) || ((c >= 0x20A0) && (c <= 0x20CF)) || (c == 0x002B) || (c == 0x002D) || (c == 0x00B0) || (c == 0x00B1) || (c == 0x2032) || (c == 0x2033) || (c == 0x207A) || (c == 0x207B) || (c == 0x208A) || (c == 0x208B) || (c == 0x2212) || (c == 0x2213)) dir = ET; else if (((c >= 0x0660) && (c <= 0x0669)) || (c == 0x066B) || (c == 0x066C)) dir = AN; else if ((c == 0x002C) || (c == 0x003A)) dir = CS; // else if (c == 0x0009) // dir = S; // else if ((c == 0x2028) || (c == 0x2029) || // (c == 0x0009) || (c == '\r')) // ret, B, and S map to B // dir = B; // else if ((c == 0x0020) || (c == 0x00A0) || // (c == 0x3000) || (c == 0xFEFF) || // ((c >= 0x2000) && (c <= 0x2006)) || // ((c >= 0x2008) && (c <= 0x200B))) // dir = WS; else dir = N; return dir; } */ // // debugging code // // global debug flag private static boolean DEBUGGING = false; private static boolean SHOWFORMAT = false; // names of direction codes private static final String[] dirnames = { "L", "R", "EN", "ES", "ET", "AN", "CS", "N", "X" }; // string used to pad output private static final String padstring = " "; // 10 chars // if s.length() < count, pad s with leading spaces so that s.length() == count private static final String pad(String s, int count) { count -= s.length(); while (count > 0) { int pad = Math.min(count, padstring.length()); s = padstring.substring(0, pad) + s; count -= pad; } return s; } // output a message and the current state of the algorithm private void debug(String message) { if (!DEBUGGING) return; System.out.println(message); System.out.println("length: " + length + ", baseLevel: " + baseLevel); if (chars != null) { for (int i = 0; i < length; ++i) System.out.print(" " + chars[i]); System.out.println(); } if (dirs != null) { for (int i = 0; i < length; ++i) System.out.print(pad(dirnames[dirs[i]], 3)); System.out.println(); } if (levels != null) { for (int i = 0; i < length; ++i) System.out.print(pad(Integer.toString(levels[i]), 3)); System.out.println(); } System.out.println(); } // // Constructors // /** * String is a single, entire block. * Base level will default using standard algorithm. Strong directional formatting * codes will be parsed. Default direction codes will be used. */ public BidiInfo(String str) { reset(str.toCharArray(), -1, null, null); } /** * String is a single, entire block. * Baselevel is the base line direction. * Embeddings reflect preprocessing of explicit formatting codes. See reset. * Dirs reflect external information on character directionality that overrides the default. See reset. */ public BidiInfo(String str, int baseLevel, byte[] embeddings, byte[] dirs) { reset(str.toCharArray(), baseLevel, embeddings, dirs); } /** * Iter is a single, entire block. * Baselevel is the base line direction. * Embeddings reflect preprocessing of explicit formatting codes. See reset. * Dirs reflect external information on character directionality that overrides the default. See reset. */ public BidiInfo(java.text.CharacterIterator iter, int baseLevel, byte[] embeddings, byte[] dirs) { int len = iter.getEndIndex() - iter.getBeginIndex(); char[] chars = new char[len]; char c = iter.first(); for (int i = 0; i < len; i++, c = iter.next()) chars[i] = c; reset(chars, baseLevel, embeddings, dirs); } /** * Chars is a single, entire block. * Baselevel is the base line direction. * Embeddings reflect preprocessing of explicit formatting codes. See reset. * Dirs reflect external information on character directionality that overrides the default. See reset. */ public BidiInfo(char[] chars, int baseLevel, byte[] embeddings, byte[] dirs) { reset(chars, baseLevel, embeddings, dirs); } /** * Reset the BidiInfo and have it compute a new reordering. * * Constructors call this. * * Chars is a block of characters to manipulate. This should not contain any segment or * block separators, any found will be treated like whitespace. Clients wishing to parse * more text should convert it segment by segment. BidiInfo can modify this array. * * BaseLevel is the base level (line direction), either 0, 1, or -1. If -1, the initial * characters will be scanned to default a base level. The start of the text will be * treated like the start of a block, clients who are processing segments should pass * an explicit base level rather than letting it default. * * Embeddings represents the effects of preprocessed explicit formatting codes. * Bits 0-3 are the embedding level, bit 4 indicates a directional override. If present * embeddings.length must be equal to chars.length. If baseLevel == 1 any embeddings of * level 0 will be converted to level 1. * * Dirs represents overrides to the default character directionality. If present, * dirs.length must be equal to chars.length. If null, default directionality codes * will be used. */ private void reset(char[] chars, int baseLevel, byte[] embeddings, byte[] dirs) { length = chars.length; this.chars = chars; if (dirs == null) { dirs = new byte[length]; for (int i = 0; i < length; i++) dirs[i] = getDirectionClass(chars[i]); } else { if (dirs.length != length) throw new IllegalArgumentException("dirs length != text length"); } this.dirs = dirs; if (embeddings != null) { if (embeddings.length != length) throw new IllegalArgumentException("embeddings length != text length"); if (baseLevel == -1) { // clients ought not default this, though. baseLevel = 1; for (int i = 0; i < length; i++) { if (embeddings[i] == 0 || dirs[i] == L || dirs[i] == R) { if (embeddings[i] == 0 || dirs[i] == L) baseLevel = 0; break; } } } else if (baseLevel == 1) { for (int i = 0; i < length; i++) if (embeddings[i] == 0) embeddings[i] = 1; } for (int i = 0; i < length; i++) { if ((embeddings[i] & 0x10) != 0) dirs[i] = (byte)(embeddings[i] & 0x1); // directional overrides } this.embeddings = embeddings; this.baseLevel = (byte)baseLevel; ignoreExplicitFormattingCodes(); } else { if (baseLevel == -1) { // only use when embeddings == null baseLevel = 0; // last-ditch default for (int i = 0; i < length; i++) { byte dir = dirs[i]; if (dir == L || dir == R) { baseLevel = dir == L ? 0 : 1; break; } } } this.baseLevel = (byte)baseLevel; embeddings = new byte[length]; this.embeddings = embeddings; processExplicitFormattingCodes(); // sets embeddings, override directions, etc. } boolean canonical = baseLevel == 0; for (int i = 0; canonical && i < length; i++) canonical = dirs[i] != R; if (canonical) { // hey, we're done! if (DEBUGGING) System.out.println("*** bidi canonical: " + new String(chars)); levels = null; } else { levels = new byte[length]; for (int i = 0; i < length; i++) levels[i] = (byte)(embeddings[i] & 0xf); resolveWeakTypes(); debug("after resolveWeakTypes"); resolveNeutralTypes(); debug("after resolveNeutralTypes"); resolveImplicitLevels(); debug("after resolveImplicitLevels"); } } // Called to compute embedding and direction information from explicit formatting codes. These // codes are converted to L or R as appropriate to the level they control. private void processExplicitFormattingCodes() { // Mark says two adjacent runs of the same kind should be merged // and the intervening codes removed, i.e. LRO a PDF LRO b PDF --> LRO a b PDF // We will handle ignored codes using 'X' values rather than by actually removing // them, sigh. byte level = baseLevel; byte override = -1; byte value = baseLevel; // merged level and override flags for embeddings array int s = 0; // stack counter int skip = 0; // skip counter when codes don't affect the stack byte levelStack[] = new byte[NUMLEVELS]; byte overrideStack[] = new byte[NUMLEVELS]; byte ignorelevel = -1; // flag to catch series of similar formatting codes for (int i = 0; i < length; ++i) { char c = chars[i]; byte newignorelevel = -1; switch (c) { case LRE: case LRO: { byte newlevel = (byte)((level & 0x0e) + 2); if (newlevel < NUMLEVELS) { if (newlevel == ignorelevel) { dirs[i-1] = X; dirs[i] = X; } levelStack[s] = level; overrideStack[s] = override; level = newlevel; if (c == LRO) { override = L; value = (byte)(level + 0x10); } else { override = -1; value = level; } } else { ++skip; } ++s; embeddings[i] = value; } break; case RLE: case RLO: { byte newlevel = (byte)((level + 1) | 0x01); if (newlevel < NUMLEVELS) { if (newlevel == ignorelevel) { dirs[i-1] = X; dirs[i] = X; } levelStack[s] = level; overrideStack[s] = override; level = newlevel; if (c == RLO) { override = R; value = (byte)(level + 0x10); } else { override = -1; value = level; } } else { ++skip; } ++s; embeddings[i] = value; } break; case PDF: embeddings[i] = value; if (s > 0) { dirs[i] = ((level & 0x1) == 0) ? L : R; --s; if (skip > 0) { --skip; } else { newignorelevel = level; level = levelStack[s]; override = overrideStack[s]; value = override == -1 ? level : (byte)(level + 0x10); } } else { dirs[i] = X; } break; default: embeddings[i] = value; if (override != -1) dirs[i] = override; break; } ignorelevel = newignorelevel; } } // Mark all explicit codes as X private void ignoreExplicitFormattingCodes() { for (int i = 0; i < length; i++) { if (dirs[i] < MIN_EXPLICIT_CODE) continue; if (dirs[i] > MAX_EXPLICIT_CODE) continue; dirs[i] = X; } } // This resolves serially in order from left to right, with the results of previous changes // taken into account for later characters. So, for example, a series of ET's after an EN // will all change to EN, since once the first ET changes to EN, it is then treated as EN // for transforming the following ET, and so on. It will also process ETs before EN by // scanning forward across runs of ET and checking the following character. // // This does not take embedded levels into account. private void resolveWeakTypes() { byte prev = -1; int i = 0; while (i < length && dirs[i] == X) i++; byte cur = dirs[i]; boolean lastStrongWasArabic = cur <= R && isArabic(chars[i]); while (i < length) { int ii = i + 1; while (ii < length && dirs[ii] == X) { dirs[ii] = N; // set it, but we'll ignore it ii++; } byte next = (ii == length) ? -1 : dirs[ii]; if (next == EN && lastStrongWasArabic) next = AN; switch (cur) { case L: case R: lastStrongWasArabic = isArabic(chars[i]); break; case ES: if (prev == EN && next == EN) cur = EN; else cur = N; break; case CS: if (prev == EN && next == EN) cur = EN; else if (prev == AN && next == AN) cur = AN; else cur = N; break; case ET: if (prev == EN || next == EN) { cur = EN; } else if (next == ET && !lastStrongWasArabic) { // forward scan to handle ET ET EN for (int j = ii + 1; j < length; ++j) { if (dirs[j] == ET || dirs[j] == X) continue; // we'll map X to EN if we succeed, but that's ok if (dirs[j] == EN) { while (ii < j) dirs[ii++] = EN; cur = EN; next = EN; } break; } } else { cur = N; } break; default: break; } dirs[i] = cur; i = ii; prev = cur; cur = next; } } // According to Mark, this operation should never span a level boundary. The start and end // of the level should be treated like sot and eot, with the base direction the direction of the // level. private void resolveNeutralTypes() { int i = 0; while (i < length) { byte tempBaseLevel = levels[i]; byte tempBaseDir = ((tempBaseLevel & 0x1) == 0) ? L : R; int eot = i + 1; while (eot < length && levels[eot] == tempBaseLevel) eot++; byte last = tempBaseDir; byte lastStrongDir = tempBaseDir; while (i < eot) { if (dirs[i] == N) { int j = i + 1; while (j < eot && dirs[j] == N) j++; byte next = tempBaseDir; if (j < eot) { switch(dirs[j]) { case L: next = L; break; case R: next = R; break; case EN: next = lastStrongDir; break; case AN: next = R; break; } } if (last != next) last = tempBaseDir; while (i < j) { dirs[i] = last; i++; } if (i == eot) break; } switch (dirs[i]) { case L: last = lastStrongDir = L; break; case R: last = lastStrongDir = R; break; case EN: last = lastStrongDir; break; case AN: last = R; break; } i++; } } } // Mark says to not use "global direction" but instead use the resolved level. // EN processing is influenced by level boundaries. private void resolveImplicitLevels() { for (int i = 0; i < length; i++) { byte level = levels[i]; if (isBlockSeparator(chars[i])) { level = baseLevel; } else { switch (dirs[i]) { case L: level = (byte)((level + 1) & 0xe); break; case R: level = (byte)(level | 0x1); break; case AN: level = (byte)((level + 2) & 0xe); break; case EN: if ((level & 0x1) != 0) level += 1; else if (i == 0 || (levels[i-1] != level) || dirs[i-1] == L || dirs[i-1] == EN) level += 2; break; } if (level < NUMLEVELS) levels[i] = level; } } } // Create mapping to reflect resolved levels, using entire text. public int[] createVisualToLogicalOrdering() { return createVisualToLogicalOrdering(0, length); } // Create mapping to reflect resolved levels, using a subrange of the text // to represent a line. Whitespace at the end of the line is mapped to the // base level. public int[] createVisualToLogicalOrdering(int start, int limit) { if (levels == null) return null; boolean canonical = true; for (int i = start; canonical && i < limit; i++) canonical = (levels[i] & 0x1) == 0; if (canonical) { if (DEBUGGING) System.out.println("*** ordering canonical from " + start + " to " + limit); return null; } int maplen = limit - start; int[] mapping = new int[maplen]; // find out how much trailing whitespace there is int ws = 0; for (int i = limit - 1; i >= start && isWhiteSpace(chars[i]); --i) ws++; // don't process these values, we'll special case them later limit -= ws; int mapstart = baseLevel == 0 ? 0 : ws; byte lowestOddLevel = (byte)(NUMLEVELS + 1); byte highestLevel = 0; // initialize mapping and levels for (int i = start; i < limit; i++) { mapping[i - start + mapstart] = i; byte level = levels[i]; if (level > highestLevel) highestLevel = level; if (((level & 0x01) != 0) && (level < lowestOddLevel)) lowestOddLevel = level; } while (highestLevel >= lowestOddLevel) { int i = start; for (;;) { while ((i < limit) && (levels[i] < highestLevel)) i++; int begin = i++; if (begin == limit) break; // no more runs at this level while ((i < limit) && (levels[i] >= highestLevel)) i++; int end = i - 1; begin -= start - mapstart; end -= start - mapstart; while (begin < end) { int temp = mapping[begin]; mapping[begin] = mapping[end]; mapping[end] = temp; ++begin; --end; } } // debug("after remap " + highestLevel + " " + mappedString()); --highestLevel; } // now let's handle the whitespace if (baseLevel == 0) { for (int i = limit; ws > 0; --ws, ++i) mapping[i - start] = i; } else { while (ws > 0) mapping[--ws] = limit++; } return mapping; } /** * return base direction */ public byte getBaseLevel() { return baseLevel; } /** * Convenience to interpret the base level as LTR or RTL. */ public boolean isDirectionLTR() { return (baseLevel & 0x1) == 0; } /* * return the level array. */ public byte[] createLevels() { return createLevels(0, length); } // !!! Optimize away level arrays where everything is even, on the assumption // that the system won't want to represent levels in a special way, and all // it cares about is the directionality. But if this assumption changes, the // test will need to be recoded. public byte[] createLevels(int start, int limit) { if (levels == null) return null; boolean canonical = true; for (int i = start; canonical && i < limit; i++) canonical = (levels[i] & 0x1) == 0; // ??? or should I only test == 0? if (canonical) { if (DEBUGGING) System.out.println("*** levels canonical from " + start + " to " + limit); return null; } int levlen = limit - start; byte[] newlevels = new byte[levlen]; System.arraycopy(levels, start, newlevels, 0, levlen); // set trailing whitespace to base level. Don't worry about // extra work if this is a lower odd level than the ideal odd // level for this line, this situation won't happen often. for (int i = limit - 1; i >= start && isWhiteSpace(chars[i]); --i) newlevels[i - start] = baseLevel; return newlevels; } /* * this is for debugging only, remapping is something fonts do to glyphs */ private static char mappedChar(char c) { switch (c) { case '(': return ')'; case ')': return '('; case '[': return ']'; case ']': return '['; case '<': return '>'; case '>': return '<'; case '{': return '}'; case '}': return '{'; } return c; } /* * Return a string containing the reordered characters. Debugging only. */ public String mappedString() { return mappedString(0, length); } public String mappedString(int start, int limit) { String result = null; int[] mapping = createVisualToLogicalOrdering(start, limit); if (DEBUGGING && mapping != null) { for (int i = 0; i < mapping.length; ++i) System.out.print(pad(Integer.toString(mapping[i]), 3)); System.out.println(); } if (mapping == null) { result = new String(chars, start, limit - start); } else { StringBuffer buffer = new StringBuffer(mapping.length); for (int i = 0; i < mapping.length; i++) { char c = chars[mapping[i]]; switch (c) { case LRE: if (SHOWFORMAT) buffer.append("[LRE]"); break; case LRO: if (SHOWFORMAT) buffer.append("[LRO]"); break; case RLE: if (SHOWFORMAT) buffer.append("[RLE]"); break; case RLO: if (SHOWFORMAT) buffer.append("[RLO]"); break; case PDF: if (SHOWFORMAT) buffer.append("[PDF]"); break; case LRM: if (SHOWFORMAT) buffer.append("[LRM]"); break; case RLM: if (SHOWFORMAT) buffer.append("[RLM]"); break; default: if ((levels[mapping[i]] & 0x1) != 0) buffer.append(mappedChar(c)); else buffer.append(c); break; } } result = buffer.toString(); } return result; } public static void main(String args[]) { // symantec 1.53 NT 3.51 312-360 avg about 328 (long string) // String str = "HE SAID [" + LRE + "she said (" + RLE + "you SAID TO BUY 20, 30, or 40" + PDF + ")" + PDF + "]? "; String str = "I OWZ 123 dollars"; if (DEBUGGING == false) { // timing test int strlen = str.length(); for (int trials = 10; trials > 0; --trials) { System.gc(); long t = System.currentTimeMillis(); for (int i = 0; i < 10; ++i) { BidiInfo b = new BidiInfo(str); for (int j = 0; j < strlen; ++j) { for (int k = j; k <= strlen; ++k) { byte[] levels = b.createLevels(j, k); int[] mapping = b.createVisualToLogicalOrdering(j, k); } } } t = System.currentTimeMillis() - t; System.out.println("timing test: " + t); } } // if (args.length > 0) // str = args[0]; str = "AbcdEFGHijklMNOPqrsT "; System.out.println("source: " + str); System.out.println("result: " + new BidiInfo(str).mappedString()); System.out.println("result: " + new BidiInfo(str, 0, null, null).mappedString()); str = "HE SAID [she said (you SAID)]? "; byte[] embeddings = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 1, 1, 1, 1 }; System.out.println("result: " + new BidiInfo(str, 1, embeddings, null).mappedString()); System.out.println("result: " + new BidiInfo(str, 0, embeddings, null).mappedString()); str = "start: " + LRE + "one run" + PDF + LRE + "another run" + PDF + ". "; System.out.println("source: " + str); System.out.println("result: " + new BidiInfo(str, 0, null, null).mappedString()); System.out.println("result: " + new BidiInfo(str, 1, null, null).mappedString()); str = "he said \"" + RLE + "IT IS A bmw 400, OK." + PDF + "\" "; System.out.println("source: " + str); System.out.println("result: " + new BidiInfo(str).mappedString()); str = "he said [" + RLE + "THEY ARE 123, 456, 789, OK." + PDF + "] "; System.out.println("source: " + str); System.out.println("result: " + new BidiInfo(str).mappedString()); BidiInfo b = new BidiInfo(str); DEBUGGING = false; for (int i = 0; i <= str.length(); ++i) System.out.println("[0," + pad(Integer.toString(i), 2) + "] >" + b.mappedString(0, i) + "<"); for (int i = 0; i <= str.length() - 20; ++i) System.out.println("[" + pad(Integer.toString(i), 2) + "," + pad(Integer.toString(i + 20), 2) + "] >" + b.mappedString(i, i + 20) + "<"); try { for (;;) Thread.sleep(100); } catch (Exception e) { System.out.println(e); } } }