home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Java 1.2 How-To
/
JavaHowTo.iso
/
3rdParty
/
jbuilder
/
unsupported
/
JDK1.2beta3
/
SOURCE
/
SRC.ZIP
/
java
/
awt
/
font
/
BidiInfo.java
< prev
next >
Encoding:
Amiga
Atari
Commodore
DOS
FM Towns/JPY
Macintosh
Macintosh JP
NeXTSTEP
RISC OS/Acorn
UTF-8
Wrap
Java Source
|
1998-03-20
|
32.0 KB
|
1,012 lines
/*
* @(#)BidiInfo.java 1.7 98/03/18
*
* Copyright 1997, 1998 by Sun Microsystems, Inc.,
* 901 San Antonio Road, Palo Alto, California, 94303, U.S.A.
* All rights reserved.
*
* This software is the confidential and proprietary information
* of Sun Microsystems, Inc. ("Confidential Information"). You
* shall not disclose such Confidential Information and shall use
* it only in accordance with the terms of the license agreement
* you entered into with Sun.
*/
/*
* (C) Copyright Taligent, Inc. 1996 - 1997, All Rights Reserved
* (C) Copyright IBM Corp. 1996 - 1998, All Rights Reserved
*
* The original version of this source code and documentation is
* copyrighted and owned by Taligent, Inc., a wholly-owned subsidiary
* of IBM. These materials are provided under terms of a License
* Agreement between Taligent and Sun. This technology is protected
* by multiple US and International patents.
*
* This notice and attribution to Taligent may not be removed.
* Taligent is a registered trademark of Taligent, Inc.
*
*/
package java.awt.font;
// !!! TODO -- things still to do:
// 1) Arabic characters before segment separators affect the interpretation of numbers after them,
// so I need to handle segment separators, or pass in extra information.
// 2) Return information on embeddings and overrides independent of resolved levels for clients
// to use, e.g. for cursor placement.
// 3) Support all bidi classes and overriding of them via leveldirs (except bs?) if warranted.
// Probably it is not, and I can have leveldirs carry only embedding/override info. Since
// we go to the text to see if a character is Arabic, the user can't completely control the
// result anyway. Alternatively, given this additional information in leveldirs I could
// dispense with... (the original note breaks off here).
/**
 * Computes bidirectional (bidi) embedding levels for one block of text and
 * builds visual-to-logical reordering maps from them.
 *
 * The character classification used here (getDirectionClass, isArabic,
 * isWhiteSpace) is a TEST VERSION that works on ASCII stand-ins: lower case
 * letters act as left-to-right text, upper case letters as right-to-left text
 * ('A'..'M' additionally count as "arabic"), '0'..'4' as european numbers and
 * '5'..'9' as arabic numbers.
 *
 * Processing order (see reset): explicit formatting codes, weak types,
 * neutral types, implicit levels.
 */
class BidiInfo {
    /* Should these all be public so clients can construct a full dir array, or only L and R? */
    public static final byte L = 0;  /* left to right (strong) */
    public static final byte R = 1;  /* right to left (strong) */
    public static final byte EN = 2; /* european number (weak) */
    public static final byte ES = 3; /* european number separator (weak) */
    public static final byte ET = 4; /* european number terminator (weak) */
    public static final byte AN = 5; /* arabic number (weak) */
    public static final byte CS = 6; /* common number separator (weak) */
    public static final byte N = 7;  /* other neutrals, block separator, segment separator, whitespace */
    private static final byte X = 8; // internal code for ignored explicit codes

    public static final int BASEDIRECTION_DEFAULT = -1;

    /* These belong in some unicode char naming class. */
    private static final char LRE = 0x202A; /* left to right embedding */
    private static final char RLE = 0x202B; /* right to left embedding */
    private static final char PDF = 0x202C; /* pop directional formatting */
    private static final char LRO = 0x202D; /* left to right override */
    private static final char RLO = 0x202E; /* right to left override */
    private static final char MIN_EXPLICIT_CODE = LRE; // bounds of range of explicit formatting codes
    private static final char MAX_EXPLICIT_CODE = RLO;
    private static final char LRM = 0x200E; /* left to right mark */
    private static final char RLM = 0x200F; /* right to left mark */
    private static final char ALS = 0x0600; /* Arabic Letters start */
    private static final char ALE = 0x06EF; /* Arabic Letters end */

    /* number of valid nesting levels; also the size of the explicit-code stacks. */
    private static final int NUMLEVELS = 16;

    private int length;
    private char[] chars;      // the characters
    private byte[] dirs;       // the directional formatting codes
    private byte[] levels;     // the nesting levels; null when the text needs no reordering
    private byte[] embeddings; // the embeddings (bits 0-3 level, bit 4 override flag)
    private byte baseLevel;    // the base nesting level (line direction)

    // Return true if the character is arabic, used in resolving weak types.
    private boolean isArabic(char c)
    {
        // !!! TEST VERSION
        return c >= 'A' && c <= 'M';
    }

    // Return true for tab, newline, carriage return and U+2029.
    private boolean isBlockSeparator(char c)
    {
        return c == '\t' || c == '\n' || c == '\r' || c == '\u2029'; // !!! not LS U+2028
    }

    // Return true if the character is whitespace. We'll rely on this instead of the
    // internal direction array, so we can remove the distinction between WS and ON
    // when implementing the algorithm.
    private boolean isWhiteSpace(char c)
    {
        // !!! TEST VERSION
        return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\u2028' || c == '\u2029';
    }

    /**
     * Return the default direction class (one of L, R, EN, ES, ET, AN, CS, N)
     * for a character. The explicit embedding/override codes and the
     * directional marks report the strong direction they stand for; anything
     * not listed maps to N.
     */
    public static byte getDirectionClass(char c)
    {
        // !!! TEST VERSION
        byte dir;
        if (((c >= 'a') && (c <= 'z')) || c == LRO || c == LRE || c == LRM) {
            dir = L;
        } else if (((c >= 'A') && (c <= 'Z')) || c == RLO || c == RLE || c == RLM) {
            dir = R;
        } else if ((c >= '0') && (c <= '4')) {
            dir = EN;
        } else if ((c == '.') || (c == '/')) {
            dir = ES;
        } else if ((c == '$') || (c == '+')) {
            dir = ET;
        } else if ((c >= '5') && (c <= '9')) {
            dir = AN;
        } else if ((c == ',') || (c == ':')) {
            dir = CS;
        } else { // PDF, block and segment separators, whitespace all map to N
            dir = N;
        }
        return dir;
    }

    /* Pretty inefficient, might rebuild into a binary search when we really implement it.
    public static byte getDirectionClass(char c)
    {
    char dir;
    if ((c == 0x0026) || (c == 0x0040) ||
    ((c >= 0x0040) && (c <= 0x005A)) ||
    ((c >= 0x0061) && (c <= 0x007A)) ||
    ((c >= 0x00C0) && (c <= 0x00D6)) ||
    ((c >= 0x00D8) && (c <= 0x00F6)) ||
    ((c >= 0x00F8) && (c <= 0x058F)) ||
    ((c >= 0x0900) && (c <= 0x0E3A)) ||
    ((c >= 0x0E40) && (c <= 0x11FF)) ||
    ((c >= 0x1E00) && (c <= 0x1FFF)) ||
    ((c >= 0x20D0) && (c <= 0x20FF)) ||
    ((c >= 0x2160) && (c <= 0x2182)) ||
    ((c >= 0x3040) && (c <= 0x9FFF)) ||
    (c == LRM) || (c == LRE) || (c == LRO))
    dir = L;
    else if (((c >= 0x0590) && (c <= 0x065F)) ||
    ((c >= 0x066D) && (c <= 0x06EF)) ||
    (c == RLM) || (c == RLE) || (c == RLO))
    dir = R;
    else if (((c >= 0x0030) && (c <= 0x0039)) ||
    ((c >= 0x06F0) && (c <= 0x06F9)) ||
    (c == 0x00B2) || (c == 0x00B3) ||
    (c == 0x00B9) || (c == 0x2070) ||
    ((c >= 0x2074) && (c <= 0x2079)) ||
    ((c >= 0x2080) && (c <= 0x2089)))
    dir = EN;
    else if ((c == 0x002E) || (c == 0x002F) || (c == 0x2007))
    dir = ES;
    else if (((c >= 0x0023) && (c <= 0x0025)) ||
    ((c >= 0x00A2) && (c <= 0x00A5)) ||
    (c == 0x0E3F) || (c == 0x066A) ||
    ((c >= 0x20A0) && (c <= 0x20CF)) ||
    (c == 0x002B) || (c == 0x002D) ||
    (c == 0x00B0) || (c == 0x00B1) ||
    (c == 0x2032) || (c == 0x2033) ||
    (c == 0x207A) || (c == 0x207B) ||
    (c == 0x208A) || (c == 0x208B) ||
    (c == 0x2212) || (c == 0x2213))
    dir = ET;
    else if (((c >= 0x0660) && (c <= 0x0669)) ||
    (c == 0x066B) || (c == 0x066C))
    dir = AN;
    else if ((c == 0x002C) || (c == 0x003A))
    dir = CS;
    // else if (c == 0x0009)
    // dir = S;
    // else if ((c == 0x2028) || (c == 0x2029) ||
    // (c == 0x0009) || (c == '\r')) // ret, B, and S map to B
    // dir = B;
    // else if ((c == 0x0020) || (c == 0x00A0) ||
    // (c == 0x3000) || (c == 0xFEFF) ||
    // ((c >= 0x2000) && (c <= 0x2006)) ||
    // ((c >= 0x2008) && (c <= 0x200B)))
    // dir = WS;
    else
    dir = N;
    return dir;
    }
    */

    //
    // debugging code
    //

    // global debug flag
    private static boolean DEBUGGING = false;
    private static boolean SHOWFORMAT = false;

    // names of direction codes, indexed by the direction constants L..X
    private static final String[] dirnames = {
        "L",
        "R",
        "EN",
        "ES",
        "ET",
        "AN",
        "CS",
        "N",
        "X"
    };

    // string used to pad output
    private static final String padstring = "          "; // 10 chars

    // if s.length() < count, pad s with leading spaces so that s.length() == count
    private static final String pad(String s, int count)
    {
        count -= s.length();
        while (count > 0) {
            int pad = Math.min(count, padstring.length());
            s = padstring.substring(0, pad) + s;
            count -= pad;
        }
        return s;
    }

    // output a message and the current state of the algorithm (no-op unless DEBUGGING)
    private void debug(String message)
    {
        if (!DEBUGGING) {
            return;
        }

        System.out.println(message);
        System.out.println("length: " + length + ", baseLevel: " + baseLevel);
        if (chars != null) {
            for (int i = 0; i < length; ++i) {
                System.out.print(" " + chars[i]);
            }
            System.out.println();
        }
        if (dirs != null) {
            for (int i = 0; i < length; ++i) {
                System.out.print(pad(dirnames[dirs[i]], 3));
            }
            System.out.println();
        }
        if (levels != null) {
            for (int i = 0; i < length; ++i) {
                System.out.print(pad(Integer.toString(levels[i]), 3));
            }
            System.out.println();
        }
        System.out.println();
    }

    //
    // Constructors
    //

    /**
     * String is a single, entire block.
     * Base level will default using standard algorithm. Strong directional formatting
     * codes will be parsed. Default direction codes will be used.
     */
    public BidiInfo(String str)
    {
        reset(str.toCharArray(), -1, null, null);
    }

    /**
     * String is a single, entire block.
     * Baselevel is the base line direction.
     * Embeddings reflect preprocessing of explicit formatting codes. See reset.
     * Dirs reflect external information on character directionality that overrides the default. See reset.
     */
    public BidiInfo(String str, int baseLevel, byte[] embeddings, byte[] dirs)
    {
        reset(str.toCharArray(), baseLevel, embeddings, dirs);
    }

    /**
     * Iter is a single, entire block; its characters from getBeginIndex() up to
     * getEndIndex() are copied into a private array.
     * Baselevel is the base line direction.
     * Embeddings reflect preprocessing of explicit formatting codes. See reset.
     * Dirs reflect external information on character directionality that overrides the default. See reset.
     */
    public BidiInfo(java.text.CharacterIterator iter, int baseLevel, byte[] embeddings, byte[] dirs)
    {
        int len = iter.getEndIndex() - iter.getBeginIndex();
        char[] chars = new char[len];
        char c = iter.first();
        for (int i = 0; i < len; i++, c = iter.next()) {
            chars[i] = c;
        }
        reset(chars, baseLevel, embeddings, dirs);
    }

    /**
     * Chars is a single, entire block. The array is used directly, not copied.
     * Baselevel is the base line direction.
     * Embeddings reflect preprocessing of explicit formatting codes. See reset.
     * Dirs reflect external information on character directionality that overrides the default. See reset.
     */
    public BidiInfo(char[] chars, int baseLevel, byte[] embeddings, byte[] dirs)
    {
        reset(chars, baseLevel, embeddings, dirs);
    }

    /**
     * Reset the BidiInfo and have it compute a new reordering.
     *
     * Constructors call this.
     *
     * Chars is a block of characters to manipulate. This should not contain any segment or
     * block separators, any found will be treated like whitespace. Clients wishing to parse
     * more text should convert it segment by segment. BidiInfo can modify this array.
     *
     * BaseLevel is the base level (line direction), either 0, 1, or -1. If -1, the initial
     * characters will be scanned to default a base level. The start of the text will be
     * treated like the start of a block, clients who are processing segments should pass
     * an explicit base level rather than letting it default.
     *
     * Embeddings represents the effects of preprocessed explicit formatting codes.
     * Bits 0-3 are the embedding level, bit 4 indicates a directional override. If present
     * embeddings.length must be equal to chars.length. If baseLevel == 1 any embeddings of
     * level 0 will be converted to level 1. The array is kept and may be modified.
     *
     * Dirs represents overrides to the default character directionality. If present,
     * dirs.length must be equal to chars.length. If null, default directionality codes
     * will be used. The array is kept and is modified during resolution.
     *
     * If the base level is 0 and no character ends up with direction R, the text is
     * "canonical": levels is left null and no further resolution is done.
     *
     * Throws IllegalArgumentException if dirs or embeddings is present with the wrong length.
     */
    private void reset(char[] chars, int baseLevel, byte[] embeddings, byte[] dirs)
    {
        length = chars.length;
        this.chars = chars;

        if (dirs == null) {
            dirs = new byte[length];
            for (int i = 0; i < length; i++) {
                dirs[i] = getDirectionClass(chars[i]);
            }
        } else if (dirs.length != length) {
            throw new IllegalArgumentException("dirs length != text length");
        }
        this.dirs = dirs;

        if (embeddings != null) {
            if (embeddings.length != length) {
                throw new IllegalArgumentException("embeddings length != text length");
            }

            if (baseLevel == -1) { // clients ought not default this, though.
                baseLevel = 1;
                for (int i = 0; i < length; i++) {
                    if (embeddings[i] == 0 || dirs[i] == L || dirs[i] == R) {
                        if (embeddings[i] == 0 || dirs[i] == L) {
                            baseLevel = 0;
                        }
                        break;
                    }
                }
            } else if (baseLevel == 1) {
                for (int i = 0; i < length; i++) {
                    if (embeddings[i] == 0) {
                        embeddings[i] = 1;
                    }
                }
            }

            for (int i = 0; i < length; i++) {
                if ((embeddings[i] & 0x10) != 0) {
                    dirs[i] = (byte)(embeddings[i] & 0x1); // directional overrides
                }
            }

            this.embeddings = embeddings;
            this.baseLevel = (byte)baseLevel;
            ignoreExplicitFormattingCodes();
        } else {
            if (baseLevel == -1) { // only use when embeddings == null
                baseLevel = 0; // last-ditch default
                for (int i = 0; i < length; i++) {
                    byte dir = dirs[i];
                    if (dir == L || dir == R) {
                        baseLevel = dir == L ? 0 : 1;
                        break;
                    }
                }
            }

            this.baseLevel = (byte)baseLevel;
            embeddings = new byte[length];
            this.embeddings = embeddings;
            processExplicitFormattingCodes(); // sets embeddings, override directions, etc.
        }

        boolean canonical = baseLevel == 0;
        for (int i = 0; canonical && i < length; i++) {
            canonical = dirs[i] != R;
        }

        if (canonical) { // hey, we're done!
            if (DEBUGGING) {
                System.out.println("*** bidi canonical: " + new String(chars));
            }
            levels = null;
        } else {
            levels = new byte[length];
            for (int i = 0; i < length; i++) {
                levels[i] = (byte)(embeddings[i] & 0xf);
            }

            resolveWeakTypes();
            debug("after resolveWeakTypes");
            resolveNeutralTypes();
            debug("after resolveNeutralTypes");
            resolveImplicitLevels();
            debug("after resolveImplicitLevels");
        }
    }

    // Called to compute embedding and direction information from explicit formatting codes. These
    // codes are converted to L or R as appropriate to the level they control.
    //
    // s counts every open embedding/override code, skip counts the ones among them that were
    // not pushed because the level limit had been reached. Once a code has been skipped, every
    // further code is skipped too until the matching PDFs have consumed the skipped ones; this
    // keeps the pushed entries contiguous at the bottom of the stacks, so a PDF always pops the
    // entry that belongs to it and the push index can never run past NUMLEVELS.
    private void processExplicitFormattingCodes()
    {
        // Mark says two adjacent runs of the same kind should be merged
        // and the intervening codes removed, i.e. LRO a PDF LRO b PDF --> LRO a b PDF
        // We will handle ignored codes using 'X' values rather than by actually removing
        // them, sigh.
        byte level = baseLevel;
        byte override = -1;      // -1 means no override in effect
        byte value = baseLevel;  // merged level and override flags for embeddings array
        int s = 0;               // stack counter
        int skip = 0;            // skip counter when codes don't affect the stack
        byte[] levelStack = new byte[NUMLEVELS];
        byte[] overrideStack = new byte[NUMLEVELS];
        byte ignorelevel = -1;   // flag to catch series of similar formatting codes

        for (int i = 0; i < length; ++i) {
            char c = chars[i];
            byte newignorelevel = -1;

            switch (c) {
            case LRE:
            case LRO: {
                byte newlevel = (byte)((level & 0x0e) + 2); // next higher even level
                if (newlevel < NUMLEVELS && skip == 0) {
                    if (newlevel == ignorelevel) {
                        // previous char was a PDF that closed a run at this same level
                        dirs[i-1] = X;
                        dirs[i] = X;
                    }
                    levelStack[s] = level;
                    overrideStack[s] = override;
                    level = newlevel;
                    if (c == LRO) {
                        override = L;
                        value = (byte)(level + 0x10);
                    } else {
                        override = -1;
                        value = level;
                    }
                } else {
                    ++skip;
                }
                ++s;
                embeddings[i] = value;
            } break;

            case RLE:
            case RLO: {
                byte newlevel = (byte)((level + 1) | 0x01); // next higher odd level
                if (newlevel < NUMLEVELS && skip == 0) {
                    if (newlevel == ignorelevel) {
                        // previous char was a PDF that closed a run at this same level
                        dirs[i-1] = X;
                        dirs[i] = X;
                    }
                    levelStack[s] = level;
                    overrideStack[s] = override;
                    level = newlevel;
                    if (c == RLO) {
                        override = R;
                        value = (byte)(level + 0x10);
                    } else {
                        override = -1;
                        value = level;
                    }
                } else {
                    ++skip;
                }
                ++s;
                embeddings[i] = value;
            } break;

            case PDF:
                embeddings[i] = value;
                if (s > 0) {
                    dirs[i] = ((level & 0x1) == 0) ? L : R;
                    --s;
                    if (skip > 0) {
                        --skip;
                    } else {
                        newignorelevel = level;
                        level = levelStack[s];
                        override = overrideStack[s];
                        value = override == -1 ? level : (byte)(level + 0x10);
                    }
                } else {
                    dirs[i] = X; // unmatched PDF
                }
                break;

            default:
                embeddings[i] = value;
                if (override != -1) {
                    dirs[i] = override;
                }
                break;
            }

            ignorelevel = newignorelevel;
        }
    }

    // Mark all explicit codes as X. Used when the caller supplied preprocessed embeddings,
    // so the codes still present in the text must not take part in resolution.
    private void ignoreExplicitFormattingCodes()
    {
        for (int i = 0; i < length; i++) {
            // test the character itself; dirs[] only ever holds direction classes
            char c = chars[i];
            if (c >= MIN_EXPLICIT_CODE && c <= MAX_EXPLICIT_CODE) {
                dirs[i] = X;
            }
        }
    }

    // This resolves serially in order from left to right, with the results of previous changes
    // taken into account for later characters. So, for example, a series of ET's after an EN
    // will all change to EN, since once the first ET changes to EN, it is then treated as EN
    // for transforming the following ET, and so on. It will also process ETs before EN by
    // scanning forward across runs of ET and checking the following character.
    //
    // This does not take embedded levels into account.
    private void resolveWeakTypes()
    {
        byte prev = -1;

        int i = 0;
        while (i < length && dirs[i] == X) {
            i++;
        }
        if (i == length) {
            return; // empty text, or nothing but ignored codes: nothing to resolve
        }

        byte cur = dirs[i];
        boolean lastStrongWasArabic = cur <= R && isArabic(chars[i]);

        while (i < length) {
            int ii = i + 1;
            while (ii < length && dirs[ii] == X) {
                dirs[ii] = N; // set it, but we'll ignore it
                ii++;
            }

            byte next = (ii == length) ? -1 : dirs[ii];
            if (next == EN && lastStrongWasArabic) {
                next = AN;
            }

            switch (cur) {
            case L:
            case R:
                lastStrongWasArabic = isArabic(chars[i]);
                break;

            case ES:
                if (prev == EN && next == EN) {
                    cur = EN;
                } else {
                    cur = N;
                }
                break;

            case CS:
                if (prev == EN && next == EN) {
                    cur = EN;
                } else if (prev == AN && next == AN) {
                    cur = AN;
                } else {
                    cur = N;
                }
                break;

            case ET:
                if (prev == EN || next == EN) {
                    cur = EN;
                } else if (next == ET && !lastStrongWasArabic) { // forward scan to handle ET ET EN
                    for (int j = ii + 1; j < length; ++j) {
                        if (dirs[j] == ET || dirs[j] == X) {
                            continue; // we'll map X to EN if we succeed, but that's ok
                        }
                        if (dirs[j] == EN) {
                            while (ii < j) {
                                dirs[ii++] = EN;
                            }
                            cur = EN;
                            next = EN;
                        }
                        break;
                    }
                } else {
                    cur = N;
                }
                break;

            default:
                break;
            }

            dirs[i] = cur;
            i = ii;
            prev = cur;
            cur = next;
        }
    }

    // According to Mark, this operation should never span a level boundary. The start and end
    // of the level should be treated like sot and eot, with the base direction the direction of the
    // level.
    private void resolveNeutralTypes()
    {
        int i = 0;
        while (i < length) {
            // [i, eot) is a maximal run of characters sharing one level
            byte tempBaseLevel = levels[i];
            byte tempBaseDir = ((tempBaseLevel & 0x1) == 0) ? L : R;

            int eot = i + 1;
            while (eot < length && levels[eot] == tempBaseLevel) {
                eot++;
            }

            byte last = tempBaseDir;
            byte lastStrongDir = tempBaseDir;

            while (i < eot) {
                if (dirs[i] == N) {
                    int j = i + 1;
                    while (j < eot && dirs[j] == N) {
                        j++;
                    }

                    // direction seen after the neutral run; end of run counts as the run direction
                    byte next = tempBaseDir;
                    if (j < eot) {
                        switch (dirs[j]) {
                        case L: next = L; break;
                        case R: next = R; break;
                        case EN: next = lastStrongDir; break;
                        case AN: next = R; break;
                        }
                    }

                    // neutrals take the surrounding direction if both sides agree, else the run direction
                    if (last != next) {
                        last = tempBaseDir;
                    }

                    while (i < j) {
                        dirs[i] = last;
                        i++;
                    }

                    if (i == eot) {
                        break;
                    }
                }

                switch (dirs[i]) {
                case L: last = lastStrongDir = L; break;
                case R: last = lastStrongDir = R; break;
                case EN: last = lastStrongDir; break;
                case AN: last = R; break;
                }

                i++;
            }
        }
    }

    // Mark says to not use "global direction" but instead use the resolved level.
    // EN processing is influenced by level boundaries.
    //
    // Block separators are reset to the base level; every other character gets a level
    // derived from its embedding level and resolved direction, unless that level would
    // reach NUMLEVELS, in which case the embedding level is kept.
    private void resolveImplicitLevels()
    {
        for (int i = 0; i < length; i++) {
            byte level = levels[i];

            if (isBlockSeparator(chars[i])) {
                level = baseLevel;
            } else {
                switch (dirs[i]) {
                case L: level = (byte)((level + 1) & 0xe); break;
                case R: level = (byte)(level | 0x1); break;
                case AN: level = (byte)((level + 2) & 0xe); break;
                case EN:
                    if ((level & 0x1) != 0) {
                        level += 1;
                    } else if (i == 0 || (levels[i-1] != level) || dirs[i-1] == L || dirs[i-1] == EN) {
                        level += 2;
                    }
                    break;
                }
            }

            // store for both branches, so the base level chosen for separators takes effect
            if (level < NUMLEVELS) {
                levels[i] = level;
            }
        }
    }

    // Create mapping to reflect resolved levels, using entire text.
    public int[] createVisualToLogicalOrdering()
    {
        return createVisualToLogicalOrdering(0, length);
    }

    // Create mapping to reflect resolved levels, using a subrange of the text
    // to represent a line. Whitespace at the end of the line is mapped to the
    // base level.
    //
    // Returns null when no reordering is needed (no odd level in the range);
    // otherwise element v of the result is the index into the text of the
    // character shown at visual position v of the line.
    public int[] createVisualToLogicalOrdering(int start, int limit)
    {
        if (levels == null) {
            return null;
        }

        boolean canonical = true;
        for (int i = start; canonical && i < limit; i++) {
            canonical = (levels[i] & 0x1) == 0;
        }

        if (canonical) {
            if (DEBUGGING) {
                System.out.println("*** ordering canonical from " + start + " to " + limit);
            }
            return null;
        }

        int maplen = limit - start;
        int[] mapping = new int[maplen];

        // find out how much trailing whitespace there is
        int ws = 0;
        for (int i = limit - 1; i >= start && isWhiteSpace(chars[i]); --i) {
            ws++;
        }

        // don't process these values, we'll special case them later
        limit -= ws;

        // on a right-to-left line the trailing whitespace goes at the visual start
        int mapstart = baseLevel == 0 ? 0 : ws;

        byte lowestOddLevel = (byte)(NUMLEVELS + 1);
        byte highestLevel = 0;

        // initialize mapping and levels
        for (int i = start; i < limit; i++) {
            mapping[i - start + mapstart] = i;

            byte level = levels[i];
            if (level > highestLevel) {
                highestLevel = level;
            }
            if (((level & 0x01) != 0) && (level < lowestOddLevel)) {
                lowestOddLevel = level;
            }
        }

        // reverse every run at or above each level, from the highest down to the lowest odd one
        while (highestLevel >= lowestOddLevel) {
            int i = start;
            for (;;) {
                while ((i < limit) && (levels[i] < highestLevel)) {
                    i++;
                }
                int begin = i++;

                if (begin == limit) {
                    break; // no more runs at this level
                }

                while ((i < limit) && (levels[i] >= highestLevel)) {
                    i++;
                }
                int end = i - 1;

                // convert text indices to mapping indices
                begin -= start - mapstart;
                end -= start - mapstart;
                while (begin < end) {
                    int temp = mapping[begin];
                    mapping[begin] = mapping[end];
                    mapping[end] = temp;
                    ++begin;
                    --end;
                }
            }

            // debug("after remap " + highestLevel + " " + mappedString());

            --highestLevel;
        }

        // now let's handle the whitespace
        if (baseLevel == 0) {
            for (int i = limit; ws > 0; --ws, ++i) {
                mapping[i - start] = i;
            }
        } else {
            while (ws > 0) {
                mapping[--ws] = limit++;
            }
        }

        return mapping;
    }

    /**
     * return base direction
     */
    public byte getBaseLevel()
    {
        return baseLevel;
    }

    /**
     * Convenience to interpret the base level as LTR or RTL.
     */
    public boolean isDirectionLTR()
    {
        return (baseLevel & 0x1) == 0;
    }

    /*
     * return the level array.
     */
    public byte[] createLevels()
    {
        return createLevels(0, length);
    }

    // !!! Optimize away level arrays where everything is even, on the assumption
    // that the system won't want to represent levels in a special way, and all
    // it cares about is the directionality. But if this assumption changes, the
    // test will need to be recoded.
    //
    // Returns null when no level in [start, limit) is odd; otherwise a copy of the
    // levels in that range with trailing whitespace set to the base level.
    public byte[] createLevels(int start, int limit)
    {
        if (levels == null) {
            return null;
        }

        boolean canonical = true;
        for (int i = start; canonical && i < limit; i++) {
            canonical = (levels[i] & 0x1) == 0; // ??? or should I only test == 0?
        }

        if (canonical) {
            if (DEBUGGING) {
                System.out.println("*** levels canonical from " + start + " to " + limit);
            }
            return null;
        }

        int levlen = limit - start;
        byte[] newlevels = new byte[levlen];
        System.arraycopy(levels, start, newlevels, 0, levlen);

        // set trailing whitespace to base level. Don't worry about
        // extra work if this is a lower odd level than the ideal odd
        // level for this line, this situation won't happen often.
        for (int i = limit - 1; i >= start && isWhiteSpace(chars[i]); --i) {
            newlevels[i - start] = baseLevel;
        }

        return newlevels;
    }

    /*
     * this is for debugging only, remapping is something fonts do to glyphs
     */
    private static char mappedChar(char c)
    {
        switch (c) {
        case '(': return ')';
        case ')': return '(';
        case '[': return ']';
        case ']': return '[';
        case '<': return '>';
        case '>': return '<';
        case '{': return '}';
        case '}': return '{';
        }
        return c;
    }

    /*
     * Return a string containing the reordered characters. Debugging only.
     */
    public String mappedString()
    {
        return mappedString(0, length);
    }

    /*
     * Return the characters of [start, limit) in visual order. Debugging only.
     * Explicit codes and directional marks are dropped, or shown as "[LRE]" etc.
     * when SHOWFORMAT is set; bracket characters at odd levels are mirrored.
     */
    public String mappedString(int start, int limit)
    {
        String result = null;

        int[] mapping = createVisualToLogicalOrdering(start, limit);

        if (DEBUGGING && mapping != null) {
            for (int i = 0; i < mapping.length; ++i) {
                System.out.print(pad(Integer.toString(mapping[i]), 3));
            }
            System.out.println();
        }

        if (mapping == null) {
            result = new String(chars, start, limit - start);
        } else {
            StringBuffer buffer = new StringBuffer(mapping.length);
            for (int i = 0; i < mapping.length; i++) {
                char c = chars[mapping[i]];
                switch (c) {
                case LRE: if (SHOWFORMAT) buffer.append("[LRE]"); break;
                case LRO: if (SHOWFORMAT) buffer.append("[LRO]"); break;
                case RLE: if (SHOWFORMAT) buffer.append("[RLE]"); break;
                case RLO: if (SHOWFORMAT) buffer.append("[RLO]"); break;
                case PDF: if (SHOWFORMAT) buffer.append("[PDF]"); break;
                case LRM: if (SHOWFORMAT) buffer.append("[LRM]"); break;
                case RLM: if (SHOWFORMAT) buffer.append("[RLM]"); break;
                default:
                    if ((levels[mapping[i]] & 0x1) != 0) {
                        buffer.append(mappedChar(c));
                    } else {
                        buffer.append(c);
                    }
                    break;
                }
            }
            result = buffer.toString();
        }

        return result;
    }

    /*
     * Debugging driver: runs a small timing test, prints the reordering of some
     * sample strings, then sleeps forever (until the thread is interrupted or the
     * process is killed).
     */
    public static void main(String args[])
    {
        // symantec 1.53 NT 3.51 312-360 avg about 328 (long string)
        // String str = "HE SAID [" + LRE + "she said (" + RLE + "you SAID TO BUY 20, 30, or 40" + PDF + ")" + PDF + "]? ";
        String str = "I OWZ 123 dollars";

        if (DEBUGGING == false) {
            // timing test
            int strlen = str.length();
            for (int trials = 10; trials > 0; --trials) {
                System.gc();
                long t = System.currentTimeMillis();
                for (int i = 0; i < 10; ++i) {
                    BidiInfo b = new BidiInfo(str);
                    for (int j = 0; j < strlen; ++j) {
                        for (int k = j; k <= strlen; ++k) {
                            // results are discarded, only the elapsed time matters
                            byte[] levels = b.createLevels(j, k);
                            int[] mapping = b.createVisualToLogicalOrdering(j, k);
                        }
                    }
                }
                t = System.currentTimeMillis() - t;
                System.out.println("timing test: " + t);
            }
        }

        // if (args.length > 0)
        // str = args[0];

        str = "AbcdEFGHijklMNOPqrsT ";
        System.out.println("source: " + str);
        System.out.println("result: " + new BidiInfo(str).mappedString());
        System.out.println("result: " + new BidiInfo(str, 0, null, null).mappedString());

        str = "HE SAID [she said (you SAID)]? ";
        byte[] embeddings = {
            1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 1, 1, 1, 1
        };
        System.out.println("result: " + new BidiInfo(str, 1, embeddings, null).mappedString());
        System.out.println("result: " + new BidiInfo(str, 0, embeddings, null).mappedString());

        str = "start: " + LRE + "one run" + PDF + LRE + "another run" + PDF + ". ";
        System.out.println("source: " + str);
        System.out.println("result: " + new BidiInfo(str, 0, null, null).mappedString());
        System.out.println("result: " + new BidiInfo(str, 1, null, null).mappedString());

        str = "he said \"" + RLE + "IT IS A bmw 400, OK." + PDF + "\" ";
        System.out.println("source: " + str);
        System.out.println("result: " + new BidiInfo(str).mappedString());

        str = "he said [" + RLE + "THEY ARE 123, 456, 789, OK." + PDF + "] ";
        System.out.println("source: " + str);
        System.out.println("result: " + new BidiInfo(str).mappedString());

        BidiInfo b = new BidiInfo(str);
        DEBUGGING = false;
        for (int i = 0; i <= str.length(); ++i) {
            System.out.println("[0," + pad(Integer.toString(i), 2) + "] >" + b.mappedString(0, i) + "<");
        }
        for (int i = 0; i <= str.length() - 20; ++i) {
            System.out.println("[" + pad(Integer.toString(i), 2) + "," + pad(Integer.toString(i + 20), 2) + "] >" +
                b.mappedString(i, i + 20) + "<");
        }

        // keep the process (and its console output) alive
        try {
            for (;;) {
                Thread.sleep(100);
            }
        }
        catch (Exception e) {
            System.out.println(e);
        }
    }
}