home *** CD-ROM | disk | FTP | other *** search
- /*==================================================================
- File: ZStringParser.cpp
-
- Contains: Class for parsing named ZStrings into platform-
- specific strings.
-
- Written by: Eric Traut
-
- Copyright: 2000-2001 Connectix Corporation
-
- This source has been placed into the public domain by
- Connectix Corporation. You have the right to modify,
- distribute or use this code without any legal limitations
- or financial/licensing requirements. Connectix is not
- liable for any problems that result from the use of this
- code.
-
- If you have comments, feedback, questions, or would like
- to submit bug fixes or updates to this code, please email
- opensource@connectix.com.
- ==================================================================*/
-
- #include "ZStringParser.h"
- #include "ZStringDictionary.h"
- #include "ZString.h"
-
- #include <stdio.h>
- #include <ctype.h>
-
- ZStringParser * ZStringParser::sParser = NULL;
-
- typedef struct
- {
- const char * fTagName; // Tag name (alpha representation)
- ZStringTagID fTagID; // Internal tag ID
- Z_Boolean fCheckForPartialTag; // Indicates that the parser should check for a tag missing &
- } ZStringParseTag;
-
-
- // WARNING: For performance reasons, we use a binary
- // search lookup, so the following table must remain
- // sorted alphabetically. if you add more items,
- // make sure to add them in the appropriate place.
- static const ZStringParseTag sParseTags[] = {
- {"À", kZTag_Agrave, true },
- {"Å", kZTag_Aring, false },
- {"Ã", kZTag_Atilde, true },
- {"Ä", kZTag_Auml, true },
- {"Ç", kZTag_Ccedil, true },
- {"É", kZTag_Eacute, true },
- {"Ê", kZTag_Ecirc, true },
- {"È", kZTag_Egrave, true },
- {"Ë", kZTag_Euml, true },
- {"Í", kZTag_Iacute, true },
- {"Î", kZTag_Icirc, true },
- {"Ì", kZTag_Igrave, true },
- {"Ï", kZTag_Iuml, true },
- {"Ñ", kZTag_Ntilde, true },
- {"Ó", kZTag_Oacute, true },
- {"Ô", kZTag_Ocirc, true },
- {"Ò", kZTag_Ograve, true },
- {"Ø", kZTag_Oslash, true },
- {"Õ", kZTag_Otilde, true },
- {"Ö", kZTag_Ouml, true },
- {"Ú", kZTag_Uacute, true },
- {"Û", kZTag_Ucirc, true },
- {"Ù", kZTag_Ugrave, true },
- {"Ü", kZTag_Uuml, true },
- {"á", kZTag_aacute, true },
- {"â", kZTag_acirc, true },
- {"æ", kZTag_aelig, true },
- {"à", kZTag_agrave, true },
- {"&", kZTag_amp, false },
- {"å", kZTag_aring , false },
- {"ã", kZTag_atilde, true },
- {"ä", kZTag_auml, true },
- {"&bdquo", kZTag_bdquo, true },
- {"&bull", kZTag_bull, false },
- {"ç", kZTag_ccedil, true },
- {"¢", kZTag_cent, false },
- {"©", kZTag_copy, false },
- {"é", kZTag_eacute, true },
- {"ê", kZTag_ecirc, true },
- {"è", kZTag_egrave, true },
- {"ë", kZTag_euml, true },
- {">", kZTag_gt, false },
- {"&hellip", kZTag_hellip, false },
- {"í", kZTag_iacute, true },
- {"î", kZTag_icirc, true },
- {"¡", kZTag_iexcl, true },
- {"ì", kZTag_igrave, true },
- {"¿", kZTag_iquest, true },
- {"ï", kZTag_iuml, true },
- {"&ldquo", kZTag_ldquo, true },
- {"&lsquo", kZTag_lsquo, true },
- {"<", kZTag_lt, false },
- {"&mdash", kZTag_mdash, true },
- {"µ", kZTag_micro, false },
- {" ", kZTag_nbsp, true },
- {"&ndash", kZTag_ndash, true },
- {"ñ", kZTag_ntilde, true },
- {"ó", kZTag_oacute, true },
- {"ô", kZTag_ocirc, true },
- {"ò", kZTag_ograve, true },
- {"ø", kZTag_oslash, true },
- {"õ", kZTag_otilde, true },
- {"ö", kZTag_ouml, true },
- {"¶", kZTag_para, false },
- {"&pi", kZTag_pi, false },
- {"£", kZTag_pound, false },
- {"&rdquo", kZTag_rdquo, true },
- {"®", kZTag_reg, false },
- {"&replace", kZTag_replace, true },
- {"&rsquo", kZTag_rsquo, true },
- {"&sbquo", kZTag_sbquo, true },
- {"ß", kZTag_szlig, true },
- {"&trade", kZTag_trade, false },
- {"ú", kZTag_uacute, true },
- {"û", kZTag_ucirc, true },
- {"ù", kZTag_ugrave, true },
- {"ü", kZTag_uuml, true },
- {"¥", kZTag_yen, false },
- {"ÿ", kZTag_yuml, true },
- {"<br>", kZTag_br, true }
- };
-
- static const ZStringTagID sNumericParseTags[] = {
- kZTag_Invalid, // 000
- kZTag_Invalid, // 001
- kZTag_Invalid, // 002
- kZTag_Invalid, // 003
- kZTag_Invalid, // 004
- kZTag_Invalid, // 005
- kZTag_Invalid, // 006
- kZTag_Invalid, // 007
- kZTag_Invalid, // 008
- kZTag_Invalid, // 009
-
- kZTag_Invalid, // 010
- kZTag_Invalid, // 011
- kZTag_Invalid, // 012
- kZTag_Invalid, // 013
- kZTag_Invalid, // 014
- kZTag_Invalid, // 015
- kZTag_Invalid, // 016
- kZTag_Invalid, // 017
- kZTag_Invalid, // 018
- kZTag_Invalid, // 019
-
- kZTag_Invalid, // 020
- kZTag_Invalid, // 021
- kZTag_Invalid, // 022
- kZTag_Invalid, // 023
- kZTag_Invalid, // 024
- kZTag_Invalid, // 025
- kZTag_Invalid, // 026
- kZTag_Invalid, // 027
- kZTag_Invalid, // 028
- kZTag_Invalid, // 029
-
- kZTag_Invalid, // 030
- kZTag_Invalid, // 031
- kZTag_Invalid, // 032
- kZTag_Invalid, // 033
- kZTag_Invalid, // 034
- kZTag_Invalid, // 035
- kZTag_Invalid, // 036
- kZTag_Invalid, // 037
- kZTag_amp, // 038
- kZTag_Invalid, // 039
-
- kZTag_Invalid, // 040
- kZTag_Invalid, // 041
- kZTag_Invalid, // 042
- kZTag_Invalid, // 043
- kZTag_Invalid, // 044
- kZTag_Invalid, // 045
- kZTag_Invalid, // 046
- kZTag_Invalid, // 047
- kZTag_Invalid, // 048
- kZTag_Invalid, // 049
-
- kZTag_Invalid, // 050
- kZTag_Invalid, // 051
- kZTag_Invalid, // 052
- kZTag_Invalid, // 053
- kZTag_Invalid, // 054
- kZTag_Invalid, // 055
- kZTag_Invalid, // 056
- kZTag_Invalid, // 057
- kZTag_Invalid, // 058
- kZTag_Invalid, // 059
-
- kZTag_lt, // 060
- kZTag_Invalid, // 061
- kZTag_gt, // 062
- kZTag_Invalid, // 063
- kZTag_Invalid, // 064
- kZTag_Invalid, // 065
- kZTag_Invalid, // 066
- kZTag_Invalid, // 067
- kZTag_Invalid, // 068
- kZTag_Invalid, // 069
-
- kZTag_Invalid, // 070
- kZTag_Invalid, // 071
- kZTag_Invalid, // 072
- kZTag_Invalid, // 073
- kZTag_Invalid, // 074
- kZTag_Invalid, // 075
- kZTag_Invalid, // 076
- kZTag_Invalid, // 077
- kZTag_Invalid, // 078
- kZTag_Invalid, // 079
-
- kZTag_Invalid, // 080
- kZTag_Invalid, // 081
- kZTag_Invalid, // 082
- kZTag_Invalid, // 083
- kZTag_Invalid, // 084
- kZTag_Invalid, // 085
- kZTag_Invalid, // 086
- kZTag_Invalid, // 087
- kZTag_Invalid, // 088
- kZTag_Invalid, // 089
-
- kZTag_Invalid, // 090
- kZTag_Invalid, // 091
- kZTag_Invalid, // 092
- kZTag_Invalid, // 093
- kZTag_Invalid, // 094
- kZTag_Invalid, // 095
- kZTag_Invalid, // 096
- kZTag_Invalid, // 097
- kZTag_Invalid, // 098
- kZTag_Invalid, // 099
-
- kZTag_Invalid, // 100
- kZTag_Invalid, // 101
- kZTag_Invalid, // 102
- kZTag_Invalid, // 103
- kZTag_Invalid, // 104
- kZTag_Invalid, // 105
- kZTag_Invalid, // 106
- kZTag_Invalid, // 107
- kZTag_Invalid, // 108
- kZTag_Invalid, // 109
-
- kZTag_Invalid, // 110
- kZTag_Invalid, // 111
- kZTag_pi, // 112
- kZTag_Invalid, // 113
- kZTag_Invalid, // 114
- kZTag_Invalid, // 115
- kZTag_Invalid, // 116
- kZTag_Invalid, // 117
- kZTag_Invalid, // 118
- kZTag_Invalid, // 119
-
- kZTag_Invalid, // 120
- kZTag_Invalid, // 121
- kZTag_Invalid, // 122
- kZTag_Invalid, // 123
- kZTag_Invalid, // 124
- kZTag_Invalid, // 125
- kZTag_Invalid, // 126
- kZTag_Invalid, // 127
- kZTag_Invalid, // 128
- kZTag_Invalid, // 129
-
- kZTag_Invalid, // 130
- kZTag_Invalid, // 131
- kZTag_bdquo, // 132
- kZTag_hellip, // 133
- kZTag_Invalid, // 134
- kZTag_Invalid, // 135
- kZTag_Invalid, // 136
- kZTag_Invalid, // 137
- kZTag_Invalid, // 138
- kZTag_Invalid, // 139
-
- kZTag_Invalid, // 140
- kZTag_Invalid, // 141
- kZTag_Invalid, // 142
- kZTag_Invalid, // 143
- kZTag_Invalid, // 144
- kZTag_lsquo, // 145
- kZTag_rsquo, // 146
- kZTag_ldquo, // 147
- kZTag_rdquo, // 148
- kZTag_bull, // 149
-
- kZTag_ndash, // 150
- kZTag_mdash, // 151
- kZTag_Invalid, // 152
- kZTag_trade, // 153
- kZTag_Invalid, // 154
- kZTag_Invalid, // 155
- kZTag_Invalid, // 156
- kZTag_Invalid, // 157
- kZTag_Invalid, // 158
- kZTag_Invalid, // 159
-
- kZTag_nbsp, // 160
- kZTag_iexcl, // 161
- kZTag_cent, // 162
- kZTag_pound, // 163
- kZTag_Invalid, // 164
- kZTag_yen, // 165
- kZTag_Invalid, // 166
- kZTag_Invalid, // 167
- kZTag_Invalid, // 168
- kZTag_copy, // 169
-
- kZTag_Invalid, // 170
- kZTag_Invalid, // 171
- kZTag_Invalid, // 172
- kZTag_Invalid, // 173
- kZTag_reg, // 174
- kZTag_Invalid, // 175
- kZTag_Invalid, // 176
- kZTag_Invalid, // 177
- kZTag_Invalid, // 178
- kZTag_Invalid, // 179
-
- kZTag_Invalid, // 180
- kZTag_micro, // 181
- kZTag_para, // 182
- kZTag_Invalid, // 183
- kZTag_Invalid, // 184
- kZTag_Invalid, // 185
- kZTag_Invalid, // 186
- kZTag_Invalid, // 187
- kZTag_Invalid, // 188
- kZTag_Invalid, // 189
-
- kZTag_Invalid, // 190
- kZTag_iquest, // 191
- kZTag_Agrave, // 192
- kZTag_Aacute, // 193
- kZTag_Acirc, // 194
- kZTag_Atilde, // 195
- kZTag_Auml, // 196
- kZTag_Aring, // 197
- kZTag_AElig, // 198
- kZTag_Ccedil, // 199
-
- kZTag_Egrave, // 200
- kZTag_Eacute, // 201
- kZTag_Ecirc, // 202
- kZTag_Euml, // 203
- kZTag_Igrave, // 204
- kZTag_Iacute, // 205
- kZTag_Icirc, // 206
- kZTag_Iuml, // 207
- kZTag_Invalid, // 208
- kZTag_Ntilde, // 209
-
- kZTag_Ograve, // 210
- kZTag_Oacute, // 211
- kZTag_Ocirc, // 212
- kZTag_Otilde, // 213
- kZTag_Ouml, // 214
- kZTag_Invalid, // 215
- kZTag_Oslash, // 216
- kZTag_Ugrave, // 217
- kZTag_Uacute, // 218
- kZTag_Ucirc, // 219
-
- kZTag_Uuml, // 220
- kZTag_Invalid, // 221
- kZTag_Invalid, // 222
- kZTag_szlig, // 223
- kZTag_agrave, // 224
- kZTag_aacute, // 225
- kZTag_acirc, // 226
- kZTag_atilde, // 227
- kZTag_auml, // 228
- kZTag_aring, // 229
-
- kZTag_aelig, // 230
- kZTag_ccedil, // 231
- kZTag_egrave, // 232
- kZTag_eacute, // 233
- kZTag_ecirc, // 234
- kZTag_euml, // 235
- kZTag_igrave, // 236
- kZTag_iacute, // 237
- kZTag_icirc, // 238
- kZTag_iuml, // 239
-
- kZTag_Invalid, // 240
- kZTag_ntilde, // 241
- kZTag_ograve, // 242
- kZTag_oacute, // 243
- kZTag_ocirc, // 244
- kZTag_otilde, // 245
- kZTag_ouml, // 246
- kZTag_Invalid, // 247
- kZTag_oslash, // 248
- kZTag_ugrave, // 249
-
- kZTag_uacute, // 250
- kZTag_ucirc, // 251
- kZTag_uuml, // 252
- kZTag_Invalid, // 253
- kZTag_Invalid, // 254
- kZTag_yuml // 255
- };
-
- /*------------------------------------------------------------------
- ZToolOptions
-
- The defualt constructor contains the default values for the tool.
- ------------------------------------------------------------------*/
-
- ZToolOptions::ZToolOptions()
- {
- // default settings
- mOutputNumericTags = true;
- mCategorizeOutput = true;
- mAllowTagSemicolon = false;
- mHasOTags = false;
- mOutputWarnings = true;
- mConvertHighASCIIChar = true;
- mPrintErrorsOnly = false;
- mFlagDuplicates = false;
- }
-
- /*------------------------------------------------------------------
- ZToolOptions
-
- This constructor sets most of the variables based upon the input
- values. Two variables, mHasOTags and mPrintErrorsOnly, are
- exceptions because they are only set to true in a particular
- instance and do not depend on the user's option choices.
- ------------------------------------------------------------------*/
-
- ZToolOptions::ZToolOptions(
- Z_Boolean inOutputNumeric,
- Z_Boolean inCategorizeOutput,
- Z_Boolean inAllowSemicolon,
- Z_Boolean inFlagDuplicates,
- Z_Boolean inOutputWarnings,
- Z_Boolean inConvertHighASCIIChar)
- {
- mOutputNumericTags = inOutputNumeric;
- mCategorizeOutput = inCategorizeOutput;
- mAllowTagSemicolon = inAllowSemicolon;
- mFlagDuplicates = inFlagDuplicates;
- mOutputWarnings = inOutputWarnings;
- mConvertHighASCIIChar = inConvertHighASCIIChar;
- mHasOTags = false;
- mPrintErrorsOnly = false;
- }
-
- /*------------------------------------------------------------------
- ZStringParser
- ------------------------------------------------------------------*/
-
- ZStringParser::ZStringParser()
- {
- // This is a singleton class
- check(sParser == NULL);
- sParser = this;
-
- // By default, the two-byte table is all zeros.
- memset(mTwoByteTable, 0, sizeof(mTwoByteTable));
- }
-
-
- /*------------------------------------------------------------------
- OverrideTwoByteTable
- ------------------------------------------------------------------*/
-
- void
- ZStringParser::OverrideTwoByteTable(
- Z_UInt8 * inNewTable)
- {
- memcpy(mTwoByteTable, inNewTable, sizeof(mTwoByteTable));
- }
-
-
- /*------------------------------------------------------------------
- SkipOverSpaces
-
- This method skips over all spaces.
- ------------------------------------------------------------------*/
-
- const char *
- ZStringParser::SkipOverSpaces(
- const char * inCurrentPtr)
- {
- const char * newPtr = inCurrentPtr;
- while (isspace(*newPtr))
- newPtr++;
- return newPtr;
- }
-
-
- /*------------------------------------------------------------------
- ParseAdditionalParameters
-
- This method parses additional parameters contained in the ZString
- tag. Currently, there is only the limit parameter, in the format
- limit=# where # may be an arbitrary length. The limit parameter
- sets the maximum data size allowed for the ZString.
- ------------------------------------------------------------------*/
-
- Z_Boolean
- ZStringParser::ParseAdditionalParameters(
- const char * inCurrentPtr,
- ZStringParseInfo & outParseInfo)
- {
- const char * startOfTagPtr;
- const char * currentNumPtr;
- const char * equalSignPtr;
-
- startOfTagPtr = SkipOverSpaces(inCurrentPtr);
-
- if (*startOfTagPtr == '>') // Found the end marker
- return true;
-
- // Parse a limit parameter
- if (strncmp(startOfTagPtr, "limit=", 6) == 0) // Found a limit tag
- {
- currentNumPtr = strpbrk(startOfTagPtr, " >") - 1; // Go to the last number
-
- Z_UInt16 numericTagValue = 0;
- Z_UInt16 multiple = 1;
-
- equalSignPtr = startOfTagPtr + 5;
-
- // Parse the number by starting at the end and working back to the equal sign
- while (currentNumPtr > equalSignPtr)
- {
- if (*currentNumPtr < '0' || *currentNumPtr > '9')
- return false;
- numericTagValue += (*currentNumPtr - '0') * multiple;
- multiple *= 10;
- currentNumPtr--;
- }
-
- outParseInfo.fHasMaxDataLen = true;
- outParseInfo.fMaxDataLen = numericTagValue;
-
- return true;
- }
-
- return false; // None ofthe parameters matched the ones listed here
- }
-
-
- /*------------------------------------------------------------------
- ParseNamedString
- ------------------------------------------------------------------*/
-
- Z_Boolean
- ZStringParser::ParseNamedString(
- const char * inNamedString,
- ZStringParseInfo & outParseInfo,
- Z_Boolean inDataIsVolatile)
- {
- const char * startOfName;
- const char * endOfName;
- const char * endTag;
- const char * expectedEndTag;
-
- // Until we see otherwise, assume the
- // string is valid.
- outParseInfo.fValidNamedString = true;
-
- // Set the volatile flag
- outParseInfo.fIsVolatile = inDataIsVolatile;
-
- // Set the limit flag (assumed to not be limited)
- outParseInfo.fHasMaxDataLen = false;
-
- // Make sure it starts with a valid tag.
- if (inNamedString[0] != '<')
- {
- debug_str("Badly-formed named string");
- goto BadEndTag;
- }
-
- startOfName = SkipOverSpaces(inNamedString+1);
-
- // Decide which ending should be expected
- expectedEndTag = "</Z>";
- if (strncmp(startOfName, "Z name=", 7) != 0)
- {
- expectedEndTag = "</O>";
- if (strncmp(startOfName, "O name=", 7) != 0)
- {
- debug_str("Badly-formed named string");
- goto BadEndTag;
- }
- }
-
- outParseInfo.fNamedStringStart = inNamedString;
- outParseInfo.fNameStr = &inNamedString[8];
-
- // Scan for a space or a right brace. This signals the end
- // of the tag or additional parameters.
- endOfName = strpbrk(outParseInfo.fNameStr, " >");
- if (endOfName == NULL)
- {
- debug_str("Badly-formed named string");
- goto BadStringName;
- }
-
- // This part parses additional parameters in the tag
- while (*endOfName == ' ')
- {
- if(!ParseAdditionalParameters(endOfName, outParseInfo))
- goto BadStringName;
-
- endOfName = strpbrk(endOfName + 1, " >"); // Find the next space or >
- if (endOfName == NULL) // It is an error if it can't find either
- {
- debug_str("Badly-formed named string");
- goto BadStringName;
- }
- }
-
- if (*endOfName != '>')
- {
- debug_str("Badly-formed named string");
- goto BadStringName;
- }
-
- outParseInfo.fNameStrLen = endOfName - outParseInfo.fNameStr;
- outParseInfo.fValueStr = endOfName + 1;
-
- // Make sure the string is closed off with an end tag.
- endTag = strstr(outParseInfo.fValueStr, expectedEndTag);
- if (endTag == NULL)
- {
- debug_str("Badly-formed named string");
- goto BadEndTag;
- }
-
- outParseInfo.fValueStrLen = endTag - outParseInfo.fValueStr;
- outParseInfo.fNamedStringLimit = endTag + 4;
-
- check(outParseInfo.fNamedStringLimit > outParseInfo.fNamedStringStart);
-
- return true;
-
- BadEndTag:
- BadStringName:
- outParseInfo.fNamedStringLimit = outParseInfo.fNamedStringStart + 256; // doesn't have an end, so assign one
- outParseInfo.fValidNamedString = false;
- return false;
- }
-
-
- /*------------------------------------------------------------------
- CreateNewZString
- ------------------------------------------------------------------*/
-
- void
- ZStringParser::CreateNewZString(
- const ZStringParseInfo & inParseInfo,
- ZString & outDestString)
- {
- const char * curInputPtr;
- const char * limitInputPtr;
- Z_UInt16 outputCount = 0;
-
- // First, scan the string to validate it and count
- // the number of characters we'll need.
- curInputPtr = inParseInfo.fValueStr;
- limitInputPtr = curInputPtr + inParseInfo.fValueStrLen;
-
- while (curInputPtr < limitInputPtr)
- {
- if (*curInputPtr == '&' || *curInputPtr == '<')
- {
- Z_UInt16 tagNameLength;
-
- ZStringTagID tagID = LookUpTagID(curInputPtr, limitInputPtr, tagNameLength);
- check(tagID != kZTag_Invalid);
-
- if (tagID == kZTag_Invalid)
- {
- curInputPtr++;
- outputCount++;
- }
- else if (tagID == kZTag_replace)
- {
- // We special-case the "replace" tag because
- // it doesn't get replaced, and it has two additional
- // characters.
- curInputPtr += 8;
- check(curInputPtr[0] >= '0' && curInputPtr[0] <= '9');
- check(curInputPtr[1] >= '0' && curInputPtr[1] <= '9');
- curInputPtr += 2;
- check(curInputPtr <= limitInputPtr);
- outputCount += 10;
- }
- else
- {
- curInputPtr += tagNameLength;
- outputCount += GetTagReplacement(tagID, NULL);
- }
- }
- else
- {
- outputCount += mTwoByteTable[(Z_UInt8)*curInputPtr] + 1;
- curInputPtr += mTwoByteTable[(Z_UInt8)*curInputPtr] + 1;
- }
- }
-
- // Allocate the space for the string.
- outDestString.AllocateData(outputCount);
-
- if (outDestString.GetData() != NULL)
- {
- // Next, do a second scan to replace the characters.
- curInputPtr = inParseInfo.fValueStr;
- limitInputPtr = curInputPtr + inParseInfo.fValueStrLen;
-
- char * curOutputPtr = outDestString.GetData()->GetDataArray();
-
- while (curInputPtr < limitInputPtr)
- {
- if (*curInputPtr == '&' || *curInputPtr == '<')
- {
- Z_UInt16 tagNameLength;
-
- ZStringTagID tagID = LookUpTagID(curInputPtr, limitInputPtr, tagNameLength);
-
- if (tagID == kZTag_Invalid)
- {
- *curOutputPtr++ = *curInputPtr++;
- }
- else if (tagID == kZTag_replace)
- {
- // We special-case the "replace" tag because
- // it doesn't get replaced, and it has two additional
- // characters.
- memcpy(curOutputPtr, curInputPtr, 10);
- curOutputPtr += 10;
- curInputPtr += 10;
- }
- else
- {
- curOutputPtr += GetTagReplacement(tagID, curOutputPtr);
- curInputPtr += tagNameLength;
- }
- }
- else
- {
- Z_UInt8 charCount = mTwoByteTable[(Z_UInt8)*curInputPtr] + 1;
-
- while (charCount > 0)
- {
- *curOutputPtr++ = *curInputPtr++;
- charCount--;
- }
- }
- }
- }
-
- // Register the string with the dictionary so
- // we don't have to continue parsing it each time.
- ZStringDictionary::GetZStringDictionary().RegisterString(inParseInfo, outDestString);
- }
-
-
- /*------------------------------------------------------------------
- ConvertNamedStringToTag [static]
-
- This method converts the parsed named string to a named string
- that contains only tags.
- ------------------------------------------------------------------*/
-
- Z_Boolean
- ZStringParser::ConvertNamedStringToTag(
- const ZStringParseInfo & inParseInfo,
- const ZToolOptions & inOptions,
- ZString & outDestString,
- ZParserWarningType & outWarningType)
- {
- check(inParseInfo.fValidNamedString);
-
- // Initialize the warning type to none
- outWarningType = kZParser_NoWarnings;
-
- // Copy the portion of the named string before the actual string prototype
- outDestString.SetString(inParseInfo.fNamedStringStart, inParseInfo.fValueStr - inParseInfo.fNamedStringStart);
-
- // Now, copy the prototype one character at a time until we hit a tag
- const char * curInputPtr;
- char tagString[32];
-
- curInputPtr = inParseInfo.fValueStr;
-
- while (curInputPtr < inParseInfo.fValueStr + inParseInfo.fValueStrLen)
- {
- if (*curInputPtr == '&' || *curInputPtr == '<')
- {
- ZStringTagID tagID;
- Z_UInt16 tagLength;
- Z_Boolean isNumeric;
-
- isNumeric = (curInputPtr[0] == '&' && curInputPtr[1] == '#');
-
- tagID = LookUpTagID(curInputPtr, inParseInfo.fValueStr + inParseInfo.fValueStrLen, tagLength);
-
- check(tagID != kZTag_Invalid);
- if (tagID == kZTag_Invalid)
- {
- return false;
- }
- else if (tagID == kZTag_replace)
- {
- ZString replaceString;
-
- replaceString.SetString(curInputPtr, tagLength);
- outDestString += replaceString;
- curInputPtr += tagLength;
- }
- else if (*curInputPtr == '<')
- {
- memcpy(tagString, curInputPtr, tagLength);
- tagString[tagLength] = '\0';
- outDestString += tagString;
- curInputPtr += tagLength;
- }
- else if (inOptions.mOutputNumericTags) // Output numeric tags
- {
- sprintf(tagString, "%.3d;", ConvertTagIDToNumeric(tagID));
- outDestString += tagString;
- curInputPtr += tagLength;
- }
- else // Output alpha tags
- {
- sprintf(tagString, "%s", ConvertTagIDToString(tagID));
- outDestString += tagString;
- curInputPtr += tagLength;
- }
-
- if (!isNumeric && inOptions.mAllowTagSemicolon && *curInputPtr == ';')
- {
- // for alphabetic tags, skips over ;'s at the end if option is enabled
- curInputPtr++;
- }
- }
- else
- {
- unsigned char charValue = (unsigned char)(*curInputPtr);
- if (charValue > 127 && inOptions.mConvertHighASCIIChar) // if we want to try to convert high ASCII characters
- {
- ZStringTagID zStringTag = sNumericParseTags[charValue];
- if (zStringTag != kZTag_Invalid)
- {
- // Translate into html code (based on numeric or alpha option)
- if (inOptions.mOutputNumericTags)
- sprintf(tagString, "%.3d;", ConvertTagIDToNumeric(zStringTag));
- else
- sprintf(tagString, "%s", ConvertTagIDToString(zStringTag));
-
- outDestString += tagString; // Add the tag to the stream
- curInputPtr++;
- outWarningType |= kZParser_ChangedString;
- }
- else // Append the character to the output string
- {
- outDestString += *curInputPtr++;
- outWarningType |= kZParser_HasHighASCII;
- }
- }
- else if (inOptions.mOutputWarnings) // Only check if we are printing warnings
- {
- if (FindPossibleTag(curInputPtr, inParseInfo))
- outWarningType |= kZParser_FoundPossibleTag;
- outDestString += *curInputPtr++;
- }
- else
- outDestString += *curInputPtr++;
- }
- }
-
- // Finally, copy the rest of the named string
- ZString stringEnd;
- stringEnd.SetString(inParseInfo.fValueStr + inParseInfo.fValueStrLen,
- inParseInfo.fNamedStringLimit - (inParseInfo.fValueStr + inParseInfo.fValueStrLen));
- outDestString += stringEnd;
-
- return true;
- }
-
-
- /*------------------------------------------------------------------
- FindPossibleTag
-
- This method searches the parse tags to see if one of them matches
- the current input. It returns true if it does and false otherwise.
- ------------------------------------------------------------------*/
-
- Z_Boolean
- ZStringParser::FindPossibleTag(
- const char * inCurInputPtr,
- const ZStringParseInfo & inParseInfo)
- {
- Z_UInt32 strLength;
- Z_UInt32 curTagIndex;
- Z_UInt32 totalEntries = sizeof(sParseTags) / sizeof(ZStringParseTag);
-
- if (!isalpha(*inCurInputPtr))
- return false;
-
- // Scan through the list of numerics for possible match
- for (curTagIndex = 0; curTagIndex < totalEntries; curTagIndex++)
- {
- // We are ignoring the tags that could be stand-alone words or parts of words
- if (!sParseTags[curTagIndex].fCheckForPartialTag)
- continue;
-
- strLength = strlen(sParseTags[curTagIndex].fTagName) - 1; // we are ignoring the initial &
- if ((inCurInputPtr + strLength) <= inParseInfo.fValueStr + inParseInfo.fValueStrLen)
- {
- if (strncmp(inCurInputPtr, (sParseTags[curTagIndex].fTagName+1), strLength)==0)
- if (sParseTags[curTagIndex].fTagID == kZTag_replace)
- {
- if (inCurInputPtr[7] >= '0' && inCurInputPtr[7] <= '9' &&
- inCurInputPtr[8] >= '0' && inCurInputPtr[8] <= '9')
- return true;
- }
- else
- return true;
- }
- }
- return false;
- }
-
-
- /*------------------------------------------------------------------
- CheckDataLength
- ------------------------------------------------------------------*/
-
- Z_Boolean
- ZStringParser::CheckDataLength(
- const ZStringParseInfo & inParseInfo)
- {
- Z_UInt16 stringLen = inParseInfo.fValueStrLen;
- const char * curPtr;
- char * searchString = new char[stringLen+1];
- char * endOfStringPtr = searchString + stringLen;
-
- strncpy(searchString, inParseInfo.fValueStr, stringLen);
- searchString[stringLen] = '\0';
- curPtr = searchString;
-
- while (curPtr < endOfStringPtr)
- {
- curPtr = strpbrk(curPtr, "&");
- if (curPtr == NULL) // No tag found so break
- break;
-
- // if it is numeric, it is a set length of 6 characters (ex: {)
- if ((curPtr+1) < endOfStringPtr && curPtr[1] == '#')
- {
- stringLen -= 5; // 6 characters represent 1 letter, so only subtract (6-1)=5
- }
- else // It is alpha tag
- {
- ZStringTagID tagID;
- Z_UInt16 tagLength;
-
- tagID = LookUpTagID(curPtr, inParseInfo.fValueStr + inParseInfo.fValueStrLen, tagLength);
- if (tagID != kZTag_Invalid) // No tag ID found, so ignore this tag.
- stringLen -= (tagLength - 1);
- }
- curPtr++;
- }
-
- delete [] searchString;
-
- return (stringLen > inParseInfo.fMaxDataLen);
- }
-
-
- /*------------------------------------------------------------------
- CompareTagStrings
- ------------------------------------------------------------------*/
-
- Z_SInt32
- ZStringParser::CompareTagStrings(
- const char * inTagFromTable,
- const char * inParseString,
- const char * inParseStringLimit)
- {
- while (*inTagFromTable != '\0' &&
- inParseString < inParseStringLimit)
- {
- if (*inTagFromTable < *inParseString)
- return -1;
- else if (*inTagFromTable > *inParseString)
- return 1;
-
- inTagFromTable++;
- inParseString++;
- }
-
- return 0;
- }
-
-
- /*------------------------------------------------------------------
- ConvertTagIDToNumeric
- ------------------------------------------------------------------*/
-
- Z_UInt32
- ZStringParser::ConvertTagIDToNumeric(
- ZStringTagID inTag)
- {
- // The replace tag has no equivalent
- check(inTag != kZTag_Invalid && inTag != kZTag_replace);
-
- Z_UInt32 curTagIndex;
- Z_UInt32 totalEntries = sizeof(sNumericParseTags) / sizeof(ZStringTagID);
-
- // Scan through the entire list of numerics
- // for this tag.
- for (curTagIndex = 0; curTagIndex < totalEntries; curTagIndex++)
- {
- if (sNumericParseTags[curTagIndex] == inTag)
- return curTagIndex;
- }
-
- debug_str("Didn't find tag");
-
- return 0;
- }
-
-
- /*------------------------------------------------------------------
- ConvertTagIDToString
- ------------------------------------------------------------------*/
-
- const char *
- ZStringParser::ConvertTagIDToString(
- ZStringTagID inTag)
- {
- // The replace tag has no equivalent
- check(inTag != kZTag_Invalid && inTag != kZTag_replace);
-
- Z_UInt32 curTagIndex;
- Z_UInt32 totalEntries = sizeof(sParseTags) / sizeof(ZStringParseTag);
-
- // Scan through the entire list of numerics
- // for this tag.
- for (curTagIndex = 0; curTagIndex < totalEntries; curTagIndex++)
- {
- if (sParseTags[curTagIndex].fTagID == inTag)
- return sParseTags[curTagIndex].fTagName;
- }
-
- debug_str("Didn't find tag");
-
- return NULL;
- }
-
-
- /*------------------------------------------------------------------
- LookUpTagID [static]
-
- Looks up a tag starting with an ampersand ("&") or a
- left bracket ("<").
- ------------------------------------------------------------------*/
-
- ZStringTagID
- ZStringParser::LookUpTagID(
- const char * inStartOfTag,
- const char * inStringLimit,
- Z_UInt16 & outTagNameLength)
- {
- ZStringTagID zStringTag = kZTag_Invalid;
-
- // Make sure I didn't forget to type an entry
- check(sizeof(sNumericParseTags) == 256 * sizeof(ZStringTagID));
-
- check(inStartOfTag[0] == '&' || inStartOfTag[0] == '<');
-
- // Is it a numeric tag?
- if (inStartOfTag[0] == '&' && inStartOfTag[1] == '#')
- {
- Z_UInt32 numericTagValue = 0;
-
- // There should be at least five characters in the tag
- check(inStringLimit >= inStartOfTag + 6);
- if (inStringLimit < inStartOfTag + 6)
- return kZTag_Invalid;
-
- check(inStartOfTag[2] >= '0' && inStartOfTag[2] <= '9');
- if (inStartOfTag[2] < '0' || inStartOfTag[2] > '9')
- return kZTag_Invalid;
- numericTagValue = (inStartOfTag[2] - '0') * 100;
-
- check(inStartOfTag[3] >= '0' && inStartOfTag[3] <= '9');
- if (inStartOfTag[3] < '0' || inStartOfTag[3] > '9')
- return kZTag_Invalid;
- numericTagValue += (inStartOfTag[3] - '0') * 10;
-
- check(inStartOfTag[4] >= '0' && inStartOfTag[4] <= '9');
- if (inStartOfTag[4] < '0' || inStartOfTag[4] > '9')
- return kZTag_Invalid;
- numericTagValue += (inStartOfTag[4] - '0');
-
- // It should end in a semicolon
- check(inStartOfTag[5] == ';');
- if (inStartOfTag[5] != ';')
- return kZTag_Invalid;
-
- if (numericTagValue >= 256)
- return kZTag_Invalid;
-
- zStringTag = sNumericParseTags[numericTagValue];
- check(zStringTag != kZTag_Invalid);
-
- if (zStringTag != kZTag_Invalid)
- outTagNameLength = 6;
- }
- else
- {
- Z_UInt32 minEntryIndex = 0;
- Z_UInt32 maxEntryIndex = sizeof(sParseTags) / sizeof(ZStringParseTag);
- Z_UInt32 curEntryIndex;
- Z_UInt32 maxNumberOfEntries = maxEntryIndex;
-
- while (true)
- {
- // Cut the search in half.
- curEntryIndex = (minEntryIndex + maxEntryIndex) / 2;
-
- if (curEntryIndex > maxNumberOfEntries)
- return kZTag_Invalid;
-
- Z_SInt32 compareResult;
- compareResult = CompareTagStrings(
- sParseTags[curEntryIndex].fTagName,
- inStartOfTag,
- inStringLimit);
-
- if (compareResult == 0)
- {
- Z_UInt32 tagNameLength = strlen(sParseTags[curEntryIndex].fTagName);
-
- // Make sure we didn't just swallow a portion
- // of one of the tag names.
- if (tagNameLength <= inStringLimit - inStartOfTag)
- {
- zStringTag = sParseTags[curEntryIndex].fTagID;
- outTagNameLength = tagNameLength;
- }
- break;
- }
- else if (compareResult > 0)
- {
- // The tag was less than the value in the table,
- // so we need to search further up the table.
- maxEntryIndex = curEntryIndex - 1;
- }
- else // if (compareResult < 0)
- {
- // The tag was greater than the value in the table,
- // so we need to search further down the table.
- minEntryIndex = curEntryIndex + 1;
- }
-
- // If we converged, but didn't find a match,
- // break out of the loop.
- if (maxEntryIndex < minEntryIndex)
- break;
- }
- }
-
- return zStringTag;
- }
-
-
-
-