MacHack 2001

home *** CD-ROM | disk | FTP | other *** search

/ MacHack 2001 / MacHack 2001.toast / pc / Sessions / Traut / ZStrings / Source / CrossPlatform / ZStringParser.cpp < prev next >

Wrap

C/C++ Source or Header | 2001-06-23 | 31.9 KB | 1,186 lines

/*==================================================================
    File:       ZStringParser.cpp

    Contains:   Class for parsing named ZStrings into platform-
                specific strings.

    Written by: Eric Traut

    Copyright:  2000-2001 Connectix Corporation

    This source has been placed into the public domain by
    Connectix Corporation. You have the right to modify,
    distribute or use this code without any legal limitations
    or financial/licensing requirements. Connectix is not
    liable for any problems that result from the use of this
    code.

    If you have comments, feedback, questions, or would like
    to submit bug fixes or updates to this code, please email
    opensource@connectix.com.
==================================================================*/

#include "ZStringParser.h"
#include "ZStringDictionary.h"
#include "ZString.h"

#include <stdio.h>
#include <ctype.h>

// Storage for the singleton pointer; it is assigned by the
// ZStringParser constructor.
ZStringParser * ZStringParser::sParser = NULL;

// One row of the alphabetic tag table: the textual tag, the internal
// ID it maps to, and whether the warning scan should look for the tag
// written without its leading '&'.
struct ZStringParseTag
{
    const char *    fTagName;               // Tag name (alpha representation)
    ZStringTagID    fTagID;                 // Internal tag ID
    Z_Boolean       fCheckForPartialTag;    // Indicates that the parser should check for a tag missing &
};

// WARNING: For performance reasons, we use a binary
// search lookup, so the following table must remain
// sorted alphabetically. If you add more items,
// make sure to add them in the appropriate place.
static const ZStringParseTag sParseTags[] = { {"À", kZTag_Agrave, true }, {"Å", kZTag_Aring, false }, {"Ã", kZTag_Atilde, true }, {"Ä", kZTag_Auml, true }, {"Ç", kZTag_Ccedil, true }, {"É", kZTag_Eacute, true }, {"Ê", kZTag_Ecirc, true }, {"È", kZTag_Egrave, true }, {"Ë", kZTag_Euml, true }, {"Í", kZTag_Iacute, true }, {"Î", kZTag_Icirc, true }, {"Ì", kZTag_Igrave, true }, {"Ï", kZTag_Iuml, true }, {"Ñ", kZTag_Ntilde, true }, {"Ó", kZTag_Oacute, true }, {"Ô", kZTag_Ocirc, true }, {"Ò", kZTag_Ograve, true }, {"Ø", kZTag_Oslash, true }, {"Õ", kZTag_Otilde, true }, {"Ö", kZTag_Ouml, true }, {"Ú", kZTag_Uacute, true }, {"Û", kZTag_Ucirc, true }, {"Ù", kZTag_Ugrave, true }, {"Ü", kZTag_Uuml, true }, {"á", kZTag_aacute, true }, {"â", kZTag_acirc, true }, {"æ", kZTag_aelig, true }, {"à", kZTag_agrave, true }, {"&", kZTag_amp, false }, {"å", kZTag_aring , false }, {"ã", kZTag_atilde, true }, {"ä", kZTag_auml, true }, {"&bdquo", kZTag_bdquo, true }, {"&bull", kZTag_bull, false }, {"ç", kZTag_ccedil, true }, {"¢", kZTag_cent, false }, {"©", kZTag_copy, false }, {"é", kZTag_eacute, true }, {"ê", kZTag_ecirc, true }, {"è", kZTag_egrave, true }, {"ë", kZTag_euml, true }, {">", kZTag_gt, false }, {"&hellip", kZTag_hellip, false }, {"í", kZTag_iacute, true }, {"î", kZTag_icirc, true }, {"¡", kZTag_iexcl, true }, {"ì", kZTag_igrave, true }, {"¿", kZTag_iquest, true }, {"ï", kZTag_iuml, true }, {"&ldquo", kZTag_ldquo, true }, {"&lsquo", kZTag_lsquo, true }, {"<", kZTag_lt, false }, {"&mdash", kZTag_mdash, true }, {"µ", kZTag_micro, false }, {" ", kZTag_nbsp, true }, {"&ndash", kZTag_ndash, true }, {"ñ", kZTag_ntilde, true }, {"ó", kZTag_oacute, true }, {"ô", kZTag_ocirc, true }, {"ò", kZTag_ograve, true }, {"ø", kZTag_oslash, true }, {"õ", kZTag_otilde, true }, {"ö", kZTag_ouml, true }, {"¶", kZTag_para, false }, {"&pi", kZTag_pi, false }, {"£", kZTag_pound, false }, {"&rdquo", kZTag_rdquo, true }, {"®", kZTag_reg, false }, {"&replace", kZTag_replace, true }, {"&rsquo", 
kZTag_rsquo, true }, {"&sbquo", kZTag_sbquo, true }, {"ß", kZTag_szlig, true }, {"&trade", kZTag_trade, false }, {"ú", kZTag_uacute, true }, {"û", kZTag_ucirc, true }, {"ù", kZTag_ugrave, true }, {"ü", kZTag_uuml, true }, {"¥", kZTag_yen, false }, {"ÿ", kZTag_yuml, true }, {"<br>", kZTag_br, true } }; static const ZStringTagID sNumericParseTags[] = { kZTag_Invalid, // 000 kZTag_Invalid, // 001 kZTag_Invalid, // 002 kZTag_Invalid, // 003 kZTag_Invalid, // 004 kZTag_Invalid, // 005 kZTag_Invalid, // 006 kZTag_Invalid, // 007 kZTag_Invalid, // 008 kZTag_Invalid, // 009 kZTag_Invalid, // 010 kZTag_Invalid, // 011 kZTag_Invalid, // 012 kZTag_Invalid, // 013 kZTag_Invalid, // 014 kZTag_Invalid, // 015 kZTag_Invalid, // 016 kZTag_Invalid, // 017 kZTag_Invalid, // 018 kZTag_Invalid, // 019 kZTag_Invalid, // 020 kZTag_Invalid, // 021 kZTag_Invalid, // 022 kZTag_Invalid, // 023 kZTag_Invalid, // 024 kZTag_Invalid, // 025 kZTag_Invalid, // 026 kZTag_Invalid, // 027 kZTag_Invalid, // 028 kZTag_Invalid, // 029 kZTag_Invalid, // 030 kZTag_Invalid, // 031 kZTag_Invalid, // 032 kZTag_Invalid, // 033 kZTag_Invalid, // 034 kZTag_Invalid, // 035 kZTag_Invalid, // 036 kZTag_Invalid, // 037 kZTag_amp, // 038 kZTag_Invalid, // 039 kZTag_Invalid, // 040 kZTag_Invalid, // 041 kZTag_Invalid, // 042 kZTag_Invalid, // 043 kZTag_Invalid, // 044 kZTag_Invalid, // 045 kZTag_Invalid, // 046 kZTag_Invalid, // 047 kZTag_Invalid, // 048 kZTag_Invalid, // 049 kZTag_Invalid, // 050 kZTag_Invalid, // 051 kZTag_Invalid, // 052 kZTag_Invalid, // 053 kZTag_Invalid, // 054 kZTag_Invalid, // 055 kZTag_Invalid, // 056 kZTag_Invalid, // 057 kZTag_Invalid, // 058 kZTag_Invalid, // 059 kZTag_lt, // 060 kZTag_Invalid, // 061 kZTag_gt, // 062 kZTag_Invalid, // 063 kZTag_Invalid, // 064 kZTag_Invalid, // 065 kZTag_Invalid, // 066 kZTag_Invalid, // 067 kZTag_Invalid, // 068 kZTag_Invalid, // 069 kZTag_Invalid, // 070 kZTag_Invalid, // 071 kZTag_Invalid, // 072 kZTag_Invalid, // 073 kZTag_Invalid, // 074 
kZTag_Invalid, // 075 kZTag_Invalid, // 076 kZTag_Invalid, // 077 kZTag_Invalid, // 078 kZTag_Invalid, // 079 kZTag_Invalid, // 080 kZTag_Invalid, // 081 kZTag_Invalid, // 082 kZTag_Invalid, // 083 kZTag_Invalid, // 084 kZTag_Invalid, // 085 kZTag_Invalid, // 086 kZTag_Invalid, // 087 kZTag_Invalid, // 088 kZTag_Invalid, // 089 kZTag_Invalid, // 090 kZTag_Invalid, // 091 kZTag_Invalid, // 092 kZTag_Invalid, // 093 kZTag_Invalid, // 094 kZTag_Invalid, // 095 kZTag_Invalid, // 096 kZTag_Invalid, // 097 kZTag_Invalid, // 098 kZTag_Invalid, // 099 kZTag_Invalid, // 100 kZTag_Invalid, // 101 kZTag_Invalid, // 102 kZTag_Invalid, // 103 kZTag_Invalid, // 104 kZTag_Invalid, // 105 kZTag_Invalid, // 106 kZTag_Invalid, // 107 kZTag_Invalid, // 108 kZTag_Invalid, // 109 kZTag_Invalid, // 110 kZTag_Invalid, // 111 kZTag_pi, // 112 kZTag_Invalid, // 113 kZTag_Invalid, // 114 kZTag_Invalid, // 115 kZTag_Invalid, // 116 kZTag_Invalid, // 117 kZTag_Invalid, // 118 kZTag_Invalid, // 119 kZTag_Invalid, // 120 kZTag_Invalid, // 121 kZTag_Invalid, // 122 kZTag_Invalid, // 123 kZTag_Invalid, // 124 kZTag_Invalid, // 125 kZTag_Invalid, // 126 kZTag_Invalid, // 127 kZTag_Invalid, // 128 kZTag_Invalid, // 129 kZTag_Invalid, // 130 kZTag_Invalid, // 131 kZTag_bdquo, // 132 kZTag_hellip, // 133 kZTag_Invalid, // 134 kZTag_Invalid, // 135 kZTag_Invalid, // 136 kZTag_Invalid, // 137 kZTag_Invalid, // 138 kZTag_Invalid, // 139 kZTag_Invalid, // 140 kZTag_Invalid, // 141 kZTag_Invalid, // 142 kZTag_Invalid, // 143 kZTag_Invalid, // 144 kZTag_lsquo, // 145 kZTag_rsquo, // 146 kZTag_ldquo, // 147 kZTag_rdquo, // 148 kZTag_bull, // 149 kZTag_ndash, // 150 kZTag_mdash, // 151 kZTag_Invalid, // 152 kZTag_trade, // 153 kZTag_Invalid, // 154 kZTag_Invalid, // 155 kZTag_Invalid, // 156 kZTag_Invalid, // 157 kZTag_Invalid, // 158 kZTag_Invalid, // 159 kZTag_nbsp, // 160 kZTag_iexcl, // 161 kZTag_cent, // 162 kZTag_pound, // 163 kZTag_Invalid, // 164 kZTag_yen, // 165 kZTag_Invalid, // 166 kZTag_Invalid, 
// 167 kZTag_Invalid, // 168 kZTag_copy, // 169 kZTag_Invalid, // 170 kZTag_Invalid, // 171 kZTag_Invalid, // 172 kZTag_Invalid, // 173 kZTag_reg, // 174 kZTag_Invalid, // 175 kZTag_Invalid, // 176 kZTag_Invalid, // 177 kZTag_Invalid, // 178 kZTag_Invalid, // 179 kZTag_Invalid, // 180 kZTag_micro, // 181 kZTag_para, // 182 kZTag_Invalid, // 183 kZTag_Invalid, // 184 kZTag_Invalid, // 185 kZTag_Invalid, // 186 kZTag_Invalid, // 187 kZTag_Invalid, // 188 kZTag_Invalid, // 189 kZTag_Invalid, // 190 kZTag_iquest, // 191 kZTag_Agrave, // 192 kZTag_Aacute, // 193 kZTag_Acirc, // 194 kZTag_Atilde, // 195 kZTag_Auml, // 196 kZTag_Aring, // 197 kZTag_AElig, // 198 kZTag_Ccedil, // 199 kZTag_Egrave, // 200 kZTag_Eacute, // 201 kZTag_Ecirc, // 202 kZTag_Euml, // 203 kZTag_Igrave, // 204 kZTag_Iacute, // 205 kZTag_Icirc, // 206 kZTag_Iuml, // 207 kZTag_Invalid, // 208 kZTag_Ntilde, // 209 kZTag_Ograve, // 210 kZTag_Oacute, // 211 kZTag_Ocirc, // 212 kZTag_Otilde, // 213 kZTag_Ouml, // 214 kZTag_Invalid, // 215 kZTag_Oslash, // 216 kZTag_Ugrave, // 217 kZTag_Uacute, // 218 kZTag_Ucirc, // 219 kZTag_Uuml, // 220 kZTag_Invalid, // 221 kZTag_Invalid, // 222 kZTag_szlig, // 223 kZTag_agrave, // 224 kZTag_aacute, // 225 kZTag_acirc, // 226 kZTag_atilde, // 227 kZTag_auml, // 228 kZTag_aring, // 229 kZTag_aelig, // 230 kZTag_ccedil, // 231 kZTag_egrave, // 232 kZTag_eacute, // 233 kZTag_ecirc, // 234 kZTag_euml, // 235 kZTag_igrave, // 236 kZTag_iacute, // 237 kZTag_icirc, // 238 kZTag_iuml, // 239 kZTag_Invalid, // 240 kZTag_ntilde, // 241 kZTag_ograve, // 242 kZTag_oacute, // 243 kZTag_ocirc, // 244 kZTag_otilde, // 245 kZTag_ouml, // 246 kZTag_Invalid, // 247 kZTag_oslash, // 248 kZTag_ugrave, // 249 kZTag_uacute, // 250 kZTag_ucirc, // 251 kZTag_uuml, // 252 kZTag_Invalid, // 253 kZTag_Invalid, // 254 kZTag_yuml // 255 }; /*------------------------------------------------------------------ ZToolOptions The defualt constructor contains the default values for the tool. 
------------------------------------------------------------------*/ ZToolOptions::ZToolOptions() { // default settings mOutputNumericTags = true; mCategorizeOutput = true; mAllowTagSemicolon = false; mHasOTags = false; mOutputWarnings = true; mConvertHighASCIIChar = true; mPrintErrorsOnly = false; mFlagDuplicates = false; } /*------------------------------------------------------------------ ZToolOptions This constructor sets most of the variables based upon the input values. Two variables, mHasOTags and mPrintErrorsOnly, are exceptions because they are only set to true in a particular instance and do not depend on the user's option choices. ------------------------------------------------------------------*/ ZToolOptions::ZToolOptions( Z_Boolean inOutputNumeric, Z_Boolean inCategorizeOutput, Z_Boolean inAllowSemicolon, Z_Boolean inFlagDuplicates, Z_Boolean inOutputWarnings, Z_Boolean inConvertHighASCIIChar) { mOutputNumericTags = inOutputNumeric; mCategorizeOutput = inCategorizeOutput; mAllowTagSemicolon = inAllowSemicolon; mFlagDuplicates = inFlagDuplicates; mOutputWarnings = inOutputWarnings; mConvertHighASCIIChar = inConvertHighASCIIChar; mHasOTags = false; mPrintErrorsOnly = false; } /*------------------------------------------------------------------ ZStringParser ------------------------------------------------------------------*/ ZStringParser::ZStringParser() { // This is a singleton class check(sParser == NULL); sParser = this; // By default, the two-byte table is all zeros. memset(mTwoByteTable, 0, sizeof(mTwoByteTable)); } /*------------------------------------------------------------------ OverrideTwoByteTable ------------------------------------------------------------------*/ void ZStringParser::OverrideTwoByteTable( Z_UInt8 * inNewTable) { memcpy(mTwoByteTable, inNewTable, sizeof(mTwoByteTable)); } /*------------------------------------------------------------------ SkipOverSpaces This method skips over all spaces. 
------------------------------------------------------------------*/ const char * ZStringParser::SkipOverSpaces( const char * inCurrentPtr) { const char * newPtr = inCurrentPtr; while (isspace(*newPtr)) newPtr++; return newPtr; } /*------------------------------------------------------------------ ParseAdditionalParameters This method parses additional parameters contained in the ZString tag. Currently, there is only the limit parameter, in the format limit=# where # may be an arbitrary length. The limit parameter sets the maximum data size allowed for the ZString. ------------------------------------------------------------------*/ Z_Boolean ZStringParser::ParseAdditionalParameters( const char * inCurrentPtr, ZStringParseInfo & outParseInfo) { const char * startOfTagPtr; const char * currentNumPtr; const char * equalSignPtr; startOfTagPtr = SkipOverSpaces(inCurrentPtr); if (*startOfTagPtr == '>') // Found the end marker return true; // Parse a limit parameter if (strncmp(startOfTagPtr, "limit=", 6) == 0) // Found a limit tag { currentNumPtr = strpbrk(startOfTagPtr, " >") - 1; // Go to the last number Z_UInt16 numericTagValue = 0; Z_UInt16 multiple = 1; equalSignPtr = startOfTagPtr + 5; // Parse the number by starting at the end and working back to the equal sign while (currentNumPtr > equalSignPtr) { if (*currentNumPtr < '0' || *currentNumPtr > '9') return false; numericTagValue += (*currentNumPtr - '0') * multiple; multiple *= 10; currentNumPtr--; } outParseInfo.fHasMaxDataLen = true; outParseInfo.fMaxDataLen = numericTagValue; return true; } return false; // None ofthe parameters matched the ones listed here } /*------------------------------------------------------------------ ParseNamedString ------------------------------------------------------------------*/ Z_Boolean ZStringParser::ParseNamedString( const char * inNamedString, ZStringParseInfo & outParseInfo, Z_Boolean inDataIsVolatile) { const char * startOfName; const char * endOfName; const char * 
endTag; const char * expectedEndTag; // Until we see otherwise, assume the // string is valid. outParseInfo.fValidNamedString = true; // Set the volatile flag outParseInfo.fIsVolatile = inDataIsVolatile; // Set the limit flag (assumed to not be limited) outParseInfo.fHasMaxDataLen = false; // Make sure it starts with a valid tag. if (inNamedString[0] != '<') { debug_str("Badly-formed named string"); goto BadEndTag; } startOfName = SkipOverSpaces(inNamedString+1); // Decide which ending should be expected expectedEndTag = "</Z>"; if (strncmp(startOfName, "Z name=", 7) != 0) { expectedEndTag = "</O>"; if (strncmp(startOfName, "O name=", 7) != 0) { debug_str("Badly-formed named string"); goto BadEndTag; } } outParseInfo.fNamedStringStart = inNamedString; outParseInfo.fNameStr = &inNamedString[8]; // Scan for a space or a right brace. This signals the end // of the tag or additional parameters. endOfName = strpbrk(outParseInfo.fNameStr, " >"); if (endOfName == NULL) { debug_str("Badly-formed named string"); goto BadStringName; } // This part parses additional parameters in the tag while (*endOfName == ' ') { if(!ParseAdditionalParameters(endOfName, outParseInfo)) goto BadStringName; endOfName = strpbrk(endOfName + 1, " >"); // Find the next space or > if (endOfName == NULL) // It is an error if it can't find either { debug_str("Badly-formed named string"); goto BadStringName; } } if (*endOfName != '>') { debug_str("Badly-formed named string"); goto BadStringName; } outParseInfo.fNameStrLen = endOfName - outParseInfo.fNameStr; outParseInfo.fValueStr = endOfName + 1; // Make sure the string is closed off with an end tag. 
endTag = strstr(outParseInfo.fValueStr, expectedEndTag); if (endTag == NULL) { debug_str("Badly-formed named string"); goto BadEndTag; } outParseInfo.fValueStrLen = endTag - outParseInfo.fValueStr; outParseInfo.fNamedStringLimit = endTag + 4; check(outParseInfo.fNamedStringLimit > outParseInfo.fNamedStringStart); return true; BadEndTag: BadStringName: outParseInfo.fNamedStringLimit = outParseInfo.fNamedStringStart + 256; // doesn't have an end, so assign one outParseInfo.fValidNamedString = false; return false; } /*------------------------------------------------------------------ CreateNewZString ------------------------------------------------------------------*/ void ZStringParser::CreateNewZString( const ZStringParseInfo & inParseInfo, ZString & outDestString) { const char * curInputPtr; const char * limitInputPtr; Z_UInt16 outputCount = 0; // First, scan the string to validate it and count // the number of characters we'll need. curInputPtr = inParseInfo.fValueStr; limitInputPtr = curInputPtr + inParseInfo.fValueStrLen; while (curInputPtr < limitInputPtr) { if (*curInputPtr == '&' || *curInputPtr == '<') { Z_UInt16 tagNameLength; ZStringTagID tagID = LookUpTagID(curInputPtr, limitInputPtr, tagNameLength); check(tagID != kZTag_Invalid); if (tagID == kZTag_Invalid) { curInputPtr++; outputCount++; } else if (tagID == kZTag_replace) { // We special-case the "replace" tag because // it doesn't get replaced, and it has two additional // characters. curInputPtr += 8; check(curInputPtr[0] >= '0' && curInputPtr[0] <= '9'); check(curInputPtr[1] >= '0' && curInputPtr[1] <= '9'); curInputPtr += 2; check(curInputPtr <= limitInputPtr); outputCount += 10; } else { curInputPtr += tagNameLength; outputCount += GetTagReplacement(tagID, NULL); } } else { outputCount += mTwoByteTable[(Z_UInt8)*curInputPtr] + 1; curInputPtr += mTwoByteTable[(Z_UInt8)*curInputPtr] + 1; } } // Allocate the space for the string. 
outDestString.AllocateData(outputCount); if (outDestString.GetData() != NULL) { // Next, do a second scan to replace the characters. curInputPtr = inParseInfo.fValueStr; limitInputPtr = curInputPtr + inParseInfo.fValueStrLen; char * curOutputPtr = outDestString.GetData()->GetDataArray(); while (curInputPtr < limitInputPtr) { if (*curInputPtr == '&' || *curInputPtr == '<') { Z_UInt16 tagNameLength; ZStringTagID tagID = LookUpTagID(curInputPtr, limitInputPtr, tagNameLength); if (tagID == kZTag_Invalid) { *curOutputPtr++ = *curInputPtr++; } else if (tagID == kZTag_replace) { // We special-case the "replace" tag because // it doesn't get replaced, and it has two additional // characters. memcpy(curOutputPtr, curInputPtr, 10); curOutputPtr += 10; curInputPtr += 10; } else { curOutputPtr += GetTagReplacement(tagID, curOutputPtr); curInputPtr += tagNameLength; } } else { Z_UInt8 charCount = mTwoByteTable[(Z_UInt8)*curInputPtr] + 1; while (charCount > 0) { *curOutputPtr++ = *curInputPtr++; charCount--; } } } } // Register the string with the dictionary so // we don't have to continue parsing it each time. ZStringDictionary::GetZStringDictionary().RegisterString(inParseInfo, outDestString); } /*------------------------------------------------------------------ ConvertNamedStringToTag [static] This method converts the parsed named string to a named string that contains only tags. 
------------------------------------------------------------------*/ Z_Boolean ZStringParser::ConvertNamedStringToTag( const ZStringParseInfo & inParseInfo, const ZToolOptions & inOptions, ZString & outDestString, ZParserWarningType & outWarningType) { check(inParseInfo.fValidNamedString); // Initialize the warning type to none outWarningType = kZParser_NoWarnings; // Copy the portion of the named string before the actual string prototype outDestString.SetString(inParseInfo.fNamedStringStart, inParseInfo.fValueStr - inParseInfo.fNamedStringStart); // Now, copy the prototype one character at a time until we hit a tag const char * curInputPtr; char tagString[32]; curInputPtr = inParseInfo.fValueStr; while (curInputPtr < inParseInfo.fValueStr + inParseInfo.fValueStrLen) { if (*curInputPtr == '&' || *curInputPtr == '<') { ZStringTagID tagID; Z_UInt16 tagLength; Z_Boolean isNumeric; isNumeric = (curInputPtr[0] == '&' && curInputPtr[1] == '#'); tagID = LookUpTagID(curInputPtr, inParseInfo.fValueStr + inParseInfo.fValueStrLen, tagLength); check(tagID != kZTag_Invalid); if (tagID == kZTag_Invalid) { return false; } else if (tagID == kZTag_replace) { ZString replaceString; replaceString.SetString(curInputPtr, tagLength); outDestString += replaceString; curInputPtr += tagLength; } else if (*curInputPtr == '<') { memcpy(tagString, curInputPtr, tagLength); tagString[tagLength] = '\0'; outDestString += tagString; curInputPtr += tagLength; } else if (inOptions.mOutputNumericTags) // Output numeric tags { sprintf(tagString, "&#%.3d;", ConvertTagIDToNumeric(tagID)); outDestString += tagString; curInputPtr += tagLength; } else // Output alpha tags { sprintf(tagString, "%s", ConvertTagIDToString(tagID)); outDestString += tagString; curInputPtr += tagLength; } if (!isNumeric && inOptions.mAllowTagSemicolon && *curInputPtr == ';') { // for alphabetic tags, skips over ;'s at the end if option is enabled curInputPtr++; } } else { unsigned char charValue = (unsigned char)(*curInputPtr); 
if (charValue > 127 && inOptions.mConvertHighASCIIChar) // if we want to try to convert high ASCII characters { ZStringTagID zStringTag = sNumericParseTags[charValue]; if (zStringTag != kZTag_Invalid) { // Translate into html code (based on numeric or alpha option) if (inOptions.mOutputNumericTags) sprintf(tagString, "&#%.3d;", ConvertTagIDToNumeric(zStringTag)); else sprintf(tagString, "%s", ConvertTagIDToString(zStringTag)); outDestString += tagString; // Add the tag to the stream curInputPtr++; outWarningType |= kZParser_ChangedString; } else // Append the character to the output string { outDestString += *curInputPtr++; outWarningType |= kZParser_HasHighASCII; } } else if (inOptions.mOutputWarnings) // Only check if we are printing warnings { if (FindPossibleTag(curInputPtr, inParseInfo)) outWarningType |= kZParser_FoundPossibleTag; outDestString += *curInputPtr++; } else outDestString += *curInputPtr++; } } // Finally, copy the rest of the named string ZString stringEnd; stringEnd.SetString(inParseInfo.fValueStr + inParseInfo.fValueStrLen, inParseInfo.fNamedStringLimit - (inParseInfo.fValueStr + inParseInfo.fValueStrLen)); outDestString += stringEnd; return true; } /*------------------------------------------------------------------ FindPossibleTag This method searches the parse tags to see if one of them matches the current input. It returns true if it does and false otherwise. 
------------------------------------------------------------------*/ Z_Boolean ZStringParser::FindPossibleTag( const char * inCurInputPtr, const ZStringParseInfo & inParseInfo) { Z_UInt32 strLength; Z_UInt32 curTagIndex; Z_UInt32 totalEntries = sizeof(sParseTags) / sizeof(ZStringParseTag); if (!isalpha(*inCurInputPtr)) return false; // Scan through the list of numerics for possible match for (curTagIndex = 0; curTagIndex < totalEntries; curTagIndex++) { // We are ignoring the tags that could be stand-alone words or parts of words if (!sParseTags[curTagIndex].fCheckForPartialTag) continue; strLength = strlen(sParseTags[curTagIndex].fTagName) - 1; // we are ignoring the initial & if ((inCurInputPtr + strLength) <= inParseInfo.fValueStr + inParseInfo.fValueStrLen) { if (strncmp(inCurInputPtr, (sParseTags[curTagIndex].fTagName+1), strLength)==0) if (sParseTags[curTagIndex].fTagID == kZTag_replace) { if (inCurInputPtr[7] >= '0' && inCurInputPtr[7] <= '9' && inCurInputPtr[8] >= '0' && inCurInputPtr[8] <= '9') return true; } else return true; } } return false; } /*------------------------------------------------------------------ CheckDataLength ------------------------------------------------------------------*/ Z_Boolean ZStringParser::CheckDataLength( const ZStringParseInfo & inParseInfo) { Z_UInt16 stringLen = inParseInfo.fValueStrLen; const char * curPtr; char * searchString = new char[stringLen+1]; char * endOfStringPtr = searchString + stringLen; strncpy(searchString, inParseInfo.fValueStr, stringLen); searchString[stringLen] = '\0'; curPtr = searchString; while (curPtr < endOfStringPtr) { curPtr = strpbrk(curPtr, "&"); if (curPtr == NULL) // No tag found so break break; // if it is numeric, it is a set length of 6 characters (ex: {) if ((curPtr+1) < endOfStringPtr && curPtr[1] == '#') { stringLen -= 5; // 6 characters represent 1 letter, so only subtract (6-1)=5 } else // It is alpha tag { ZStringTagID tagID; Z_UInt16 tagLength; tagID = LookUpTagID(curPtr, 
inParseInfo.fValueStr + inParseInfo.fValueStrLen, tagLength); if (tagID != kZTag_Invalid) // No tag ID found, so ignore this tag. stringLen -= (tagLength - 1); } curPtr++; } delete [] searchString; return (stringLen > inParseInfo.fMaxDataLen); } /*------------------------------------------------------------------ CompareTagStrings ------------------------------------------------------------------*/ Z_SInt32 ZStringParser::CompareTagStrings( const char * inTagFromTable, const char * inParseString, const char * inParseStringLimit) { while (*inTagFromTable != '\0' && inParseString < inParseStringLimit) { if (*inTagFromTable < *inParseString) return -1; else if (*inTagFromTable > *inParseString) return 1; inTagFromTable++; inParseString++; } return 0; } /*------------------------------------------------------------------ ConvertTagIDToNumeric ------------------------------------------------------------------*/ Z_UInt32 ZStringParser::ConvertTagIDToNumeric( ZStringTagID inTag) { // The replace tag has no equivalent check(inTag != kZTag_Invalid && inTag != kZTag_replace); Z_UInt32 curTagIndex; Z_UInt32 totalEntries = sizeof(sNumericParseTags) / sizeof(ZStringTagID); // Scan through the entire list of numerics // for this tag. for (curTagIndex = 0; curTagIndex < totalEntries; curTagIndex++) { if (sNumericParseTags[curTagIndex] == inTag) return curTagIndex; } debug_str("Didn't find tag"); return 0; } /*------------------------------------------------------------------ ConvertTagIDToString ------------------------------------------------------------------*/ const char * ZStringParser::ConvertTagIDToString( ZStringTagID inTag) { // The replace tag has no equivalent check(inTag != kZTag_Invalid && inTag != kZTag_replace); Z_UInt32 curTagIndex; Z_UInt32 totalEntries = sizeof(sParseTags) / sizeof(ZStringParseTag); // Scan through the entire list of numerics // for this tag. 
for (curTagIndex = 0; curTagIndex < totalEntries; curTagIndex++) { if (sParseTags[curTagIndex].fTagID == inTag) return sParseTags[curTagIndex].fTagName; } debug_str("Didn't find tag"); return NULL; } /*------------------------------------------------------------------ LookUpTagID [static] Looks up a tag starting with an ampersand ("&") or a left bracket ("<"). ------------------------------------------------------------------*/ ZStringTagID ZStringParser::LookUpTagID( const char * inStartOfTag, const char * inStringLimit, Z_UInt16 & outTagNameLength) { ZStringTagID zStringTag = kZTag_Invalid; // Make sure I didn't forget to type an entry check(sizeof(sNumericParseTags) == 256 * sizeof(ZStringTagID)); check(inStartOfTag[0] == '&' || inStartOfTag[0] == '<'); // Is it a numeric tag? if (inStartOfTag[0] == '&' && inStartOfTag[1] == '#') { Z_UInt32 numericTagValue = 0; // There should be at least five characters in the tag check(inStringLimit >= inStartOfTag + 6); if (inStringLimit < inStartOfTag + 6) return kZTag_Invalid; check(inStartOfTag[2] >= '0' && inStartOfTag[2] <= '9'); if (inStartOfTag[2] < '0' || inStartOfTag[2] > '9') return kZTag_Invalid; numericTagValue = (inStartOfTag[2] - '0') * 100; check(inStartOfTag[3] >= '0' && inStartOfTag[3] <= '9'); if (inStartOfTag[3] < '0' || inStartOfTag[3] > '9') return kZTag_Invalid; numericTagValue += (inStartOfTag[3] - '0') * 10; check(inStartOfTag[4] >= '0' && inStartOfTag[4] <= '9'); if (inStartOfTag[4] < '0' || inStartOfTag[4] > '9') return kZTag_Invalid; numericTagValue += (inStartOfTag[4] - '0'); // It should end in a semicolon check(inStartOfTag[5] == ';'); if (inStartOfTag[5] != ';') return kZTag_Invalid; if (numericTagValue >= 256) return kZTag_Invalid; zStringTag = sNumericParseTags[numericTagValue]; check(zStringTag != kZTag_Invalid); if (zStringTag != kZTag_Invalid) outTagNameLength = 6; } else { Z_UInt32 minEntryIndex = 0; Z_UInt32 maxEntryIndex = sizeof(sParseTags) / sizeof(ZStringParseTag); Z_UInt32 
curEntryIndex; Z_UInt32 maxNumberOfEntries = maxEntryIndex; while (true) { // Cut the search in half. curEntryIndex = (minEntryIndex + maxEntryIndex) / 2; if (curEntryIndex > maxNumberOfEntries) return kZTag_Invalid; Z_SInt32 compareResult; compareResult = CompareTagStrings( sParseTags[curEntryIndex].fTagName, inStartOfTag, inStringLimit); if (compareResult == 0) { Z_UInt32 tagNameLength = strlen(sParseTags[curEntryIndex].fTagName); // Make sure we didn't just swallow a portion // of one of the tag names. if (tagNameLength <= inStringLimit - inStartOfTag) { zStringTag = sParseTags[curEntryIndex].fTagID; outTagNameLength = tagNameLength; } break; } else if (compareResult > 0) { // The tag was less than the value in the table, // so we need to search further up the table. maxEntryIndex = curEntryIndex - 1; } else // if (compareResult < 0) { // The tag was greater than the value in the table, // so we need to search further down the table. minEntryIndex = curEntryIndex + 1; } // If we converged, but didn't find a match, // break out of the loop. if (maxEntryIndex < minEntryIndex) break; } } return zStringTag; }