home *** CD-ROM | disk | FTP | other *** search
- // Copyright (c) 1994 James Clark
- // See the file COPYING for copying permission.
-
- #ifndef Parser_INCLUDED
- #define Parser_INCLUDED 1
-
- #ifdef __GNUG__
- #pragma interface
- #endif
-
- #include "types.h"
- #include "Attribute.h"
- #include "Attributed.h"
- #include "Boolean.h"
- #include "StringC.h"
- #include "ElementType.h"
- #include "Entity.h"
- #include "Event.h"
- #include "IList.h"
- #include "ISet.h"
- #include "Location.h"
- #include "Owner.h"
- #include "ParserState.h"
- #include "Ptr.h"
- #include "SgmlParser.h"
- #include "StringOf.h"
- #include "Undo.h"
- #include "Vector.h"
-
- #ifdef SP_NAMESPACE
- namespace SP_NAMESPACE {
- #endif
-
- class AllowedParams; // Forward declarations start here: an incomplete type is enough wherever Parser only names the type by reference or pointer.
- class Param;
- class ExternalId;
- class PublicId; // NOTE(review): Parser::referencePublic names PublicId::TextClass, which needs the complete type -- presumably provided by one of the includes above; confirm.
- class GroupToken;
- class AllowedGroupTokens;
- struct GroupConnector;
- class AllowedGroupConnectors;
- class AllowedSdParams;
- class Text;
- class AttributeList;
- class AttributeDefinition;
- class AttributeDefinitionList;
- class UnivCharsetDesc;
- class CharsetInfo;
- class CharsetDecl;
- class DeclaredValue;
- struct SdBuilder;
- struct SdParam;
- class Syntax; // NOTE(review): Parser names Syntax::ReservedName, which needs the complete type -- presumably provided by one of the includes above; confirm.
- class ElementDefinition;
- class CharSwitcher;
- struct StandardSyntaxSpec;
- class Undo; // NOTE(review): "Undo.h" is already included above, so this declaration looks redundant -- confirm.
- class Decl; // NOTE(review): no Parser member declared in this header mentions Decl; possibly a leftover -- confirm before removing.
-
- // Parser: event-producing front end (see nextEvent() / parseAll()) layered
- // on ParserState.
- //
- // ParserState is a *private* base, so callers see nothing from it except the
- // members re-exported by the using-declarations in the public section.
- // Everything after `private:` is an internal helper.  This header only
- // declares those helpers; their behaviour lives in the implementation files.
- // The section headings in the private part group helpers by their names
- // only -- they are a reading aid, not a statement of behaviour.
- //
- // Copying is disabled the pre-C++11 way: the copy constructor and the
- // assignment operator are declared private and deliberately left undefined.
- class Parser : private ParserState {
- public:
-   Parser(const SgmlParser::Params &);
-   Event *nextEvent();
-   // cancelPtr refers to a volatile sig_atomic_t owned by the caller;
-   // presumably it is polled to abandon the parse -- confirm in the
-   // implementation.
-   void parseAll(EventHandler &, const volatile sig_atomic_t *cancelPtr);
-   // Re-export selected ParserState members with public access.  These are
-   // using-declarations because the older access-declaration spelling
-   // (`ParserState::sdPointer;`) was removed from the language in C++11.
-   using ParserState::sdPointer;
-   using ParserState::instanceSyntaxPointer;
-   using ParserState::prologSyntaxPointer;
-   using ParserState::activateLinkType;
-   using ParserState::allLinkTypesActivated;
-   using ParserState::entityManager;
-   using ParserState::entityCatalog;
-   using ParserState::baseDtd;
-   using ParserState::options;
- private:
-   Parser(const Parser &);          // undefined
-   void operator=(const Parser &);  // undefined
-
-   // ---- Reference/standard syntax set-up (grouping inferred from names) ----
-   Boolean setStandardSyntax(Syntax &syn, const StandardSyntaxSpec &,
-                             const CharsetInfo &docCharset,
-                             CharSwitcher &,
-                             Boolean www);
-   Boolean addRefDelimShortref(Syntax &syntax,
-                               const CharsetInfo &syntaxCharset,
-                               const CharsetInfo &docCharset,
-                               CharSwitcher &switcher);
-   Boolean setRefDelimGeneral(Syntax &syntax,
-                              const CharsetInfo &syntaxCharset,
-                              const CharsetInfo &docCharset,
-                              CharSwitcher &switcher);
-   void setRefNames(Syntax &syntax, const CharsetInfo &docCharset, Boolean www);
-
-   // ---- Recognition-mode compilation (grouping inferred from names) ----
-   void giveUp();
-   void compileSdModes();
-   void compilePrologModes();
-   void compileInstanceModes();
-   void addNeededShortrefs(Dtd &, const Syntax &);
-   Boolean shortrefCanPreemptDelim(const StringC &sr,
-                                   const StringC &d,
-                                   Boolean dIsSr,
-                                   const Syntax &);
-   void compileModes(const Mode *modes, int n, const Dtd *);
-   void compileNormalMap();
-
-   // ---- Per-phase drivers, token extension, error recovery ----
-   void doInit();
-   void doProlog();
-   void doDeclSubset();
-   void doInstanceStart();
-   void doContent();
-   void extendNameToken(size_t, const MessageType1 &);
-   void extendNumber(size_t, const MessageType1 &);
-   void extendHexNumber();
-   void extendData();
-   void extendS();
-   void extendContentS();
-   void declSubsetRecover(unsigned startLevel);
-   void prologRecover();
-   void skipDeclaration(unsigned startLevel);
-
-   // ---- Markup declarations ----
-   Boolean parseElementDecl();
-   Boolean parseAttlistDecl();
-   Boolean parseNotationDecl();
-   Boolean parseEntityDecl();
-   Boolean parseShortrefDecl();
-   Boolean parseUsemapDecl();
-   Boolean parseUselinkDecl();
-   Boolean parseDoctypeDeclStart();
-   Boolean parseDoctypeDeclEnd(Boolean fake = 0);
-   Boolean parseMarkedSectionDeclStart();
-   void handleMarkedSectionEnd();
-   Boolean parseCommentDecl();
-   void emptyCommentDecl();
-
-   // ---- Declaration parameters and literals ----
-   Boolean parseExternalId(const AllowedParams &,
-                           const AllowedParams &,
-                           Boolean,
-                           unsigned,
-                           Param &,
-                           ExternalId &);
-   Boolean parseParam(const AllowedParams &, unsigned, Param &);
-   Boolean parseMinimumLiteral(Boolean, Text &);
-   Boolean parseAttributeValueLiteral(Boolean, Text &);
-   Boolean parseTokenizedAttributeValueLiteral(Boolean, Text &);
-   Boolean parseSystemIdentifier(Boolean, Text &);
-   Boolean parseParameterLiteral(Boolean, Text &);
-   Boolean parseDataTagParameterLiteral(Boolean, Text &);
-   // flags for parseLiteral()
-   // The values are octal literals, each a distinct single bit, so they can
-   // be OR-ed together into parseLiteral()'s `unsigned flags` argument.
-   enum {
-     literalSingleSpace = 01,
-     literalDataTag = 02,
-     literalMinimumData = 04,
-     // Keep info about delimiters
-     literalDelimInfo = 010,
-     // Ignore references in the literal
-     literalNoProcess = 020,
-     // Allow numeric character references to non-SGML characters
-     literalNonSgml = 040
-   };
-   Boolean parseLiteral(Mode litMode, Mode liteMode, size_t maxLength,
-                        const MessageType1 &tooLongMessage,
-                        unsigned flags, Text &text);
-
-   // ---- Groups (name groups, model groups, data tag groups) ----
-   // `gt` is an output reference to the parsed GroupToken.  (An HTML-entity
-   // decoding accident had turned `&gt` into `>` in this copy of the file.)
-   Boolean parseGroupToken(const AllowedGroupTokens &allow,
-                           unsigned nestingLevel,
-                           unsigned declInputLevel,
-                           unsigned groupInputLevel,
-                           GroupToken &gt);
-   Boolean parseGroupConnector(const AllowedGroupConnectors &allow,
-                               unsigned declInputLevel,
-                               unsigned groupInputLevel,
-                               GroupConnector &gc);
-   Boolean parseGroup(const AllowedGroupTokens &allowToken,
-                      unsigned declInputLevel,
-                      Param &parm);
-   Boolean parseModelGroup(unsigned nestingLevel, unsigned declInputLevel,
-                           ModelGroup *&, Mode);
-   Boolean parseNameGroup(unsigned declInputLevel, Param &);
-   Boolean parseNameTokenGroup(unsigned declInputLevel, Param &);
-   Boolean parseDataTagGroup(unsigned nestingLevel, unsigned declInputLevel,
-                             GroupToken &);
-   Boolean parseDataTagTemplateGroup(unsigned nestingLevel,
-                                     unsigned declInputLevel, GroupToken &);
-
-   // ---- Names, references, comments, invalid-token reporting ----
-   Boolean parseElementNameGroup(unsigned declInputLevel, Param &);
-   Boolean parseReservedName(const AllowedParams &allow, Param &parm);
-   Boolean parseIndicatedReservedName(const AllowedParams &allow, Param &parm);
-   Boolean getReservedName(Syntax::ReservedName *);
-   Boolean getIndicatedReservedName(Syntax::ReservedName *);
-   Boolean parseAttributeValueParam(Param &parm);
-   Boolean parseEntityReference(Boolean isParameter,
-                                int ignoreLevel,
-                                ConstPtr<Entity> &entity,
-                                Ptr<EntityOrigin> &origin);
-   ContentToken::OccurrenceIndicator getOccurrenceIndicator(Mode);
-   Boolean parseComment(Mode);
-   Boolean parseNamedCharRef();
-   Boolean parseNumericCharRef(Boolean isHex, Char &, Location &);
-   Boolean translateNumericCharRef(Char &ch, Boolean &isSgmlChar);
-   Boolean parseDeclarationName(Syntax::ReservedName *, Boolean allowAfdr = 0);
-   void paramInvalidToken(Token, const AllowedParams &);
-   void groupTokenInvalidToken(Token, const AllowedGroupTokens &);
-   void groupConnectorInvalidToken(Token, const AllowedGroupConnectors &);
-   ElementType *lookupCreateElement(const StringC &);
-   RankStem *lookupCreateRankStem(const StringC &);
-   Boolean parseExceptions(unsigned declInputLevel,
-                           Ptr<ElementDefinition> &def);
-
-   // ---- Instance content: data and start-tags ----
-   void parsePcdata();
-   void parseStartTag();
-   ElementType *completeRankStem(const StringC &);
-   void handleRankedElement(const ElementType *);
-   void parseEmptyStartTag();
-   void acceptPcdata(const Location &);
-   void acceptStartTag(const ElementType *, StartElementEvent *,
-                       Boolean netEnabling);
-   void handleBadStartTag(const ElementType *, StartElementEvent *,
-                          Boolean netEnabling);
-   void undo(IList<Undo> &);
-   Boolean tryStartTag(const ElementType *, StartElementEvent *,
-                       Boolean netEnabling, IList<Event> &);
-   void checkExclusion(const ElementType *e);
-   Boolean tryImplyTag(const Location &, unsigned &, unsigned &,
-                       IList<Undo> &, IList<Event> &);
-   // Two overloads: one takes the netEnabling flag, the other takes the
-   // undo and event lists.
-   void pushElementCheck(const ElementType *, StartElementEvent *,
-                         Boolean netEnabling);
-   void pushElementCheck(const ElementType *, StartElementEvent *,
-                         IList<Undo> &, IList<Event> &);
-   void queueElementEvents(IList<Event> &);
-
-   // ---- Attribute specifications ----
-   Boolean parseAttributeSpec(Boolean inDeclaration,
-                              AttributeList &,
-                              Boolean &netEnabling,
-                              Ptr<AttributeDefinitionList> &);
-
-   Boolean handleAttributeNameToken(Text &text,
-                                    AttributeList &,
-                                    unsigned &specLength);
-   // Scope for the result codes that parseAttributeParameter() reports
-   // through its `result` out-parameter.
-   struct AttributeParameter {
-     enum Type {
-       end,
-       name,
-       nameToken,
-       vi,
-       recoverUnquoted
-     };
-   };
-
-   Boolean parseAttributeParameter(Boolean inDecl,
-                                   Boolean allowVi,
-                                   AttributeParameter::Type &result,
-                                   Boolean &netEnabling);
-   void extendUnquotedAttributeValue();
-
-   Boolean parseAttributeValueSpec(Boolean inDecl,
-                                   const StringC &name,
-                                   AttributeList &atts,
-                                   unsigned &specLength,
-                                   Ptr<AttributeDefinitionList> &newAttDefList);
-
-   // ---- End-tags and implied element ends ----
-   EndElementEvent *parseEndTag();
-   void parseEndTagClose();
-   void parseEmptyEndTag();
-   void parseNullEndTag();
-   void endAllElements();
-   void acceptEndTag(EndElementEvent *);
-   void endTagEmptyElement(const ElementType *,
-                           Boolean netEnabling,
-                           Boolean included,
-                           const Location &startLoc);
-   void implyCurrentElementEnd(const Location &);
-   void implyEmptyElementEnd(const ElementType *, Boolean included, const Location &);
-
-   // ---- Entities, notations, short references, attribute definitions ----
-   void maybeDefineEntity(const Ptr<Entity> &entity);
-   Notation *lookupCreateNotation(const StringC &name);
-   Boolean parseExternalEntity(StringC &name,
-                               Entity::DeclType declType,
-                               unsigned declInputLevel,
-                               Param &parm);
-   ShortReferenceMap *lookupCreateMap(const StringC &);
-   StringC prettifyDelim(const StringC &delim);
-   void handleShortref(int index);
-   Boolean parseProcessingInstruction();
-   Boolean parseAttributed(unsigned declInputLevel, Param &parm,
-                           Vector<Attributed *> &attributed,
-                           Boolean &isNotation);
-   Boolean parseDeclaredValue(unsigned declInputLevel, Boolean isNotation,
-                              Param &parm, Owner<DeclaredValue> &value);
-   Boolean parseDefaultValue(unsigned declInputLevel, Boolean isNotation,
-                             Param &parm, const StringC &attributeName,
-                             Owner<DeclaredValue> &declaredValue,
-                             Owner<AttributeDefinition> &def,
-                             Boolean &anyCurrent);
-   Boolean reportNonSgmlCharacter();
-   void endInstance();
-
-   // ---- SGML declaration (the sdParse* family shares SdBuilder/SdParam) ----
-   Boolean implySgmlDecl();
-   Boolean scanForSgmlDecl(const CharsetInfo &initCharset);
-   void findMissingMinimum(const CharsetInfo &charset, ISet<WideChar> &);
-   Boolean parseSgmlDecl();
-   Boolean sdParseSgmlDeclRef(SdBuilder &, SdParam &, ExternalId &);
-   Boolean sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseCapacity(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseScope(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseSyntax(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseExplicitSyntax(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseSyntaxCharset(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseShunchar(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseFunction(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseNaming(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseDelim(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseNames(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseQuantity(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseEntities(SdBuilder &sdBuilder, SdParam &parm);
-   Boolean sdParseFeatures(SdBuilder &sd, SdParam &parm);
-   Boolean sdParseAppinfo(SdBuilder &sd, SdParam &parm);
-   Boolean sdParseSeealso(SdBuilder &sd, SdParam &parm);
-   void requireWWW(SdBuilder &sdBuilder);
-   Boolean parseSdParam(const AllowedSdParams &allow, SdParam &);
-   Boolean parseSdParamLiteral(Boolean lita, String<SyntaxChar> &str);
-   Boolean parseSdSystemIdentifier(Boolean lita, Text &);
-   Boolean stringToNumber(const Char *s, size_t length, unsigned long &);
-   void sdParamConvertToLiteral(SdParam &parm);
-   void sdParamInvalidToken(Token token, const AllowedSdParams &);
-   Boolean sdParseCharset(SdBuilder &sdBuilder, SdParam &parm,
-                          Boolean isDocument,
-                          CharsetDecl &, UnivCharsetDesc &);
-   Boolean sdParseExternalCharset(Sd &, UnivCharsetDesc &desc);
-   UnivChar charNameToUniv(Sd &sd, const StringC &name);
-
-   // ---- Character translation between syntax and document charsets ----
-   // translateSyntax has three overloads: explicit switcher/charsets for one
-   // character, SdBuilder for one character, and SdBuilder for a string.
-   Boolean translateSyntax(CharSwitcher &switcher,
-                           const CharsetInfo &syntaxCharset,
-                           const CharsetInfo &docCharset,
-                           WideChar syntaxChar,
-                           Char &docChar);
-   Boolean translateSyntax(SdBuilder &sdBuilder,
-                           WideChar syntaxChar, Char &docChar);
-   Boolean translateSyntax(SdBuilder &sdBuilder,
-                           const String<SyntaxChar> &syntaxString,
-                           StringC &docString);
-   Boolean translateSyntaxNoSwitch(SdBuilder &sdBuilder,
-                                   WideChar syntaxChar, Char &docChar,
-                                   Number &count);
-   Boolean translateName(SdBuilder &sdBuilder,
-                         const StringC &name,
-                         StringC &str);
-   void translateRange(SdBuilder &sdBuilder, SyntaxChar start,
-                       SyntaxChar end, ISet<Char> &chars);
-   UnivChar translateUniv(UnivChar univChar,
-                          CharSwitcher &switcher,
-                          const CharsetInfo &syntaxCharset);
-   // Two overloads; the second additionally reports a count.
-   Boolean univToDescCheck(const CharsetInfo &charset, UnivChar from,
-                           Char &to);
-   Boolean univToDescCheck(const CharsetInfo &charset, UnivChar from,
-                           Char &to, WideChar &count);
-   void translateDocSet(const CharsetInfo &fromCharset,
-                        const CharsetInfo &toCharset,
-                        const ISet<Char> &fromSet,
-                        ISet<Char> &toSet);
-
-   // ---- Syntax consistency checks ----
-   Boolean checkNotFunction(const Syntax &syn, Char c);
-   Boolean checkGeneralDelim(const Syntax &syn, const StringC &delim);
-   Boolean checkShortrefDelim(const Syntax &syn,
-                              const CharsetInfo &charset,
-                              const StringC &delim);
-   Boolean checkNmchars(const ISet<Char> &set, const Syntax &syntax);
-   void intersectCharSets(const ISet<Char> &s1, const ISet<Char> &s2,
-                          ISet<WideChar> &inter);
-   Boolean checkSwitches(CharSwitcher &switcher,
-                         const CharsetInfo &syntaxCharset);
-   Boolean checkSwitchesMarkup(CharSwitcher &switcher);
-
-   // ---- Public-identifier lookup and document/DTD validation ----
-   const StandardSyntaxSpec *lookupSyntax(const PublicId &id);
-   Boolean referencePublic(const PublicId &id, PublicId::TextClass,
-                           Boolean &givenError);
-   void checkIdrefs();
-   void checkTaglen(Index tagStartIndex);
-   void checkSyntaxNamelen(const Syntax &syn);
-   void checkElementAttribute(const ElementType *e, size_t checkFrom = 0);
-   void checkDtd(Dtd &dtd);
-   Boolean maybeStatusKeyword(const Entity &entity);
-   void reportAmbiguity(const LeafContentToken *from,
-                        const LeafContentToken *to1,
-                        const LeafContentToken *to2,
-                        unsigned ambigAndDepth);
-
-   // ---- Link type declarations ----
-   Boolean parseLinktypeDeclStart();
-   Boolean parseLinktypeDeclEnd();
-   Boolean parseLinkDecl();
-   Boolean parseIdlinkDecl();
-   Boolean parseLinkSet(Boolean idlink);
-   void addIdLinkRule(const StringC &id, IdLinkRule &rule);
-   void addLinkRule(LinkSet *linkSet,
-                    const ElementType *sourceElement,
-                    const ConstPtr<SourceLinkRuleResource> &linkRule);
-   Boolean parseResultElementSpec(unsigned declInputLevel,
-                                  Param &parm,
-                                  Boolean idlink,
-                                  Boolean &implied,
-                                  const ElementType *&resultType,
-                                  AttributeList &attributes);
-   LinkSet *lookupCreateLinkSet(const StringC &name);
-   ElementType *lookupResultElementType(const StringC &name);
-
-   // ---- Remaining helpers ----
-   void endProlog();
-   Boolean parseEntityReferenceNameGroup(Boolean &ignore);
-   Boolean parseTagNameGroup(Boolean &active);
-   void parseGroupStartTag();
-   void parseGroupEndTag();
-   Boolean skipAttributeSpec();
-   Boolean lookingAtStartTag(StringC &gi);
-   void implyDtd(const StringC &gi);
-   void findMissingTag(const ElementType *e, Vector<const ElementType *> &);
-   unsigned paramsSubdocLevel(const SgmlParser::Params &);
-   void addCommonAttributes(Dtd &dtd);
-   Boolean parseAfdrDecl();
-   void setSdOverrides(Sd &sd);
- };
-
- #ifdef SP_NAMESPACE
- }
- #endif
-
- #endif /* not Parser_INCLUDED */
-