SAXParser.hpp

Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: SAXParser.hpp 673975 2008-07-04 09:23:56Z borisk $
00020  */
00021 
00022 #if !defined(XERCESC_INCLUDE_GUARD_SAXPARSER_HPP)
00023 #define XERCESC_INCLUDE_GUARD_SAXPARSER_HPP
00024 
00025 #include <xercesc/sax/Parser.hpp>
00026 #include <xercesc/internal/VecAttrListImpl.hpp>
00027 #include <xercesc/framework/XMLDocumentHandler.hpp>
00028 #include <xercesc/framework/XMLElementDecl.hpp>
00029 #include <xercesc/framework/XMLEntityHandler.hpp>
00030 #include <xercesc/framework/XMLErrorReporter.hpp>
00031 #include <xercesc/framework/XMLBuffer.hpp>
00032 #include <xercesc/util/SecurityManager.hpp>
00033 #include <xercesc/validators/common/Grammar.hpp>
00034 #include <xercesc/validators/DTD/DocTypeHandler.hpp>
00035 
00036 
00037 XERCES_CPP_NAMESPACE_BEGIN
00038 
00039 
00040 class DocumentHandler;
00041 class EntityResolver;
00042 class XMLPScanToken;
00043 class XMLScanner;
00044 class XMLValidator;
00045 class GrammarResolver;
00046 class XMLGrammarPool;
00047 class XMLEntityResolver;
00048 class XMLResourceIdentifier;
00049 class PSVIHandler;
00050 
00069 class PARSERS_EXPORT SAXParser :
00070 // Parser implementation that also serves as its own XMLDocumentHandler, XMLErrorReporter, XMLEntityHandler and DocTypeHandler (see the base list).
00071     public XMemory
00072     , public Parser
00073     , public XMLDocumentHandler
00074     , public XMLErrorReporter
00075     , public XMLEntityHandler
00076     , public DocTypeHandler
00077 {
00078 public :
00079     // -----------------------------------------------------------------------
00080     //  Class types
00081     // -----------------------------------------------------------------------
00090     enum ValSchemes  // values accepted by setValidationScheme() and returned by getValidationScheme()
00091     {
00092         Val_Never
00093         , Val_Always
00094         , Val_Auto
00095     };
00096 
00097 
00098     // -----------------------------------------------------------------------
00099     //  Constructors and Destructor
00100     // -----------------------------------------------------------------------
00111     SAXParser
00112     (
00113           XMLValidator*   const valToAdopt = 0
00114         , MemoryManager*  const manager = XMLPlatformUtils::fgMemoryManager
00115         , XMLGrammarPool* const gramPool = 0
00116     );
00117 
00121     ~SAXParser();
00123 
00124 
00125     // -----------------------------------------------------------------------
00126     //  Getter Methods
00127     // -----------------------------------------------------------------------
00136     DocumentHandler* getDocumentHandler();
00137 
00144     const DocumentHandler* getDocumentHandler() const;
00145 
00152     EntityResolver* getEntityResolver();
00153 
00160     const EntityResolver* getEntityResolver() const;
00161 
00168     XMLEntityResolver* getXMLEntityResolver();
00169 
00176     const XMLEntityResolver* getXMLEntityResolver() const;
00177 
00184     ErrorHandler* getErrorHandler();
00185 
00192     const ErrorHandler* getErrorHandler() const;
00193 
00200     PSVIHandler* getPSVIHandler();
00201 
00208     const PSVIHandler* getPSVIHandler() const;
00209 
00216     const XMLValidator& getValidator() const;
00217 
00225     ValSchemes getValidationScheme() const;
00226 
00237     bool getDoSchema() const;
00238 
00249     bool getValidationSchemaFullChecking() const;
00250 
00261     bool getIdentityConstraintChecking() const;
00262 
00273     int getErrorCount() const;
00274 
00284     bool getDoNamespaces() const;
00285 
00295     bool getExitOnFirstFatalError() const;
00296 
00307     bool getValidationConstraintFatal() const;
00308 
00328     XMLCh* getExternalSchemaLocation() const;
00329 
00349     XMLCh* getExternalNoNamespaceSchemaLocation() const;
00350 
00366     SecurityManager* getSecurityManager() const;
00367 
00379     bool getLoadExternalDTD() const;
00380 
00392     bool getLoadSchema() const;
00393 
00404     bool isCachingGrammarFromParse() const;
00405 
00416     bool isUsingCachedGrammarInParse() const;
00417 
00429     bool getCalculateSrcOfs() const;
00430 
00441     bool getStandardUriConformant() const;
00442 
00449     Grammar* getGrammar(const XMLCh* const nameSpaceKey);
00450 
00456     Grammar* getRootGrammar();
00457 
00464     const XMLCh* getURIText(unsigned int uriId) const;
00465 
00472     XMLFilePos getSrcOffset() const;
00473 
00485     bool getGenerateSyntheticAnnotations() const;
00486 
00494     bool getValidateAnnotations() const;
00495 
00503     bool getIgnoreCachedDTD() const;
00504 
00512     bool getIgnoreAnnotations() const;
00513 
00521     bool getDisableDefaultEntityResolution() const;
00522 
00530     bool getSkipDTDValidation() const;
00531 
00539     bool getHandleMultipleImports() const;
00541 
00542 
00543     // -----------------------------------------------------------------------
00544     //  Setter methods
00545     // -----------------------------------------------------------------------
00546 
00559     void setGenerateSyntheticAnnotations(const bool newValue);
00560 
00568     void setValidateAnnotations(const bool newValue);
00569 
00583     void setDoNamespaces(const bool newState);
00584 
00601     void setValidationScheme(const ValSchemes newScheme);
00602 
00618     void setDoSchema(const bool newState);
00619 
00636     void setValidationSchemaFullChecking(const bool schemaFullChecking);
00637 
00649     void setIdentityConstraintChecking(const bool identityConstraintChecking);
00650 
00666     void setExitOnFirstFatalError(const bool newState);
00667 
00687     void setValidationConstraintFatal(const bool newState);
00688 
00709     void setExternalSchemaLocation(const XMLCh* const schemaLocation);
00710 
00719     void setExternalSchemaLocation(const char* const schemaLocation);
00720 
00735     void setExternalNoNamespaceSchemaLocation(const XMLCh* const noNamespaceSchemaLocation);
00736 
00745     void setExternalNoNamespaceSchemaLocation(const char* const noNamespaceSchemaLocation);
00746 
00762     void setSecurityManager(SecurityManager* const securityManager);
00763 
00780     void setLoadExternalDTD(const bool newState);
00781 
00797     void setLoadSchema(const bool newState);
00798 
00816     void cacheGrammarFromParse(const bool newState);
00817 
00836     void useCachedGrammarInParse(const bool newState);
00837 
00850     void setCalculateSrcOfs(const bool newState);
00851 
00862     void setStandardUriConformant(const bool newState);
00863 
00871     void useScanner(const XMLCh* const scannerName);
00872 
00883     void setInputBufferSize(const XMLSize_t bufferSize);
00884 
00899     void setIgnoreCachedDTD(const bool newValue);
00900 
00910     void setIgnoreAnnotations(const bool newValue);
00911 
00926     void setDisableDefaultEntityResolution(const bool newValue);
00927 
00940     void setSkipDTDValidation(const bool newValue);
00941 
00953     void setHandleMultipleImports(const bool newValue);
00955 
00956 
00957     // -----------------------------------------------------------------------
00958     //  Advanced document handler list maintenance methods
00959     // -----------------------------------------------------------------------
00960 
00978     void installAdvDocHandler(XMLDocumentHandler* const toInstall);
00979 
00991     bool removeAdvDocHandler(XMLDocumentHandler* const toRemove);
00993 
00994 
00995     // -----------------------------------------------------------------------
00996     //  Progressive scan methods
00997     // -----------------------------------------------------------------------
00998 
01001 
01029     bool parseFirst
01030     (
01031         const   XMLCh* const    systemId
01032         ,       XMLPScanToken&  toFill
01033     );
01034 
01062     bool parseFirst
01063     (
01064         const   char* const     systemId
01065         ,       XMLPScanToken&  toFill
01066     );
01067 
01095     bool parseFirst
01096     (
01097         const   InputSource&    source
01098         ,       XMLPScanToken&  toFill
01099     );
01100 
01125     bool parseNext(XMLPScanToken& token);
01126 
01148     void parseReset(XMLPScanToken& token);
01149 
01151 
01152     // -----------------------------------------------------------------------
01153     //  Grammar preparsing interface
01154     // -----------------------------------------------------------------------
01155 
01185     Grammar* loadGrammar(const InputSource& source,
01186                          const Grammar::GrammarType grammarType,
01187                          const bool toCache = false);
01188 
01214     Grammar* loadGrammar(const XMLCh* const systemId,
01215                          const Grammar::GrammarType grammarType,
01216                          const bool toCache = false);
01217 
01242     Grammar* loadGrammar(const char* const systemId,
01243                          const Grammar::GrammarType grammarType,
01244                          const bool toCache = false);
01245 
01249     void resetCachedGrammarPool();
01250 
01252 
01253 
01254     // -----------------------------------------------------------------------
01255     //  Implementation of the SAX Parser interface
01256     // -----------------------------------------------------------------------
01257 
01269     virtual void parse(const InputSource& source);
01270 
01280     virtual void parse(const XMLCh* const systemId);
01281 
01289     virtual void parse(const char* const systemId);
01290 
01301     virtual void setDocumentHandler(DocumentHandler* const handler);
01302 
01312     virtual void setDTDHandler(DTDHandler* const handler);
01313 
01324     virtual void setErrorHandler(ErrorHandler* const handler);
01325 
01336     virtual void setPSVIHandler(PSVIHandler* const handler);
01337 
01353     virtual void setEntityResolver(EntityResolver* const resolver);
01354 
01370     virtual void setXMLEntityResolver(XMLEntityResolver* const resolver);
01371 
01373 
01374 
01375     // -----------------------------------------------------------------------
01376     //  Implementation of the XMLDocumentHandler interface
01377     // -----------------------------------------------------------------------
01378 
01396     virtual void docCharacters
01397     (
01398         const   XMLCh* const    chars
01399         , const XMLSize_t       length
01400         , const bool            cdataSection
01401     );
01402 
01412     virtual void docComment
01413     (
01414         const   XMLCh* const    comment
01415     );
01416 
01436     virtual void docPI
01437     (
01438         const   XMLCh* const    target
01439         , const XMLCh* const    data
01440     );
01441 
01453     virtual void endDocument();
01454 
01474     virtual void endElement
01475     (
01476         const   XMLElementDecl& elemDecl
01477         , const unsigned int    urlId
01478         , const bool            isRoot
01479         , const XMLCh* const    elemPrefix
01480     );
01481 
01492     virtual void endEntityReference
01493     (
01494         const   XMLEntityDecl&  entDecl
01495     );
01496 
01516     virtual void ignorableWhitespace
01517     (
01518         const   XMLCh* const    chars
01519         , const XMLSize_t       length
01520         , const bool            cdataSection
01521     );
01522 
01527     virtual void resetDocument();
01528 
01539     virtual void startDocument();
01540 
01567     virtual void startElement
01568     (
01569         const   XMLElementDecl&         elemDecl
01570         , const unsigned int            urlId
01571         , const XMLCh* const            elemPrefix
01572         , const RefVectorOf<XMLAttr>&   attrList
01573         , const XMLSize_t               attrCount
01574         , const bool                    isEmpty
01575         , const bool                    isRoot
01576     );
01577 
01587     virtual void startEntityReference
01588     (
01589         const   XMLEntityDecl&  entDecl
01590     );
01591 
01609     virtual void XMLDecl
01610     (
01611         const   XMLCh* const    versionStr
01612         , const XMLCh* const    encodingStr
01613         , const XMLCh* const    standaloneStr
01614         , const XMLCh* const    actualEncodingStr
01615     );
01617 
01618 
01619     // -----------------------------------------------------------------------
01620     //  Implementation of the XMLErrorReporter interface
01621     // -----------------------------------------------------------------------
01622 
01648     virtual void error
01649     (
01650         const   unsigned int                errCode
01651         , const XMLCh* const                msgDomain
01652         , const XMLErrorReporter::ErrTypes  errType
01653         , const XMLCh* const                errorText
01654         , const XMLCh* const                systemId
01655         , const XMLCh* const                publicId
01656         , const XMLFileLoc                  lineNum
01657         , const XMLFileLoc                  colNum
01658     );
01659 
01668     virtual void resetErrors();
01670 
01671 
01672     // -----------------------------------------------------------------------
01673     //  Implementation of the XMLEntityHandler interface
01674     // -----------------------------------------------------------------------
01675 
01689     virtual void endInputSource(const InputSource& inputSource);
01690 
01705     virtual bool expandSystemId
01706     (
01707         const   XMLCh* const    systemId
01708         ,       XMLBuffer&      toFill
01709     );
01710 
01718     virtual void resetEntities();
01719 
01736     virtual InputSource* resolveEntity
01737     (
01738         XMLResourceIdentifier* resourceIdentifier
01739     );
01740 
01752     virtual void startInputSource(const InputSource& inputSource);
01754 
01755 
01756     // -----------------------------------------------------------------------
01757     //  Implementation of the Deprecated DocTypeHandler Interface
01758     // -----------------------------------------------------------------------
01775     virtual void attDef
01776     (
01777         const   DTDElementDecl& elemDecl
01778         , const DTDAttDef&      attDef
01779         , const bool            ignore
01780     );
01781 
01791     virtual void doctypeComment
01792     (
01793         const   XMLCh* const    comment
01794     );
01795 
01814     virtual void doctypeDecl
01815     (
01816         const   DTDElementDecl& elemDecl
01817         , const XMLCh* const    publicId
01818         , const XMLCh* const    systemId
01819         , const bool            hasIntSubset
01820         , const bool            hasExtSubset = false
01821     );
01822 
01836     virtual void doctypePI
01837     (
01838         const   XMLCh* const    target
01839         , const XMLCh* const    data
01840     );
01841 
01853     virtual void doctypeWhitespace
01854     (
01855         const   XMLCh* const    chars
01856         , const XMLSize_t       length
01857     );
01858 
01871     virtual void elementDecl
01872     (
01873         const   DTDElementDecl& decl
01874         , const bool            isIgnored
01875     );
01876 
01887     virtual void endAttList
01888     (
01889         const   DTDElementDecl& elemDecl
01890     );
01891 
01898     virtual void endIntSubset();
01899 
01906     virtual void endExtSubset();
01907 
01922     virtual void entityDecl
01923     (
01924         const   DTDEntityDecl&  entityDecl
01925         , const bool            isPEDecl
01926         , const bool            isIgnored
01927     );
01928 
01933     virtual void resetDocType();
01934 
01947     virtual void notationDecl
01948     (
01949         const   XMLNotationDecl&    notDecl
01950         , const bool                isIgnored
01951     );
01952 
01963     virtual void startAttList
01964     (
01965         const   DTDElementDecl& elemDecl
01966     );
01967 
01974     virtual void startIntSubset();
01975 
01982     virtual void startExtSubset();
01983 
01996     virtual void TextDecl
01997     (
01998         const   XMLCh* const    versionStr
01999         , const XMLCh* const    encodingStr
02000     );
02002 
02003 protected :
02004     // -----------------------------------------------------------------------
02005     //  Protected Methods
02006     // -----------------------------------------------------------------------
02013     const XMLScanner& getScanner() const;
02014 
02019     GrammarResolver* getGrammarResolver() const;
02020 
02021 
02022 private:
02023     // -----------------------------------------------------------------------
02024     //  Unimplemented constructors and operators
02025     // -----------------------------------------------------------------------
02026     SAXParser(const SAXParser&);             // private and, per the heading above, unimplemented: no copy construction
02027     SAXParser& operator=(const SAXParser&);  // private and, per the heading above, unimplemented: no copy assignment
02028 
02029     // -----------------------------------------------------------------------
02030     //  Initialize/Cleanup methods
02031     // -----------------------------------------------------------------------
02032     void initialize();
02033     void cleanUp();
02034     void resetInProgress();
02035 
02036     // -----------------------------------------------------------------------
02037     //  Private data members
02038     //
02039     //  fAttrList
02040     //      A temporary implementation of the basic SAX attribute list
02041     //      interface. We use this one over and over on each startElement
02042     //      event to allow SAX-like access to the element attributes.
02043     //
02044     //  fDocHandler
02045     //      The installed SAX doc handler, if any. Null if none.
02046     //
02047     //  fDTDHandler
02048     //      The installed SAX DTD handler, if any. Null if none.
02049     //
02050     //  fElemDepth
02051     //      This is used to track the element nesting depth, so that we can
02052     //      know when we are inside content. This is so we can ignore char
02053     //      data outside of content.
02054     //
02055     //  fEntityResolver
02056     //      The installed SAX entity handler, if any. Null if none.
02057     //
02058     //  fErrorHandler
02059     //      The installed SAX error handler, if any. Null if none.
02060     //
02061     //  fPSVIHandler
02062     //      The installed PSVI handler, if any. Null if none.
02063     //
02064     //  fAdvDHCount
02065     //  fAdvDHList
02066     //  fAdvDHListSize
02067     //      This is an array of pointers to XMLDocumentHandlers, which is
02068     //      how we see installed advanced document handlers. There will
02069     //      usually not be very many at all, so a simple array is used
02070     //      instead of a collection, for performance. It will grow if needed,
02071     //      but that is unlikely.
02072     //
02073     //      The count is how many handlers are currently installed. The size
02074     //      is how big the array itself is (for expansion purposes.) When
02075     //      count == size, it is time to expand.
02076     //
02077     //  fParseInProgress
02078     //      This flag is set once a parse starts. It is used to prevent
02079     //      multiple entrance or reentrance of the parser.
02080     //
02081     //  fScanner
02082     //      The scanner being used by this parser. It is created internally
02083     //      during construction.
02084     //
02085     //  fGrammarPool
02086     //      The grammar pool passed in from the external application (through
02087     //      derivatives); it may be 0 and is not owned by this parser.
02088     //
02089     // -----------------------------------------------------------------------
02090     bool                 fParseInProgress;
02091     XMLSize_t            fElemDepth;
02092     XMLSize_t            fAdvDHCount;
02093     XMLSize_t            fAdvDHListSize;
02094     VecAttrListImpl      fAttrList;
02095     DocumentHandler*     fDocHandler;
02096     DTDHandler*          fDTDHandler;
02097     EntityResolver*      fEntityResolver;
02098     XMLEntityResolver*   fXMLEntityResolver;  // value returned by getXMLEntityResolver(); not described in the list above
02099     ErrorHandler*        fErrorHandler;
02100     PSVIHandler*         fPSVIHandler;
02101     XMLDocumentHandler** fAdvDHList;
02102     XMLScanner*          fScanner;
02103     GrammarResolver*     fGrammarResolver;    // value returned by getGrammarResolver(); not described in the list above
02104     XMLStringPool*       fURIStringPool;      // NOTE(review): undocumented; presumably backs getURIText() - confirm
02105     XMLValidator*        fValidator;          // NOTE(review): undocumented; presumably the constructor's valToAdopt - confirm
02106     MemoryManager*       fMemoryManager;      // NOTE(review): undocumented; presumably the constructor's manager argument - confirm
02107     XMLGrammarPool*      fGrammarPool;
02108     XMLBuffer            fElemQNameBuf;       // NOTE(review): undocumented; presumably a scratch buffer for element QNames - confirm
02109 };
02110 
02111 
02112 // ---------------------------------------------------------------------------
02113 //  SAXParser: Getter methods
02114 // ---------------------------------------------------------------------------
02115 inline DocumentHandler* SAXParser::getDocumentHandler()  // installed SAX document handler (fDocHandler); null if none
02116 {
02117     return this->fDocHandler;
02118 }
02119 
02120 inline const DocumentHandler* SAXParser::getDocumentHandler() const  // read-only overload: same fDocHandler pointer, null if none
02121 {
02122     return this->fDocHandler;
02123 }
02124 
02125 inline EntityResolver* SAXParser::getEntityResolver()  // installed SAX entity resolver (fEntityResolver); null if none
02126 {
02127     return this->fEntityResolver;
02128 }
02129 
02130 inline XMLEntityResolver* SAXParser::getXMLEntityResolver()  // hands back the stored fXMLEntityResolver pointer unchanged
02131 {
02132     return this->fXMLEntityResolver;
02133 }
02134 
02135 inline const XMLEntityResolver* SAXParser::getXMLEntityResolver() const  // read-only overload: same fXMLEntityResolver pointer
02136 {
02137     return this->fXMLEntityResolver;
02138 }
02139 
02140 inline const EntityResolver* SAXParser::getEntityResolver() const  // read-only overload: same fEntityResolver pointer, null if none
02141 {
02142     return this->fEntityResolver;
02143 }
02144 
02145 inline ErrorHandler* SAXParser::getErrorHandler()
02146 {
02147     return fErrorHandler;
02148 }
02149 
02150 inline const ErrorHandler* SAXParser::getErrorHandler() const  // read-only overload: same fErrorHandler pointer, null if none
02151 {
02152     return this->fErrorHandler;
02153 }
02154 
02155 inline PSVIHandler* SAXParser::getPSVIHandler()  // installed PSVI handler (fPSVIHandler); null if none
02156 {
02157     return this->fPSVIHandler;
02158 }
02159 
02160 inline const PSVIHandler* SAXParser::getPSVIHandler() const  // read-only overload: same fPSVIHandler pointer, null if none
02161 {
02162     return this->fPSVIHandler;
02163 }
02164 
02165 inline const XMLScanner& SAXParser::getScanner() const  // protected accessor: dereferences fScanner, so fScanner must be non-null here
02166 {
02167     return *(this->fScanner);
02168 }
02169 
02170 inline GrammarResolver* SAXParser::getGrammarResolver() const  // protected accessor: hands back the stored fGrammarResolver pointer
02171 {
02172     return this->fGrammarResolver;
02173 }
02174 
02175 XERCES_CPP_NAMESPACE_END
02176 
02177 #endif

Generated on Wed Feb 18 07:56:10 2009 for Xerces-C++ by  doxygen 1.5.4