pxml.h

Go to the documentation of this file.
00001 /*
00002  * pxml.h
00003  *
00004  * XML parser support
00005  *
00006  * Portable Windows Library
00007  *
00008  * Copyright (c) 2002 Equivalence Pty. Ltd.
00009  *
00010  * The contents of this file are subject to the Mozilla Public License
00011  * Version 1.0 (the "License"); you may not use this file except in
00012  * compliance with the License. You may obtain a copy of the License at
00013  * http://www.mozilla.org/MPL/
00014  *
00015  * Software distributed under the License is distributed on an "AS IS"
00016  * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
00017  * the License for the specific language governing rights and limitations
00018  * under the License.
00019  *
00020  * The Original Code is Portable Windows Library.
00021  *
00022  * The Initial Developer of the Original Code is Equivalence Pty. Ltd.
00023  *
00024  * Contributor(s): ______________________________________.
00025  *
00026  * $Revision: 28994 $
00027  * $Author: rjongbloed $
00028  * $Date: 2013-01-25 02:06:35 -0600 (Fri, 25 Jan 2013) $
00029  */
00030 
00031 #ifndef PTLIB_PXML_H
00032 #define PTLIB_PXML_H
00033 
00034 #ifdef P_USE_PRAGMA
00035 #pragma interface
00036 #endif
00037 
00038 #include <ptlib.h>
00039 
00040 #include <ptbuildopts.h>
00041 
00042 #ifndef P_EXPAT
00043 
00044 namespace PXML {
00045 extern PString EscapeSpecialChars(const PString & str);
00046 };
00047 
00048 #else
00049 
00050 #include <ptclib/http.h>
00051 
00053 
// Forward declarations of the XML node classes that are fully declared
// later in this header (one declaration per class is sufficient).
class PXMLObject;
class PXMLElement;
class PXMLData;
00061 
00063 
00064 class PXMLBase : public PObject
00065 {
00066   public:
00067     enum {
00068       DEFAULT_MAX_ENTITY_LENGTH = 4096
00069     };
00070 
00071     enum Options {
00072       NoOptions           = 0x0000,
00073       Indent              = 0x0001,
00074       NewLineAfterElement = 0x0002,
00075       NoIgnoreWhiteSpace  = 0x0004,   
00076       CloseExtended       = 0x0008,   
00077       WithNS              = 0x0010,
00078       FragmentOnly        = 0x0020,   
00079       AllOptions          = 0xffff
00080     };
00081     __inline friend Options operator|(Options o1, Options o2) { return (Options)(((unsigned)o1) | ((unsigned)o2)); }
00082     __inline friend Options operator&(Options o1, Options o2) { return (Options)(((unsigned)o1) & ((unsigned)o2)); }
00083 
00084     enum StandAloneType {
00085       UninitialisedStandAlone = -2,
00086       UnknownStandAlone = -1,
00087       NotStandAlone,
00088       IsStandAlone
00089     };
00090 
00091     PXMLBase(int opts = NoOptions);
00092 
00093     void SetOptions(int opts)
00094       { m_options = opts; }
00095 
00096     int GetOptions() const { return m_options; }
00097 
00098     virtual PBoolean IsNoIndentElement(
00099       const PString & /*elementName*/
00100     ) const
00101     {
00102       return false;
00103     }
00104 
00105     void SetMaxEntityLength(unsigned len) { m_maxEntityLength = len; }
00106     unsigned GetMaxEntityLength() const { return m_maxEntityLength; }
00107 
00108   protected:
00109     int m_options;
00110     unsigned m_maxEntityLength;
00111 };
00112 
00113 
00114 class PXML : public PXMLBase
00115 {
00116     PCLASSINFO(PXML, PObject);
00117   public:
00118     PXML(
00119       int options = NoOptions,
00120       const char * noIndentElements = NULL
00121     );
00122     PXML(
00123       const PString & data,
00124       int options = NoOptions,
00125       const char * noIndentElements = NULL
00126     );
00127 
00128     PXML(const PXML & xml);
00129 
00130     ~PXML();
00131 
00132     bool IsLoaded() const { return rootElement != NULL; }
00133     bool IsDirty() const;
00134 
00135     bool Load(const PString & data, Options options = NoOptions);
00136     bool LoadFile(const PFilePath & fn, Options options = NoOptions);
00137 
00138     virtual void OnLoaded() { }
00139 
00140     bool Save(Options options = NoOptions);
00141     bool Save(PString & data, Options options = NoOptions);
00142     bool SaveFile(const PFilePath & fn, Options options = NoOptions);
00143 
00144     void RemoveAll();
00145 
00146     PBoolean IsNoIndentElement(
00147       const PString & elementName
00148     ) const;
00149 
00150     PString AsString() const;
00151     void PrintOn(ostream & strm) const;
00152     void ReadFrom(istream & strm);
00153 
00154 
00155     PXMLElement * GetElement(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const;
00156     PXMLElement * GetElement(const PCaselessString & name, PINDEX idx = 0) const;
00157     PXMLElement * GetElement(PINDEX idx) const;
00158     PINDEX        GetNumElements() const; 
00159     PXMLElement * GetRootElement() const { return rootElement; }
00160     PXMLElement * SetRootElement(PXMLElement * p);
00161     PXMLElement * SetRootElement(const PString & documentType);
00162     bool          RemoveElement(PINDEX idx);
00163 
00164     PCaselessString GetDocumentType() const;
00165 
00166 
00167     enum ValidationOp {
00168       EndOfValidationList,
00169       DocType,
00170       ElementName,
00171       RequiredAttribute,
00172       RequiredNonEmptyAttribute,
00173       RequiredAttributeWithValue,
00174       RequiredElement,
00175       Subtree,
00176       RequiredAttributeWithValueMatching,
00177       RequiredElementWithBodyMatching,
00178       OptionalElement,
00179       OptionalAttribute,
00180       OptionalNonEmptyAttribute,
00181       OptionalAttributeWithValue,
00182       OptionalAttributeWithValueMatching,
00183       OptionalElementWithBodyMatching,
00184       SetDefaultNamespace,
00185       SetNamespace,
00186 
00187       RequiredAttributeWithValueMatchingEx = RequiredAttributeWithValueMatching + 0x8000,
00188       OptionalAttributeWithValueMatchingEx = OptionalAttributeWithValueMatching + 0x8000,
00189       RequiredElementWithBodyMatchingEx    = RequiredElementWithBodyMatching    + 0x8000,
00190       OptionalElementWithBodyMatchingEx    = OptionalElementWithBodyMatching    + 0x8000
00191     };
00192 
00193     struct ValidationContext {
00194       PString m_defaultNameSpace;
00195       PStringToString m_nameSpaces;
00196     };
00197 
00198     struct ValidationInfo {
00199       ValidationOp m_op;
00200       const char * m_name;
00201 
00202       union {
00203         const void     * m_placeHolder;
00204         const char     * m_attributeValues;
00205         ValidationInfo * m_subElement;
00206         const char     * m_namespace;
00207       };
00208 
00209       PINDEX m_minCount;
00210       PINDEX m_maxCount;
00211     };
00212 
00213     bool Validate(const ValidationInfo * validator);
00214     bool ValidateElements(ValidationContext & context, PXMLElement * baseElement, const ValidationInfo * elements);
00215     bool ValidateElement(ValidationContext & context, PXMLElement * element, const ValidationInfo * elements);
00216     bool LoadAndValidate(const PString & body, const PXML::ValidationInfo * validator, PString & error, int options = NoOptions);
00217 
00218     PString  GetErrorString() const { return m_errorString; }
00219     unsigned GetErrorColumn() const { return m_errorColumn; }
00220     unsigned GetErrorLine() const   { return m_errorLine; }
00221 
00222     PString GetDocType() const         { return docType; }
00223     void SetDocType(const PString & v) { docType = v; }
00224 
00225     PMutex & GetMutex() { return rootMutex; }
00226 
00227     // static methods to create XML tags
00228     static PString CreateStartTag (const PString & text);
00229     static PString CreateEndTag (const PString & text);
00230     static PString CreateTagNoData (const PString & text);
00231     static PString CreateTag (const PString & text, const PString & data);
00232 
00233     static PString EscapeSpecialChars(const PString & string);
00234 
00235   protected:
00236     void Construct(int options, const char * noIndentElements);
00237     PXMLElement * rootElement;
00238     PMutex rootMutex;
00239 
00240     bool loadFromFile;
00241     PFilePath loadFilename;
00242     PString version, encoding;
00243     StandAloneType m_standAlone;
00244 
00245     PStringStream m_errorString;
00246     unsigned      m_errorLine;
00247     unsigned      m_errorColumn;
00248 
00249     PSortedStringList noIndentElements;
00250 
00251     PString docType;
00252     PString m_defaultNameSpace;
00253 };
00254 
00255 
00256 #if P_HTTP
00257 class PXML_HTTP : public PXML
00258 {
00259     PCLASSINFO(PXML_HTTP, PXML);
00260   public:
00261     PXML_HTTP(
00262       int options = NoOptions,
00263       const char * noIndentElements = NULL
00264     );
00265 
00266     bool StartAutoReloadURL(
00267       const PURL & url, 
00268       const PTimeInterval & timeout, 
00269       const PTimeInterval & refreshTime,
00270       Options options = NoOptions
00271     );
00272     bool StopAutoReloadURL();
00273     PString GetAutoReloadStatus() { PWaitAndSignal m(autoLoadMutex); PString str = autoLoadError; return str; }
00274     bool AutoLoadURL();
00275     virtual void OnAutoLoad(PBoolean ok);
00276 
00277     bool LoadURL(const PURL & url);
00278     bool LoadURL(const PURL & url, const PTimeInterval & timeout, Options options = NoOptions);
00279 
00280   protected:
00281     PDECLARE_NOTIFIER(PTimer,  PXML_HTTP, AutoReloadTimeout);
00282     PDECLARE_NOTIFIER(PThread, PXML_HTTP, AutoReloadThread);
00283 
00284     PTimer autoLoadTimer;
00285     PURL autoloadURL;
00286     PTimeInterval autoLoadWaitTime;
00287     PMutex autoLoadMutex;
00288     PString autoLoadError;
00289 };
00290 #endif // P_HTTP
00291 
00293 
00294 PARRAY(PXMLObjectArray, PXMLObject);
00295 
00296 class PXMLObject : public PObject {
00297   PCLASSINFO(PXMLObject, PObject);
00298   public:
00299     PXMLObject(PXMLElement * par)
00300       : parent(par) { dirty = false; }
00301 
00302     PXMLElement * GetParent() const
00303       { return parent; }
00304 
00305     PXMLObject * GetNextObject() const;
00306 
00307     void SetParent(PXMLElement * newParent)
00308     { 
00309       PAssert(parent == NULL, "Cannot reparent PXMLElement");
00310       parent = newParent;
00311     }
00312 
00313     PString AsString() const;
00314 
00315     virtual void Output(ostream & strm, const PXMLBase & xml, int indent) const = 0;
00316 
00317     virtual PBoolean IsElement() const = 0;
00318 
00319     void SetDirty();
00320     bool IsDirty() const { return dirty; }
00321 
00322     virtual PXMLObject * Clone(PXMLElement * parent) const = 0;
00323 
00324   protected:
00325     PXMLElement * parent;
00326     bool dirty;
00327 };
00328 
00330 
00331 class PXMLData : public PXMLObject {
00332   PCLASSINFO(PXMLData, PXMLObject);
00333   public:
00334     PXMLData(PXMLElement * parent, const PString & data);
00335     PXMLData(PXMLElement * parent, const char * data, int len);
00336 
00337     PBoolean IsElement() const    { return false; }
00338 
00339     void SetString(const PString & str, bool dirty = true);
00340 
00341     PString GetString() const           { return value; }
00342 
00343     void Output(ostream & strm, const PXMLBase & xml, int indent) const;
00344 
00345     PXMLObject * Clone(PXMLElement * parent) const;
00346 
00347   protected:
00348     PString value;
00349 };
00350 
00352 
00353 class PXMLElement : public PXMLObject {
00354   PCLASSINFO(PXMLElement, PXMLObject);
00355   public:
00356     PXMLElement(PXMLElement * parent, const char * name = NULL);
00357     PXMLElement(PXMLElement * parent, const PString & name, const PString & data);
00358 
00359     PBoolean IsElement() const { return true; }
00360 
00361     void PrintOn(ostream & strm) const;
00362     void Output(ostream & strm, const PXMLBase & xml, int indent) const;
00363 
00364     PCaselessString GetName() const
00365       { return name; }
00366 
00371     PCaselessString GetPathName() const;
00372 
00373     void SetName(const PString & v)
00374     { name = v; }
00375 
00376     PINDEX GetSize() const
00377       { return subObjects.GetSize(); }
00378 
00379     PXMLObject  * AddSubObject(PXMLObject * elem, bool dirty = true);
00380 
00381     PXMLElement * AddChild    (PXMLElement * elem, bool dirty = true);
00382     PXMLData    * AddChild    (PXMLData    * elem, bool dirty = true);
00383 
00384     PXMLElement * AddElement(const char * name);
00385     PXMLElement * AddElement(const PString & name, const PString & data);
00386     PXMLElement * AddElement(const PString & name, const PString & attrName, const PString & attrVal);
00387 
00388     void SetAttribute(const PCaselessString & key,
00389                       const PString & value,
00390                       bool setDirty = true);
00391 
00392     PString GetAttribute(const PCaselessString & key) const;
00393     PString GetKeyAttribute(PINDEX idx) const;
00394     PString GetDataAttribute(PINDEX idx) const;
00395     bool HasAttribute(const PCaselessString & key) const;
00396     bool HasAttributes() const      { return attributes.GetSize() > 0; }
00397     PINDEX GetNumAttributes() const { return attributes.GetSize(); }
00398 
00399     PXMLElement * GetElement(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const;
00400     PXMLElement * GetElement(const PCaselessString & name, PINDEX idx = 0) const;
00401     PXMLObject  * GetElement(PINDEX idx = 0) const;
00402     bool          RemoveElement(PINDEX idx);
00403 
00404     PINDEX FindObject(const PXMLObject * ptr) const;
00405 
00406     bool HasSubObjects() const
00407       { return subObjects.GetSize() != 0; }
00408 
00409     PXMLObjectArray  GetSubObjects() const
00410       { return subObjects; }
00411 
00412     PXMLObject  * GetSubObject(PINDEX idx = 0) const
00413       { return GetElement(idx); }
00414 
00415     PString GetData() const;
00416     void SetData(const PString & data);
00417     void AddData(const PString & data);
00418 
00419     PXMLObject * Clone(PXMLElement * parent) const;
00420 
00421     void GetFilePosition(unsigned & col, unsigned & line) const { col = column; line = lineNumber; }
00422     void SetFilePosition(unsigned col,   unsigned line)         { column = col; lineNumber = line; }
00423 
00424     void AddNamespace(const PString & prefix, const PString & uri);
00425     void RemoveNamespace(const PString & prefix);
00426 
00427     bool GetDefaultNamespace(PCaselessString & str) const;
00428     bool GetNamespace(const PCaselessString & prefix, PCaselessString & str) const;
00429     PCaselessString PrependNamespace(const PCaselessString & name) const;
00430     bool GetURIForNamespace(const PCaselessString & prefix, PCaselessString & uri);
00431 
00432   protected:
00433     PCaselessString name;
00434     PStringToString attributes;
00435     PXMLObjectArray subObjects;
00436     bool dirty;
00437     unsigned column;
00438     unsigned lineNumber;
00439     PStringToString m_nameSpaces;
00440     PCaselessString m_defaultNamespace;
00441 };
00442 
00444 
00445 class PConfig;      // stupid gcc 4 does not recognize PConfig as a class
00446 
00447 class PXMLSettings : public PXML
00448 {
00449   PCLASSINFO(PXMLSettings, PXML);
00450   public:
00451     PXMLSettings(Options options = NewLineAfterElement);
00452     PXMLSettings(const PString & data, Options options = NewLineAfterElement);
00453     PXMLSettings(const PConfig & data, Options options = NewLineAfterElement);
00454 
00455     bool Load(const PString & data);
00456     bool LoadFile(const PFilePath & fn);
00457 
00458     bool Save();
00459     bool Save(PString & data);
00460     bool SaveFile(const PFilePath & fn);
00461 
00462     void SetAttribute(const PCaselessString & section, const PString & key, const PString & value);
00463 
00464     PString GetAttribute(const PCaselessString & section, const PString & key) const;
00465     bool    HasAttribute(const PCaselessString & section, const PString & key) const;
00466 
00467     void ToConfig(PConfig & cfg) const;
00468 };
00469 
00470 
00472 
00473 class PXMLParser : public PXMLBase
00474 {
00475   PCLASSINFO(PXMLParser, PXMLBase);
00476   public:
00477     PXMLParser(int options = NoOptions);
00478     ~PXMLParser();
00479     bool Parse(const char * data, int dataLen, bool final);
00480     void GetErrorInfo(PString & errorString, unsigned & errorCol, unsigned & errorLine);
00481 
00482     virtual void StartElement(const char * name, const char **attrs);
00483     virtual void EndElement(const char * name);
00484     virtual void AddCharacterData(const char * data, int len);
00485     virtual void XmlDecl(const char * version, const char * encoding, int standAlone);
00486     virtual void StartDocTypeDecl(const char * docTypeName,
00487                                   const char * sysid,
00488                                   const char * pubid,
00489                                   int hasInternalSubSet);
00490     virtual void EndDocTypeDecl();
00491     virtual void StartNamespaceDeclHandler(const char * prefix, const char * uri);
00492     virtual void EndNamespaceDeclHandler(const char * prefix);
00493 
00494     PString GetVersion() const  { return version; }
00495     PString GetEncoding() const { return encoding; }
00496 
00497     StandAloneType GetStandAlone() const { return m_standAlone; }
00498 
00499     PXMLElement * GetXMLTree() const;
00500     PXMLElement * SetXMLTree(PXMLElement * newRoot);
00501 
00502   protected:
00503     void * expat;
00504     PXMLElement * rootElement;
00505     bool rootOpen;
00506     PXMLElement * currentElement;
00507     PXMLData * lastElement;
00508     PString version, encoding;
00509     StandAloneType m_standAlone;
00510     PStringToString m_tempNamespaceList;
00511 };
00512 
00514 
00515 class PXMLStreamParser : public PXMLParser
00516 {
00517   PCLASSINFO(PXMLStreamParser, PXMLParser);
00518   public:
00519     PXMLStreamParser();
00520 
00521     virtual void EndElement(const char * name);
00522     virtual PXML * Read(PChannel * channel);
00523 
00524   protected:
00525     PQueue<PXML> messages;
00526 };
00527 
00528 
00529 #endif // P_EXPAT
00530 
00531 #endif // PTLIB_PXML_H
00532 
00533 
00534 // End Of File ///////////////////////////////////////////////////////////////

Generated on Fri Feb 15 20:58:31 2013 for PTLib by  doxygen 1.4.7