/*
 * The Software License
 * =================================================================================
 * Copyright (c) 2003-.The Terimber Corporation. All rights reserved.
 * =================================================================================
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 * The end-user documentation included with the redistribution, if any,
 * must include the following acknowledgment:
 * "This product includes software developed by the Terimber Corporation."
 * =================================================================================
 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE TERIMBER CORPORATION OR ITS CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ================================================================================
 */

00028 #ifndef _terimber_parsexml_h_00029 #define _terimber_parsexml_h_00030
00031 #include "xml/mngxml.h"00032
00033 BEGIN_TERIMBER_NAMESPACE00034 #pragma pack(4)00035
00039class xml_white_space_handler00040 {
00041 public:
00043xml_white_space_handler(constxml_container* el,
00044 bool preserve
00045 ) :
00046 _preserve(preserve),
00047 _el(el)
00048 {
00049 }
00050
00051bool_preserve;
00052constxml_container* _el;
00053 };
00054
00057typedef_stack< xml_white_space_handler >xml_white_space_stack_t;
00058
00061class xml_processor : publicbyte_manager00062 {
00063 public:
00065 xml_processor( byte_source& stream,
00066 xml_document& doc,
00067 mem_pool_t& small_pool,
00068 mem_pool_t& big_pool,
00069 size_t xml_size,
00070 bool validate
00071 );
00073 ~xml_processor();
00075 bool00076 parse();
00078 constchar*
00079 get_error() const;
00080
00081 private:
00084 void00085 parseDocument();
00086
00089 void00090 parseProlog();
00094 void00095 parseDocTypeDecl();
00098 void00099 parseElement();
00103 xml_forceinline00104 void00105 parseStartTag();
00108 void00109 parseAttributes(xml_element& el
00110 );
00113 xml_forceinline00114 void00115 parseEndTag();
00118 xml_forceinline00119 void00120 parseContent();
00126 void00127 parseCDATA();
00129 // [14] CharData ::= ,// [^<&]* - ([^<&]* ']]>' [^<&]*) 00130 void00131 _parseCharData();
00133 xml_forceinline00134 void00135 parseCharData();
00140 void00141 parseGeneralReference(bool skip_after
00142 );
00145 void00146 parseMisc();
00148 void00149 parseDTD( constchar* location = 0
00150 );
00152 void00153 skipDTD();
00155 void00156 resolve_references();
00157
00158 private:
00159xml_document& _doc;
00160bool_preserve_white_space;
00161byte_allocator* _white_space_allocator;
00162xml_white_space_stack_t_white_space_stack;
00163string_t_error;
00164bool_validate;
00165 };
00166
00167 #pragma pack()00168 END_TERIMBER_NAMESPACE00169
00170 #endif // _terimber_parsexml_h_