Home / Open source / Terimber 2.0
/*
 * The Software License
 * =================================================================================
 * Copyright (c) 2003-.The Terimber Corporation. All rights reserved.
 * =================================================================================
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 * The end-user documentation included with the redistribution, if any,
 * must include the following acknowledgment:
 * "This product includes software developed by the Terimber Corporation."
 * =================================================================================
 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE TERIMBER CORPORATION OR ITS CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00025 * ================================================================================ 00026 */ 00027 00028 #ifndef _terimber_parsexml_hpp_ 00029 #define _terimber_parsexml_hpp_ 00030 00031 #include "xml/parsexml.h" 00032 00033 BEGIN_TERIMBER_NAMESPACE 00034 #pragma pack(4) 00035 00037 00038 // [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 00039 // [40] STag ::= '<' Name (S Attribute)* S? '>' 00040 // we can handle both 00041 xml_forceinline 00042 void 00043 xml_processor::parseStartTag() 00044 { 00045 // skips junk and resolves entity, if any 00046 if (pick() == ch_ampersand) 00047 parseGeneralReference(false); 00048 00049 // adds element to document 00050 xml_element* el = _doc.add_element(parseName()); 00051 00052 // parses attribute 00053 parseAttributes(*el); 00054 00055 // checks the close tag 00056 switch (pick()) 00057 { 00058 case ch_close_angle: // '>' 00059 pop(); 00060 // element isn't closed yet 00061 // next closeTag is expected 00062 // sets current element 00063 _doc.container_push(el); 00064 break; 00065 case ch_forward_slash: 00066 pop(); 00067 skip_sign(ch_close_angle, false, false, "Expected close tag"); 00068 // validate element before leaving 00069 if (_validate) 00070 _doc.validate(*el); 00071 00072 if (!_white_space_stack.empty() && _white_space_stack.top()._el == el) 00073 { 00074 _white_space_stack.pop(); 00075 if (_white_space_stack.empty()) 00076 _white_space_allocator->reset(); 00077 00078 _preserve_white_space = _white_space_stack.empty() ? false : _white_space_stack.top()._preserve; 00079 } 00080 00081 break; 00082 default: // 00083 throw_exception("Expected close tag"); 00084 } 00085 } 00086 00087 00088 // [42] ETag ::= '</' Name S? 
'>' 00089 xml_forceinline 00090 void 00091 xml_processor::parseEndTag() 00092 { 00093 xml_element* el = _doc.container_pop(); 00094 if (!el) 00095 throw_exception("Unexpected close tag"); 00096 00097 // checks name 00098 if (el->_decl->_name != parseName()) 00099 throw_exception("Invalid close tag"); 00100 00101 if (!_white_space_stack.empty() && _white_space_stack.top()._el == el) 00102 { 00103 _white_space_stack.pop(); 00104 if (_white_space_stack.empty()) 00105 _white_space_allocator->reset(); 00106 00107 _preserve_white_space = _white_space_stack.empty() ? false : _white_space_stack.top()._preserve; 00108 } 00109 00110 // validates element before leaving 00111 if (_validate) _doc.validate(*el); 00112 00113 skip_sign(ch_close_angle, true, false, "Expected close tag"); 00114 } 00115 00116 // [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* 00117 xml_forceinline 00118 void 00119 xml_processor::parseContent() 00120 { 00121 // [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* 00122 reset_all_tmp(); 00123 ub1_t symbol = pick(); 00124 00125 while (symbol) 00126 { 00127 switch (symbol) 00128 { 00129 case ch_open_angle: 00130 // [18] CDSect ::= CDStart CData CDEnd 00131 // [19] CDStart ::= '<![CDATA[' 00132 // OR 00133 // [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? 
'?>' 00134 // OR 00135 // [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 00136 switch (pop()) 00137 { 00138 case ch_question: 00139 parsePI(); 00140 break; 00141 case ch_bang: 00142 // CDStart OR Comment 00143 switch (pop()) 00144 { 00145 case ch_dash: 00146 parseComment(); 00147 break; 00148 case ch_open_square: 00149 pop(); 00150 parseCDATA(); 00151 break; 00152 default: 00153 throw_exception("Unexpected markup instruction"); 00154 break; 00155 } 00156 break; 00157 default: // child element 00158 push(ch_open_angle); 00159 return; 00160 } // switch 00161 break; 00162 case ch_ampersand: 00163 // [67] Reference ::= EntityRef | CharRef 00164 // [68] EntityRef ::= '&' Name ';' 00165 // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 00166 // parseGeneralReference(false); 00167 // break; 00168 default: 00169 parseCharData(); 00170 break; 00171 } // switch 00172 00173 symbol = pick(); 00174 } 00175 } 00176 00177 00178 xml_forceinline 00179 void 00180 xml_processor::parseCharData() 00181 { 00182 if (!_preserve_white_space) 00183 skip_white_space(); 00184 00185 ub1_t symbol = pick(); 00186 if (symbol && symbol != ch_open_angle) 00187 _parseCharData(); 00188 } 00189 00190 #pragma pack() 00191 END_TERIMBER_NAMESPACE 00192 00193 #endif // _terimber_parsexml_hpp_