00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include "xml/mngxml.hpp"
00029 #include "xml/declxml.hpp"
00030 #include "xml/defxml.hpp"
00031 #include "xml/miscxml.hpp"
00032 #include "xml/storexml.hpp"
00033 #include "xml/sxml.hpp"
00034 #include "xml/sxs.hpp"
00035
00036 #include "base/memory.hpp"
00037 #include "base/list.hpp"
00038 #include "base/map.hpp"
00039 #include "base/stack.hpp"
00040 #include "base/string.hpp"
00041 #include "base/template.hpp"
00042 #include "base/common.hpp"
00043
00044 BEGIN_TERIMBER_NAMESPACE
00045 #pragma pack(4)
00046
00048 byte_manager::byte_manager(byte_source& stream,
00049 xml_document& doc,
00050 mem_pool_t& small_pool,
00051 mem_pool_t& big_pool,
00052 size_t xml_size) :
00053 _xml_size(__max(xml_size, os_def_size)),
00054 _small_pool(small_pool),
00055 _big_pool(big_pool),
00056
00057 _depot_store1_allocator(_xml_size <= os_def_size ? _small_pool.loan_object() : _big_pool.loan_object(_xml_size)),
00058 _depot_store2_allocator(_xml_size <= os_def_size ? _small_pool.loan_object() : _big_pool.loan_object(_xml_size)),
00059 _depot_store3_allocator(_xml_size <= os_def_size ? _small_pool.loan_object() : _big_pool.loan_object(_xml_size)),
00060
00061 _tmp_allocator(_small_pool.loan_object()),
00062
00063 _tmp_store1_allocator(_small_pool.loan_object()),
00064 _tmp_store2_allocator(_small_pool.loan_object()),
00065 _tmp_store3_allocator(_small_pool.loan_object()),
00066
00067 _entity_allocator(_small_pool.loan_object()),
00068
00069 _doc(doc),
00070
00071 _stream(stream),
00072
00073 _tmp_store1(*_depot_store1_allocator, *_tmp_store1_allocator, _xml_size - 1),
00074 _tmp_store2(*_depot_store2_allocator, *_tmp_store2_allocator, _xml_size - 1),
00075 _tmp_store3(*_depot_store3_allocator, *_tmp_store3_allocator, _xml_size - 1)
00076 {
00077 }
00078
00079 byte_manager::~byte_manager()
00080 {
00081 _small_pool.return_object(_tmp_allocator);
00082 _small_pool.return_object(_entity_allocator);
00083
00084 _small_pool.return_object(_tmp_store1_allocator);
00085 _small_pool.return_object(_tmp_store2_allocator);
00086 _small_pool.return_object(_tmp_store3_allocator);
00087
00088 if (_xml_size <= os_def_size)
00089 {
00090 _small_pool.return_object(_depot_store1_allocator);
00091 _small_pool.return_object(_depot_store2_allocator);
00092 _small_pool.return_object(_depot_store3_allocator);
00093 }
00094 else
00095 {
00096 _big_pool.return_object(_depot_store1_allocator);
00097 _big_pool.return_object(_depot_store2_allocator);
00098 _big_pool.return_object(_depot_store3_allocator);
00099 }
00100 }
00101
00102 const char*
00103 byte_manager::parseValue()
00104 {
00105
00106 _tmp_store1.reset();
00107
00108 ub1_t symbol = pick();
00109 while (symbol && is_name_char(symbol))
00110 {
00111 _tmp_store1 << symbol;
00112 symbol = pop();
00113 }
00114
00115 return _tmp_store1.persist();
00116 }
00117
00118 void
00119 byte_manager::parseAttributeValue(string_t& name, string_t& value)
00120 {
00121 skip_white_space();
00122
00123 name = parseName();
00124 skip_sign(ch_equal, true, true, "Can't find equal symbol");
00125
00126 value = parseQuotedValue(true, true, is_attribute_char, "Invalid characters in attribute value");
00127 }
00128
00129 void
00130 byte_manager::parseComment()
00131 {
00132 assert(pick() == ch_dash);
00133
00134 _tmp_store1.reset();
00135
00136
00137 pop();
00138 skip_sign(ch_dash, false, false, "Invalid char after <!- in comment");
00139
00140 ub1_t state = 0;
00141
00142
00143 ub1_t symbol = pick();
00144 while (symbol)
00145 {
00146 if (state == 2)
00147 {
00148 if (symbol != ch_close_angle)
00149 throw_exception("Invalid sequence (--) in comment");
00150
00151 pop();
00152 _doc.add_comment(_tmp_store1.persist());
00153 _tmp_store1.reset();
00154 return;
00155 }
00156
00157 if (symbol == ch_dash)
00158 ++state;
00159 else
00160 {
00161 while (state)
00162 {
00163 _tmp_store1 << ch_dash;
00164 --state;
00165 }
00166
00167 _tmp_store1 << symbol;
00168 }
00169 symbol = pop();
00170 }
00171
00172
00173
00174 throw_exception("Can't find the close tag --> for comment");
00175 }
00176
00177
00178 void
00179 byte_manager::parseCharRef(paged_buffer& buffer)
00180 {
00181 assert(pick() == ch_pound);
00182 ub4_t result = 0;
00183 numeric_radix radix_ = RADIX10;
00184
00185 ub1_t symbol = pop();
00186
00187
00188 if (symbol == ch_x || symbol == ch_X)
00189 {
00190 symbol = pop();
00191 radix_ = RADIX16;
00192 }
00193
00194 while (symbol && symbol != ch_semicolon)
00195 {
00196 result *= radix_;
00197 if (symbol >= ch_0 && symbol <= ch_9)
00198 result += symbol - ch_0;
00199 else if (radix_ == RADIX16 && symbol >= ch_A && symbol <= ch_F)
00200 result += symbol - ch_A + 0x0A;
00201 else if (radix_ == RADIX16 && symbol >= ch_a && symbol <= ch_f)
00202 result += symbol - ch_a + 0x0A;
00203 else
00204 throw_exception("Invalid char value");
00205
00206 symbol = pop();
00207 }
00208
00209 skip_sign(ch_semicolon, false, false, "Expected semicolon after Entity Reference");
00210
00211
00212 ub1_t utf8Buf[7] = {0};
00213 size_t count = 0;
00214 if (!usascii_to_utf8(result, utf8Buf, count))
00215 throw_exception("Invalid char token while char reference conversion");
00216 buffer.append(utf8Buf, count);
00217 }
00218
00219 const char*
00220 byte_manager::parseQuotedValue(bool resolve_entities, bool normalize, bool (*fn)(ub1_t), const char* message)
00221 {
00222 const bool check = fn != 0;
00223
00224
00225 ub1_t quote_symbol = _stream.skip_quote();
00226
00227 reset_all_tmp();
00228
00229 bool white_space_met = false;
00230
00231 ub1_t symbol = pick();
00232
00233
00234 while (symbol && symbol != quote_symbol)
00235 {
00236 switch (symbol)
00237 {
00238 case ch_space:
00239 case ch_hor_tab:
00240 case ch_lf:
00241 case ch_cr:
00242 if (normalize)
00243 white_space_met = true;
00244 else
00245 _tmp_store3 << symbol;
00246
00247
00248 symbol = pop();
00249 break;
00250 case ch_ampersand:
00251 if (resolve_entities)
00252 {
00253 if (resolveEntity(_tmp_store2))
00254 {
00255
00256 size_t len = 0;
00257 const ub1_t* pbuf = _tmp_store2.persist(len);
00258
00259 push(pbuf, len);
00260 }
00261 else
00262 {
00263 if (white_space_met)
00264 {
00265
00266 _tmp_store3 << ch_space;
00267
00268 white_space_met = false;
00269 }
00270 _tmp_store3 << _tmp_store2.persist();
00271 }
00272
00273
00274 _tmp_store2.reset();
00275
00276 symbol = pick();
00277 break;
00278 }
00279 default:
00280
00281 if (check && !fn(symbol))
00282 throw_exception(message);
00283
00284 if (white_space_met)
00285 {
00286
00287 _tmp_store3 << ch_space;
00288
00289 white_space_met = false;
00290 }
00291
00292
00293 _tmp_store3 << symbol;
00294
00295 symbol = pop();
00296 }
00297 }
00298
00299
00300 _stream.skip_quote(quote_symbol);
00301
00302 return _tmp_store3.persist();
00303 }
00304
00305 void
00306 byte_manager::parseExternalID(string_t& value_system, string_t& value_public, bool public_strick)
00307 {
00308
00309 switch (pick())
00310 {
00311
00312 case ch_S:
00313 skip_string(str_SYSTEM, "Invalid syntax of the doctype SYSTEM declaration");
00314
00315 skip_white_space(true, "Can't find the separator");
00316
00317 value_system = parseQuotedValue(false, false, 0, 0);
00318 break;
00319 case ch_P:
00320 skip_string(str_PUBLIC, "Invalid syntax of the doctype SYSTEM declaration");
00321
00322 skip_white_space(true, "Can't find the separator");
00323
00324 value_public = parseQuotedValue(false, false, is_public_char, "Invalid syntax of the PUBLIC part of ExternalID declaration");
00325 if (public_strick)
00326 {
00327 skip_white_space(true, "Can't find the separator");
00328
00329 value_system = parseQuotedValue(false, false, 0, 0);
00330 }
00331 else
00332 {
00333 skip_white_space();
00334 if (pick() != ch_close_angle)
00335
00336 value_system = parseQuotedValue(false, false, 0, 0);
00337 }
00338 break;
00339 default:
00340 throw_exception("Invalid syntax of the ExternalID declaration");
00341 }
00342 }
00343
00344 void
00345 byte_manager::parsePI()
00346 {
00347 skip_sign(ch_question, false, false, "Expected question mark in PI");
00348 _tmp_store2.reset();
00349
00350 const char* name = parseName();
00351
00352 if (!str_template::strnocasecmp(name, str_xml, os_minus_one))
00353 throw_exception("Invalid PI target name");
00354
00355 if (is_white_space(pick()))
00356 {
00357
00358 skip_white_space();
00359
00360 ub1_t symbol = pick();
00361 while (symbol)
00362 {
00363 if (symbol == ch_question)
00364 {
00365 pop();
00366 skip_sign(ch_close_angle, false, false, "Expected close tag");
00367 _doc.add_pi(name, _tmp_store2.persist());
00368 return;
00369 }
00370
00371 _tmp_store2 << symbol;
00372 symbol = pop();
00373 }
00374
00375 throw_exception("Invalid PI target syntax");
00376 }
00377
00378 skip_sign(ch_question, false, false, "Expected question mark");
00379 skip_sign(ch_close_angle, false, false, "Expected close tag");
00380
00381 _doc.add_pi(name, _tmp_store2.persist());
00382 }
00383
00384 bool
00385 byte_manager::resolveEntity(paged_buffer& buffer)
00386 {
00387 pop();
00388
00389 if (pick() == ch_pound)
00390 {
00391 parseCharRef(buffer);
00392 return false;
00393 }
00394
00395
00396 const entityDecl* entry = _doc.find_entity_decl(parseName());
00397 if (!entry)
00398 throw_exception("Unresolved parameter entity");
00399
00400 buffer.reset();
00401 skip_sign(ch_semicolon, false, false, "Expected semicolon symbol");
00402
00403
00404
00405 if (!entry->_is_encoded_char)
00406 {
00407 entity_map_t::iterator iter = _entity_map.find(entry);
00408 if (iter != _entity_map.end())
00409 {
00410
00411 if (*iter >= _stream.current_pos())
00412
00413 throw_exception("Recursive entities are not allowed");
00414 else
00415 *iter = _stream.current_pos();
00416 }
00417 else
00418 _entity_map.insert(*_entity_allocator, entry, _stream.current_pos());
00419 }
00420
00421
00422 if (get_standalone() && entry->_is_in_subset)
00423 throw_exception("Illegal external reference in standalone document");
00424
00425
00426 if (entry->_value.length())
00427
00428 buffer << entry->_value;
00429 else if (entry->_systemId.length())
00430 {
00431 if (entry->_is_unparsed)
00432 {
00433 buffer << entry->_notation;
00434 return false;
00435
00436 }
00437 else
00438
00439 buffer_loader::load(_stream.get_location(), entry->_systemId, _small_pool, _big_pool, buffer, true);
00440 }
00441
00442 return !entry->_is_encoded_char;
00443 }
00444
00445
00446 #pragma pack()
00447 END_TERIMBER_NAMESPACE