00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include "xml/dtdxml.h"
00029 #include "xml/declxml.hpp"
00030 #include "xml/defxml.hpp"
00031 #include "xml/mngxml.hpp"
00032 #include "xml/sxml.hpp"
00033 #include "xml/sxs.hpp"
00034 #include "xml/storexml.hpp"
00035 #include "xml/miscxml.hpp"
00036
00037 #include "base/list.hpp"
00038 #include "base/map.hpp"
00039 #include "base/stack.hpp"
00040 #include "base/string.hpp"
00041 #include "base/common.hpp"
00042 #include "base/memory.hpp"
00043
00044 BEGIN_TERIMBER_NAMESPACE
00045 #pragma pack(4)
00046
00047 dtd_processor::dtd_processor( byte_source& stream,
00048 xml_document& doc,
00049 mem_pool_t& small_pool,
00050 mem_pool_t& big_pool,
00051 size_t xml_size) :
00052 byte_manager(stream, doc, small_pool, big_pool, xml_size)
00053 {
00054 }
00056
00057
00058
00059
00060 void
00061 dtd_processor::parse()
00062 {
00063
00064 parseSubSet(!get_subset() ? ch_close_square : ch_null, get_subset());
00065 validate();
00066
00067 _doc._standalone = 1;
00068 }
00069
00070 void
00071 dtd_processor::parseSubSet(char stopSymbol, bool include_allowed)
00072 {
00073 ub1_t symbol = 0;
00074 while ((symbol = pick()) && symbol != stopSymbol)
00075 {
00076 switch (symbol)
00077 {
00078
00079
00080
00081 case ch_percent:
00082
00083 parsePEReference(false, true);
00084 break;
00085 case ch_open_angle:
00086
00087
00088
00089
00090
00091
00092
00093
00094 switch (pop())
00095 {
00096 case ch_question:
00097
00098 parsePI();
00099 break;
00100 case ch_bang:
00101
00102
00103
00104
00105
00106 switch (pop())
00107 {
00108 case ch_E:
00109 switch (pop())
00110 {
00111 case ch_L:
00112 parseElement();
00113 break;
00114 case ch_N:
00115 parseEntity();
00116 break;
00117 default:
00118 throw_exception("Invalid ELEMENT or ENTITY sections syntax");
00119 }
00120 break;
00121 case ch_A:
00122 parseAttrList();
00123 break;
00124 case ch_N:
00125 parseNotation();
00126 break;
00127 case ch_dash:
00128 parseComment();
00129 break;
00130 case ch_open_square:
00131
00132 if (!include_allowed)
00133 throw_exception("IGNORE or INCLUDE sections are not allowed in internal dtd subset");
00134
00135 pop();
00136 skip_sign(ch_I, true, false, "Expected IGNORE or INCLUDE sections");
00137 switch (pick())
00138 {
00139 case ch_G:
00140
00141 parseIgnore();
00142 break;
00143 case ch_N:
00144
00145 parseInclude();
00146 break;
00147 default:
00148 throw_exception("Invalid IGNORE or INCLUDE sections syntax");
00149 break;
00150 }
00151 break;
00152 default:
00153 throw_exception("Invalid markup instruction syntax in the internal dtd section");
00154 break;
00155 }
00156 break;
00157 default:
00158 throw_exception("Unexpected char in dtd markup language");
00159 break;
00160 }
00161 break;
00162 default:
00163 skip_white_space(true, "White space expected");
00164 break;
00165 }
00166
00167 reset_all_tmp(true);
00168 _doc.get_tmp_allocator().reset();
00169 }
00170
00171 }
00172
00173 void
00174 dtd_processor::parseIgnore()
00175 {
00176
00177
00178
00179
00180
00181 skip_string(str_IGNORE + 1, "Invalid IGNORE section syntax");
00182 skip_sign(ch_open_square, true, true, "Expected IGNORE Bracket");
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192 size_t depth = 1;
00193 ub1_t symbol = 0;
00194
00195 while (0 != (symbol = pick()))
00196 {
00197 switch (symbol)
00198 {
00199 case ch_open_angle:
00200 if (pop() == ch_bang && pop() == ch_open_square)
00201 {
00202 pop();
00203 ++depth;
00204 }
00205 continue;
00206 case ch_close_square:
00207 if (pop() == ch_close_square && pop() == ch_close_angle)
00208 {
00209 pop();
00210 if (!--depth)
00211 return;
00212 }
00213 continue;
00214 default:
00215 break;
00216 }
00217
00218 pop();
00219 skip_white_space();
00220 }
00221
00222
00223 throw_exception("Invalid IGNORE section syntax");
00224 }
00225
00226 void
00227 dtd_processor::parseInclude()
00228 {
00229
00230
00231 skip_string(str_INCLUDE + 1, "Invalid INCLUDE section syntax");
00232 skip_sign(ch_open_square, true, true, "Expected open square tag symbol after INCLUDE section");
00233 parseSubSet(ch_close_square, true);
00234 skip_sign(ch_close_square, true, false, "Expected INCLUDE close bracket");
00235 skip_sign(ch_close_square, false, false, "Expected INCLUDE second close bracket");
00236 skip_sign(ch_close_angle, false, false, "Expected INCLUDE close angle");
00237 }
00238
00239
00240 void
00241 dtd_processor::parseElement()
00242 {
00243
00244
00245 skip_string(str_ELEMENT + 1, "Invalid ELEMENT section syntax");
00246 skip_white_space(true, "Expected white space");
00247
00248 parsePEReference(false, true);
00249
00250 elementDecl& decl = _doc.add_element_decl(parseName(), false, true, false);
00251 skip_white_space(true, "Expected white space");
00252 parsePEReference(false, false);
00253
00254
00255 parseContentSpec(decl);
00256
00257 skip_sign(ch_close_angle, true, false, "Expected close tag");
00258 _doc.add_element_desc(decl);
00259 }
00260
00261 void
00262 dtd_processor::parseContentSpec(elementDecl& decl)
00263 {
00264 skip_white_space();
00265
00266 switch (pick())
00267 {
00268 case ch_E:
00269 skip_string(str_EMPTY, "Invalid EMPTY declaration");
00270 decl._content = CONTENT_EMPTY;
00271 break;
00272 case ch_A:
00273 skip_string(str_ANY, "Invalid ANY declaration");
00274 decl._content = CONTENT_ANY;
00275 break;
00276 case ch_open_paren:
00277 pop();
00278
00279 parsePEReference(true, true);
00280
00281
00282
00283 switch (pick())
00284 {
00285 case ch_pound:
00286 skip_string(str__PCDATA, "Invalid PCDATA declaration");
00287 decl._content = CONTENT_MIXED;
00288 decl._token = parseMixed();
00289 return;
00290 default:
00291 decl._content = CONTENT_CHILDREN;
00292 decl._token = parseChildren();
00293 return;
00294 }
00295 default:
00296 throw_exception("Invalid element content");
00297 }
00298
00299
00300 deterministic_model(decl._token);
00301 }
00302
00303 dfa_token*
00304 dtd_processor::parseMixed()
00305 {
00306
00307
00308
00309
00310 dfa_token* curToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token)))) dfa_token(DFA_LEAF, 0, 0, 0);
00311 dfa_token* orgToken = curToken;
00312 dfa_token* headToken = orgToken;
00313
00314 _list< const elementDecl* > uniqueList;
00315
00316
00317
00318
00319
00320 bool hasElement = false;
00321 ub1_t symbol = 0;
00322
00323 while (0 != (symbol = pick()))
00324 {
00325 skip_white_space();
00326 switch (symbol = pick())
00327 {
00328 case ch_percent:
00329 parsePEReference(false, false);
00330 break;
00331 case ch_pipe:
00332 pop();
00333 parsePEReference(true, true);
00334
00335
00336 {
00337 const elementDecl& decl = _doc.add_element_decl(parseName(), false, false, false);
00338
00339
00340 if (curToken == orgToken)
00341 {
00342 hasElement = true;
00343 curToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00344 dfa_token(DFA_CHOICE, 0, curToken, new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00345 dfa_token(DFA_LEAF, &decl, 0, 0));
00346
00347 headToken = curToken;
00348 }
00349 else
00350 {
00351
00352 for (_list< const elementDecl* >::const_iterator iter = uniqueList.begin(); iter != uniqueList.end(); ++iter)
00353 if (*iter == &decl)
00354 throw_exception("Dublicate elements in Mixed model");
00355
00356 dfa_token* oldLast = curToken->_last;
00357 curToken->_last = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00358 dfa_token(DFA_CHOICE, 0, oldLast, new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00359 dfa_token(DFA_LEAF, &decl, 0, 0));
00360
00361
00362 curToken = curToken->_last;
00363 }
00364
00365
00366 uniqueList.push_back(*_tmp_allocator, &decl);
00367 }
00368 break;
00369 case ch_close_paren:
00370 if (ch_asterisk == pop())
00371 skip_sign(ch_asterisk, false, false, "Expected asterisk symbol");
00372
00373
00374
00375
00376
00377 if (hasElement)
00378 {
00379 headToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00380 dfa_token(DFA_ASTERISK, 0, headToken, 0);
00381 }
00382
00383 return headToken;
00384 default:
00385 throw_exception("Invalid element PCDATA syntax");
00386 }
00387 }
00388
00389 throw_exception("Invalid element PCDATA syntax");
00390 return 0;
00391 }
00392
00393 dfa_token*
00394 dtd_processor::checkRepeation(ub1_t symbol, dfa_token* token)
00395 {
00396 dfa_token* retVal = token;
00397
00398 switch (symbol)
00399 {
00400 case ch_question:
00401 retVal = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token)))) dfa_token(DFA_QUESTION, 0, token, 0);
00402 pop();
00403 break;
00404 case ch_asterisk:
00405 retVal = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token)))) dfa_token(DFA_ASTERISK, 0, token, 0);
00406 pop();
00407 break;
00408 case ch_plus:
00409 retVal = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token)))) dfa_token(DFA_PLUS, 0, token, 0);
00410 pop();
00411 break;
00412 default:
00413 break;
00414 }
00415
00416 return retVal;
00417 }
00418
00419 dfa_token*
00420 dtd_processor::parseChildren()
00421 {
00422 parsePEReference(true, true);
00423
00424 ub1_t symbol = 0;
00425 ub1_t type_char = 0;
00426 dfa_token* curToken = 0;
00427 dfa_token* headToken = 0;
00428
00429 switch (pick())
00430 {
00431 case ch_open_paren:
00432 {
00433 pop();
00434
00435
00436 curToken = parseChildren();
00437 }
00438 break;
00439 default:
00440 {
00441 const elementDecl& decl = _doc.add_element_decl(parseName(), false, false, false);
00442
00443 curToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00444 dfa_token(DFA_LEAF, &decl, 0, 0);
00445
00446 parsePEReference(true, true);
00447 curToken = checkRepeation(pick(), curToken);
00448 }
00449 }
00450
00451
00452 parsePEReference(true, true);
00453
00454 switch (type_char = pick())
00455 {
00456 case ch_comma:
00457 case ch_pipe:
00458 headToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00459 dfa_token(type_char == ch_comma ? DFA_SEQUENCE : DFA_CHOICE, 0, curToken, 0);
00460 curToken = headToken;
00461 break;
00462 case ch_close_paren:
00463 headToken = curToken;
00464 pop();
00465 break;
00466 default:
00467 throw_exception("Expected CHOICE or SEQUENCE or CLOSE PAREN syntax");
00468 }
00469
00470 if ((type_char == ch_comma) || (type_char == ch_pipe))
00471 {
00472 dfa_token* lastToken = 0;
00473 dfaRule typeRule = type_char == ch_comma ? DFA_SEQUENCE : DFA_CHOICE;
00474
00475 while (pick())
00476 {
00477 parsePEReference(true, true);
00478
00479 symbol = pick();
00480
00481 if (symbol == ch_close_paren)
00482 {
00483 pop();
00484
00485
00486
00487
00488
00489 if (!curToken->_last)
00490 {
00491 dfa_token* oldFirst = curToken->_first;
00492 curToken->_first = 0;
00493 lastToken->_last = oldFirst;
00494 curToken = lastToken;
00495 }
00496 break;
00497 }
00498 else if (symbol == ch_comma || symbol == ch_pipe)
00499 {
00500 if (symbol != type_char)
00501 throw_exception("Expected legal CHOICE/SEQUENCE syntax");
00502
00503 pop();
00504 parsePEReference(true, true);
00505
00506 if ((symbol = pick()) == ch_open_paren)
00507 {
00508 pop();
00509
00510 dfa_token* newToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00511 dfa_token(typeRule, 0, parseChildren(), 0);
00512
00513
00514 curToken->_last = newToken;
00515 lastToken = curToken;
00516 curToken = newToken;
00517 }
00518 else
00519 {
00520
00521
00522
00523
00524 const elementDecl& decl = _doc.add_element_decl(parseName(), false, false, false);
00525 dfa_token* tmpToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00526 dfa_token(DFA_LEAF, &decl, 0, 0);
00527
00528 parsePEReference(true, true);
00529
00530 tmpToken = checkRepeation(pick(), tmpToken);
00531
00532
00533
00534
00535
00536
00537
00538 dfa_token* newToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00539 dfa_token(typeRule, 0, tmpToken, 0);
00540
00541 curToken->_last = newToken;
00542 lastToken = curToken;
00543 curToken = newToken;
00544 }
00545 }
00546 else
00547 throw_exception("Expected legal CHOICE/SEQUENCE syntax");
00548 }
00549 }
00550
00551
00552
00553
00554
00555
00556 return checkRepeation(pick(), headToken);
00557 }
00558
00559 void
00560 dtd_processor::parseEntity()
00561 {
00562
00563
00564 skip_string(str_ENTITY + 1, "Invalid ENTITY section syntax");
00565 skip_white_space(true, "Expected white space");
00566
00567
00568 bool bPEDecl = false;
00569 if (pick() == ch_percent)
00570 {
00571 pop();
00572 skip_white_space(true, "Expected white space");
00573 bPEDecl = true;
00574 }
00575
00576
00577
00578 bool wasAdded = false;
00579 entityDecl dummy(0, _tmp_allocator);
00580 entityDecl& entry = _doc.add_entity_decl(parseName(), wasAdded);
00581 entityDecl& entity = wasAdded ? entry : dummy;
00582
00583 entity._is_parameter = bPEDecl;
00584
00585 skip_white_space(true, "Expected white space");
00586 parsePEReference(false, true);
00587
00588
00589 parseEntityDef(entity);
00590 skip_sign(ch_close_angle, true, false, "Expected close tag");
00591 if (wasAdded)
00592 _doc.add_entity_desc(entry);
00593 }
00594
00595 void
00596 dtd_processor::parseEntityDef(entityDecl& decl)
00597 {
00598
00599 reset_all_tmp();
00600
00601 ub1_t symbol = 0;
00602 size_t counter = 0;
00603 bool met_xD = false;
00604
00605
00606 if ((symbol = pick()) == ch_double_quote || symbol == ch_single_quote)
00607 {
00608 ub1_t quote = skip_quote(0);
00609 while (0 != (symbol = pick()))
00610 {
00611
00612 if(symbol == ch_percent)
00613 {
00614
00615
00616
00617 if (&decl == expandPEReference(_tmp_store2))
00618 throw_exception("Recursive PE is not allowed");
00619
00620 size_t len = 0;
00621 const ub1_t* ptr = _tmp_store2.persist(len);
00622 push(ptr, len);
00623 _tmp_store2.reset();
00624 continue;
00625 }
00626 else if (symbol == ch_ampersand)
00627 {
00628 symbol = pop();
00629 if (symbol == ch_pound)
00630 parseCharRef(_tmp_store3);
00631 else
00632 {
00633
00634 const char* value = parseName();
00635 skip_sign(ch_semicolon, false, false, "Expected semicolon after Entity Reference");
00636 _tmp_store3 << ch_ampersand << value << ch_semicolon;
00637 }
00638 }
00639 else if (symbol == quote)
00640 {
00641 pop();
00642
00643 if (met_xD) _tmp_store3 << ch_cr;
00644 decl._value = _tmp_store3.persist();
00645 return;
00646 }
00647 else
00648 {
00649
00650
00651
00652 switch (symbol)
00653 {
00654 case ch_lf:
00655 _tmp_store3 << ch_lf;
00656
00657 met_xD = false;
00658 break;
00659 case ch_cr:
00660
00661 met_xD = true;
00662 break;
00663 default:
00664 if (met_xD)
00665 {
00666
00667 _tmp_store3 << ch_lf;
00668
00669 met_xD = false;
00670 }
00671 _tmp_store3 << symbol;
00672 }
00673 pop();
00674 }
00675 }
00676
00677 if (!symbol)
00678 throw_exception("Invalid ENTITY syntax");
00679 }
00680
00681
00682 skip_white_space();
00683 string_t value_system(_tmp_allocator);
00684 string_t value_public(_tmp_allocator);
00685 parseExternalID(value_system, value_public, true);
00686
00687 decl._systemId = value_system;
00688 decl._publicId = value_public;
00689
00690 if (!decl._is_parameter && is_white_space(pick()))
00691 {
00692
00693 parsePEReference(true, true);
00694
00695 if (pick() == ch_N)
00696 {
00697 skip_string(str_NDATA, "Invalid NDATA syntax");
00698 skip_white_space(true, "Expected white space");
00699 parsePEReference(false, true);
00700
00701 decl._notation = parseName();
00702 decl._is_unparsed = true;
00703 }
00704 }
00705
00706 decl._is_in_subset = true;
00707 }
00708
00709 void
00710 dtd_processor::parseAttrList()
00711 {
00712 skip_string(str_ATTRLIST, "Invalid ATTRLIST section syntax");
00713
00714 skip_white_space(true, "Expected white space");
00715 parsePEReference(false, true);
00716
00717
00718 elementDecl& decl = _doc.add_element_decl(parseName(), false, false, false);
00719
00720 while (pick())
00721 {
00722 skip_white_space();
00723
00724 switch (pick())
00725 {
00726 case ch_close_angle:
00727 pop();
00728 return;
00729 case ch_percent:
00730 parsePEReference(false, false);
00731 break;
00732 default:
00733 parseAttDef(decl);
00734 break;
00735 }
00736 }
00737
00738 throw_exception("Invalid ATTLIST syntax");
00739 }
00740
00741 void
00742 dtd_processor::parseAttDef(elementDecl& decl)
00743 {
00744 bool wasAdded = false;
00745 attributeDecl dummy(0, _tmp_allocator);
00746
00747 attributeDecl& new_decl = _doc.add_attribute_decl(decl, parseName(), false, wasAdded);
00748 attributeDecl& attr_decl = wasAdded ? new_decl : dummy;
00749
00750 skip_white_space(true, "Expected white space");
00751
00752 parsePEReference(false, false);
00753
00754 switch (pick())
00755 {
00756 case ch_C:
00757 if (pop() == ch_D)
00758 {
00759 skip_string(str_CDATA + 1, "Invalid CDATA syntax");
00760 attr_decl._atype = ATTR_TYPE_CDATA;
00761 attr_decl._ctype = vt_string;
00762 }
00763 else
00764 {
00765 skip_string(str_CTYPE + 1, "Invalid CTYPE syntax");
00766 attr_decl._atype = ATTR_TYPE_CDATA;
00767 skip_white_space(true, "Expected white space");
00768 attr_decl._ctype = convert_ctype(parseName());
00769 }
00770 break;
00771 case ch_I:
00772 skip_string(str_ID, "Invalid CDATA syntax");
00773 if (pick() != ch_R)
00774 {
00775 attr_decl._atype = ATTR_TYPE_ID;
00776 if (wasAdded)
00777 {
00778
00779 for (attribute_decl_map_t::const_iterator iter = decl._attributes.begin(); iter != decl._attributes.end(); ++iter)
00780 if (&*iter != &new_decl && iter->_atype == ATTR_TYPE_ID)
00781 throw_exception("Dublicate ID type for the same element");
00782 }
00783 }
00784 else
00785 {
00786 skip_string(str_REF, "Invalid IDREF syntax");
00787 if (pick() == ch_S)
00788 {
00789 attr_decl._atype = ATTR_TYPE_IDREFS;
00790 pop();
00791 }
00792 else
00793 attr_decl._atype = ATTR_TYPE_IDREF;
00794 }
00795
00796 attr_decl._ctype = vt_string;
00797 break;
00798 case ch_E:
00799 skip_string(str_ENTIT, "Invalid ENTITY syntax");
00800 if (pick() == ch_Y)
00801 {
00802 attr_decl._atype = ATTR_TYPE_ENTITY;
00803 pop();
00804 }
00805 else if (pick() == ch_I && pop() == ch_E && pop() == ch_S)
00806 {
00807 attr_decl._atype = ATTR_TYPE_ENTITIES;
00808 pop();
00809 }
00810 else
00811 throw_exception("Invalid ENTITY syntax");
00812
00813 attr_decl._ctype = vt_string;
00814 break;
00815 case ch_N:
00816 if (pop() == ch_M)
00817 {
00818 skip_string(str_NMTOKEN + 1, "Invalid NMTOKEN syntax");
00819 if (pick() == ch_S)
00820 {
00821 attr_decl._atype = ATTR_TYPE_NMTOKENS;
00822 pop();
00823 }
00824 else
00825 attr_decl._atype = ATTR_TYPE_NMTOKEN;
00826
00827 attr_decl._ctype = vt_string;
00828 }
00829 else
00830 {
00831 skip_string(str_NOTATION + 1, "Invalid NOTATION syntax");
00832 skip_white_space(true, "Expected white space");
00833 attr_decl._atype = ATTR_TYPE_NOTATION;
00834 attr_decl._ctype = vt_enum;
00835
00836 if (wasAdded)
00837 {
00838
00839 if (decl._content == CONTENT_EMPTY)
00840 throw_exception("An attribute of type NOTATION must not be declared on an element declared EMPTY");
00841
00842
00843 for (attribute_decl_map_t::const_iterator iter = decl._attributes.begin(); iter != decl._attributes.end(); ++iter)
00844 if (&*iter != &new_decl && iter->_atype == ATTR_TYPE_NOTATION)
00845 throw_exception("No element type may have more than one NOTATION attribute specified");
00846 }
00847
00848 parseAttrEnumeration(attr_decl);
00849 }
00850 break;
00851 case ch_open_paren:
00852 {
00853 attr_decl._atype = ATTR_TYPE_ENUMERATION;
00854 attr_decl._ctype = vt_enum;
00855 parseAttrEnumeration(attr_decl);
00856 }
00857 break;
00858 default:
00859 throw_exception("Unexpected char in attribute definition");
00860 }
00861
00862 skip_white_space(true, "Expected white space");
00863 parsePEReference(false, true);
00864
00865
00866 parseDefaultDecl(attr_decl);
00867
00868
00869 if (!wasAdded)
00870 return;
00871
00872 if (attr_decl._atype == ATTR_TYPE_ID && attr_decl._rule != ATTR_RULE_IMPLIED && attr_decl._rule != ATTR_RULE_REQUIRED)
00873 throw_exception("Invalid AttrDef syntax");
00874
00875
00876
00877
00878
00879 if (!strcmp(str_xml_space, attr_decl._name))
00880 {
00881 if (attr_decl._atype == ATTR_TYPE_ENUMERATION)
00882 {
00883 size_t count = attr_decl._enum.size();
00884
00885 if (count < 1 || count > 2)
00886 throw_exception("Invalid xml space syntax");
00887
00888 if (count == 1 &&
00889 !( attr_decl._enum.front()._value == str_default
00890
00891 || attr_decl._enum.front()._value == str_preserve)
00892 )
00893 throw_exception("Invalid xml space syntax");
00894
00895 if (count == 2 &&
00896 !(attr_decl._enum.front()._value == str_default
00897 && attr_decl._enum.back()._value == str_preserve
00898 || attr_decl._enum.back()._value == str_default
00899 && attr_decl._enum.front()._value == str_preserve)
00900 )
00901 throw_exception("Invalid xml space syntax");
00902 }
00903 else if (attr_decl._atype == ATTR_TYPE_CDATA && attr_decl._rule == ATTR_RULE_FIXED)
00904 {
00905 if (attr_decl._defval != str_default
00906 && attr_decl._defval != str_preserve)
00907 throw_exception("Invalid xml space syntax");
00908 }
00909 else
00910 throw_exception("Invalid xml space syntax");
00911 }
00912
00913
00914 if (attr_decl._defval.length())
00915 {
00916 const char* value_ = attr_decl._defval;
00917 switch (attr_decl._atype)
00918 {
00919 case ATTR_TYPE_ENUMERATION:
00920 {
00921 bool findDefault = false;
00922 for (_list< enumNodeDecl >::const_iterator iter = attr_decl._enum.begin(); iter != attr_decl._enum.end(); ++iter)
00923 {
00924 if (!findDefault && attr_decl._defval == iter->_value)
00925 {
00926 findDefault = true;
00927 break;
00928 }
00929 }
00930
00931 if (!findDefault)
00932 throw_exception("Default value doesn't match the enumeration items");
00933 }
00934 break;
00935 case ATTR_TYPE_ID:
00936 case ATTR_TYPE_NMTOKEN:
00937
00938 break;
00939 case ATTR_TYPE_NMTOKENS:
00940 {
00941 _list< const char* > values;
00942 tokenValues(value_, values, *_tmp_allocator);
00943 _list< const char* > defvalues;
00944 tokenValues(attr_decl._defval, defvalues, *_tmp_allocator);
00945
00946 if (values.empty())
00947 throw_exception("Invalid ENTITIES default value syntax");
00948
00949 for (_list< const char* >::const_iterator iter = values.begin(); iter != values.end(); ++iter)
00950 {
00951
00952 bool findDefault = false;
00953 for (_list< const char* >::const_iterator defiter = defvalues.begin(); defiter != defvalues.end(); ++defiter)
00954 {
00955 if (!findDefault && !strcmp(*defiter, *iter))
00956 {
00957 findDefault = true;
00958 break;
00959 }
00960 }
00961
00962 if (!findDefault)
00963 throw_exception("Default value doesn't match the enumeration items");
00964 }
00965 }
00966 break;
00967 default:
00968 break;
00969 }
00970 }
00971 }
00972
00973 void
00974 dtd_processor::parseAttrEnumeration(attributeDecl& decl)
00975 {
00976
00977
00978
00979 skip_sign(ch_open_paren, false, false, "Expected open paren symbol");
00980
00981 size_t counter = 0;
00982
00983 while (pick())
00984 {
00985 parsePEReference(true, true);
00986
00987 enumNodeDecl attrEnum(&_doc.get_model_allocator());
00988 attrEnum._id = counter++;
00989 attrEnum._value = parseValue();
00990 decl._enum.push_back(_doc.get_model_allocator(), attrEnum);
00991
00992 skip_white_space();
00993
00994 if (pick() == ch_close_paren)
00995 {
00996 pop();
00997 return;
00998 }
00999
01000 skip_sign(ch_pipe, false, false, "Expected Enum Separator");
01001 }
01002 }
01003
01004 void
01005 dtd_processor::parseDefaultDecl(attributeDecl& decl)
01006 {
01007 switch (pick())
01008 {
01009 case ch_pound:
01010 if (pop() == ch_R)
01011 {
01012 skip_string(str_REQUIRED, "Invalid REQUIRED syntax");
01013 decl._rule = ATTR_RULE_REQUIRED;
01014 }
01015 else if (pick() == ch_I)
01016 {
01017 skip_string(str_IMPLIED, "Invalid IMPLIED syntax");
01018 decl._rule = ATTR_RULE_IMPLIED;
01019 }
01020 else if (pick() == ch_F)
01021 {
01022 skip_string(str_FIXED, "Invalid FIXED syntax");
01023 decl._rule = ATTR_RULE_FIXED;
01024
01025 skip_white_space(true, "Expected white space");
01026
01027
01028
01029 decl._defval = parseQuotedValue(true, false, 0, 0);
01030 }
01031 break;
01032 default:
01033 {
01034
01035 if (decl._atype == ATTR_TYPE_ID)
01036 throw_exception("An ID attribute must have a declared default of #IMPLIED or #REQUIRED");
01037 decl._rule = ATTR_RULE_REQUIRED;
01038
01039
01040 decl._defval = parseQuotedValue(true, false, 0, 0);
01041 }
01042 break;
01043 }
01044 }
01045
01046 void
01047 dtd_processor::parseNotation()
01048 {
01049 skip_string(str_NOTATION, "Invalid NOTATION section syntax");
01050 skip_white_space(true, "Expected white space");
01051 parsePEReference(false, true);
01052
01053 notationDecl& decl = _doc.add_notation_decl(parseName());
01054
01055 skip_white_space(true, "Expected white space");
01056 parsePEReference(false, true);
01057
01058 string_t value_system(_tmp_allocator);
01059 string_t value_public(_tmp_allocator);
01060
01061 parseExternalID(value_system, value_public, false);
01062
01063 decl._publicId = value_public;
01064 decl._systemId = value_system;
01065 skip_sign(ch_close_angle, true, false, "Expected close tag");
01066 }
01067
01068
01069 void
01070 dtd_processor::validate()
01071 {
01072 }
01073
01074 void
01075 dtd_processor::parsePEReference(bool skip_junk_before, bool skip_junk_after)
01076 {
01077
01078 if (skip_junk_before && is_white_space(pick()))
01079 skip_white_space();
01080
01081 if (pick() == ch_percent)
01082 {
01083
01084 _tmp_store2.reset();
01085 expandPEReference(_tmp_store2);
01086 size_t len = 0;
01087 const ub1_t* ptr = _tmp_store2.persist(len);
01088 push(ptr, len);
01089 _tmp_store2.reset();
01090
01091 if (skip_junk_after && is_white_space(pick()))
01092 skip_white_space();
01093 }
01094 }
01095
01096 const entityDecl*
01097 dtd_processor::expandPEReference(paged_buffer& buffer)
01098 {
01099 assert(pick() == ch_percent);
01100 pop();
01101
01102
01103 const entityDecl* entry = _doc.find_entity_decl(parseName());
01104 if (!entry)
01105 throw_exception("Unresolved parameter entity");
01106
01107 skip_sign(ch_semicolon, false, false, "Expected semicolon symbol");
01108
01109
01110 if (entry->_value.length())
01111 buffer << entry->_value;
01112 else if (entry->_systemId.length())
01113
01114 buffer_loader::load(_stream.get_location(), entry->_systemId, _small_pool, _big_pool, buffer, false);
01115
01116 return entry;
01117 }
01118
01119 void
01120 dtd_processor::deterministic_model(const dfa_token* token)
01121 {
01122 }
01123
01124 vt_types
01125 dtd_processor::convert_ctype(const char* x)
01126 {
01127 if (!x)
01128 throw_exception("Expected valid type");
01129
01130
01133
01134 if (*x != ch_v || *++x != ch_t || *++x != ch_underscore)
01135 throw_exception("Unknown ctype");
01136
01137 switch (*++x)
01138 {
01139 case ch_u:
01140 if (*++x != ch_b) throw_exception("Unknown ctype");
01141 switch (*++x)
01142 {
01143 case ch_1:
01144 if (*++x) throw_exception("Unknown ctype");
01145 return vt_ub1;
01146 case ch_2:
01147 if (*++x) throw_exception("Unknown ctype");
01148 return vt_ub2;
01149 case ch_4:
01150 if (*++x) throw_exception("Unknown ctype");
01151 return vt_ub4;
01152 case ch_8:
01153 if (*++x) throw_exception("Unknown ctype");
01154 return vt_ub8;
01155 default:
01156 throw_exception("Unknown ctype");
01157 }
01158 case ch_s:
01159 switch (*++x)
01160 {
01161 case ch_b:
01162 switch (*++x)
01163 {
01164 case ch_1:
01165 if (*++x) throw_exception("Unknown ctype");
01166 return vt_sb1;
01167 case ch_2:
01168 if (*++x) throw_exception("Unknown ctype");
01169 return vt_sb2;
01170 case ch_4:
01171 if (*++x) throw_exception("Unknown ctype");
01172 return vt_sb4;
01173 case ch_8:
01174 if (*++x) throw_exception("Unknown ctype");
01175 return vt_sb8;
01176 default:
01177 throw_exception("Unknown ctype");
01178 }
01179 case ch_t:
01180 if (*++x != ch_r || *++x != ch_i || *++x != ch_n || *++x != ch_g || *++x) throw_exception("Unknown ctype");
01181 return vt_string;
01182 default:
01183 throw_exception("Unknown ctype");
01184 }
01185 case ch_f:
01186 if (*++x != ch_l || *++x != ch_t) throw_exception("Unknown ctype");
01187 switch (*++x)
01188 {
01189 case ch_3:
01190 if (*++x != ch_2 || *++x) throw_exception("Unknown ctype");
01191 return vt_float;
01192 case ch_6:
01193 if (*++x != ch_4 || *++x) throw_exception("Unknown ctype");
01194 return vt_double;
01195 default:
01196 throw_exception("Unknown ctype");
01197 }
01198 case ch_b:
01199 switch (*++x)
01200 {
01201 case ch_o:
01202 if (*++x != ch_o || *++x != ch_l || *++x) throw_exception("Unknown ctype");
01203 return vt_bool;
01204 case ch_i:
01205 if (*++x != ch_n || *++x != ch_a || *++x != ch_r || *++x != ch_y || *++x) throw_exception("Unknown ctype");
01206 return vt_binary;
01207 default:
01208 throw_exception("Unknown ctype");
01209 }
01210 case ch_g:
01211 if (*++x != ch_u || *++x != ch_i || *++x != ch_d || *++x) throw_exception("Unknown ctype");
01212 return vt_guid;
01213 case ch_d:
01214 switch (*++x)
01215 {
01216 case ch_a:
01217 if (*++x != ch_t || *++x != ch_e || *++x) throw_exception("Unknown ctype");
01218 return vt_date;
01219 case ch_e:
01220 if (*++x != ch_c || *++x != ch_i || *++x != ch_m || *++x != ch_a || *++x != ch_l || *++x) throw_exception("Unknown ctype");
01221 return vt_decimal;
01222 default:
01223 throw_exception("Unknown ctype");
01224 }
01225 case ch_w:
01226 if (*++x != ch_s || *++x != ch_t || *++x != ch_r || *++x != ch_i || *++x != ch_n || *++x != ch_g || *++x) throw_exception("Unknown ctype");
01227 return vt_wstring;
01228 case ch_n:
01229 if (*++x != ch_u || *++x != ch_m || *++x != ch_e || *++x != ch_r || *++x != ch_i || *++x != ch_c || *++x) throw_exception("Unknown ctype");
01230 return vt_numeric;
01231 default:
01232 throw_exception("Unknown ctype");
01233 }
01234
01235 return vt_unknown;
01236 }
01237
01238 #pragma pack()
01239 END_TERIMBER_NAMESPACE