00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "xml/xmlmodel.hpp"
00032 #include "xml/sxml.hpp"
00033 #include "xml/sxs.hpp"
00034
00035 #include "base/common.hpp"
00036 #include "base/memory.hpp"
00037 #include "base/list.hpp"
00038 #include "base/vector.hpp"
00039 #include "base/stack.hpp"
00040 #include "base/map.hpp"
00041 #include "base/bitset.hpp"
00042 #include "base/string.hpp"
00043
00044 BEGIN_TERIMBER_NAMESPACE
00045 #pragma pack(4)
00047 content_node::content_node(dfaRule rule) :
00048 _rule(rule), _max_states(0xffffffff)
00049 {
00050 }
00051
00053 content_any::content_any(dfaRule rule, size_t pos) :
00054 content_node(rule), _pos(pos)
00055 {
00056 assert(rule == DFA_ANY);
00057 }
00058
00059
00060 void
00061 content_any::calc_pos(_bitset& state, bool) const
00062 {
00063
00064 if (_pos == os_minus_one)
00065 state.reset();
00066 else
00067 state.set(_pos, true);
00068 }
00069
00070
00071 bool
00072 content_any::is_nullable() const
00073 {
00074 return _pos == os_minus_one;
00075 }
00076
00078 content_leaf::content_leaf(const elementDecl* decl, size_t pos) :
00079 content_node(DFA_LEAF), _decl(decl), _pos(pos)
00080 {
00081 }
00082
00083
00084 void
00085 content_leaf::calc_pos(_bitset& state, bool) const
00086 {
00087
00088 if (_pos == os_minus_one)
00089 state.reset();
00090 else
00091 state.set(_pos, true);
00092 }
00093
00094
00095 bool
00096 content_leaf::is_nullable() const
00097 {
00098 return _pos == os_minus_one;
00099 }
00100
00102 content_unary::content_unary(dfaRule rule, content_node* child) :
00103 content_node(rule), _child(child)
00104 {
00105 assert(rule == DFA_QUESTION || rule == DFA_ASTERISK || rule == DFA_PLUS);
00106 }
00107
00108
00109 bool
00110 content_unary::is_nullable() const
00111 {
00112 return _rule == DFA_PLUS ? _child->is_nullable() : true;
00113 }
00115 content_binary::content_binary(dfaRule rule, content_node* left, content_node* right) :
00116 content_node(rule), _left(left), _right(right)
00117 {
00118 assert(rule == DFA_CHOICE || rule == DFA_SEQUENCE);
00119 }
00120
00121
00122 void
00123 content_binary::calc_pos(_bitset& state, bool first) const
00124 {
00125 if (_rule == DFA_CHOICE)
00126 {
00127 state = first ? _left->get_firstPos() : _left->get_lastPos();
00128 state |= first ? _right->get_firstPos() : _right->get_lastPos();
00129 }
00130 else if (_rule == DFA_SEQUENCE)
00131 {
00132 state = first ? _left->get_firstPos() : _right->get_lastPos();
00133 if (first ? _left->is_nullable() : _right->is_nullable())
00134 state |= first ? _right->get_firstPos() : _left->get_lastPos();
00135 }
00136 }
00137
00138
00139 bool
00140 content_binary::is_nullable() const
00141 {
00142 return _rule == DFA_CHOICE ? (_left->is_nullable() || _right->is_nullable()) :
00143 (_left->is_nullable() && _right->is_nullable());
00144 }
00145
00146
00148
00149 content_interface::~content_interface()
00150 {
00151 }
00152
00154 leaf_type::leaf_type() :
00155 _decl(0), _rule(dfaRule_MIN)
00156 {
00157 }
00158
00160 content_mixed::content_mixed(const dfa_token* parent, byte_allocator& tmp_allocator) :
00161 _tmp_allocator(tmp_allocator)
00162 {
00163 build_mixed_list(parent);
00164 }
00165
00166 void
00167 content_mixed::build_mixed_list(const dfa_token* parent)
00168 {
00169
00170 switch (parent->_rule)
00171 {
00172 case DFA_LEAF:
00173 case DFA_ANY:
00174 _listToken.push_back(_tmp_allocator, parent);
00175 break;
00176 case DFA_CHOICE:
00177 case DFA_SEQUENCE:
00178 {
00179
00180 build_mixed_list(parent->_first);
00181
00182 if (parent->_last)
00183 build_mixed_list(parent->_last);
00184 }
00185 break;
00186 case DFA_QUESTION:
00187 case DFA_ASTERISK:
00188 case DFA_PLUS:
00189 build_mixed_list(parent->_first);
00190 break;
00191 default:
00192 assert(false);
00193 }
00194 }
00195
00196 void
00197 content_mixed::validate(const xml_element& el)
00198 {
00199 if (!el.has_children())
00200 return;
00201
00202 for (const xml_tree_node* iterElement = el._first_child; iterElement; iterElement = iterElement->_right)
00203 {
00204 if (iterElement->_decl->get_type() != ELEMENT_NODE)
00205 continue;
00206
00207 const elementDecl* decl = xml_element::cast_to_element(iterElement)->cast_decl();
00208 bool find = false;
00209
00210 for (_list< const dfa_token* >::const_iterator iterToken = _listToken.begin(); iterToken != _listToken.end(); ++iterToken)
00211 {
00212 if ((*iterToken)->_rule == DFA_LEAF)
00213 {
00214 if (!(*iterToken)->_decl)
00215 continue;
00216
00217 if ((*iterToken)->_decl == decl)
00218 {
00219 find = true;
00220 break;
00221 }
00222 }
00223 else
00224 {
00225
00226 assert(false);
00227 }
00228 }
00229
00230 if (!find)
00231 {
00232 string_t ex = "Unknown element found: ";
00233 ex += decl->_name;
00234 ex += " beneath parent element: ";
00235 ex += el._decl->_name;
00236 exception::_throw(ex);
00237 }
00238 }
00239 }
00240
00242 content_children::content_children(const dfa_token* parent, byte_allocator& tmp_allocator) :
00243 _tmp_allocator(tmp_allocator),
00244 _leafCount(0), _EOCPos(0),
00245 _emptyOk(false), _elemMapSize(0),
00246 _transTableSize(0)
00247 {
00248 build_dfa(parent);
00249 }
00250
00251 content_node*
00252 content_children::build_children_tree(const dfa_token* parent)
00253 {
00254
00255 const dfaRule curRule = parent->_rule;
00256
00257 switch (curRule)
00258 {
00259 case DFA_ANY:
00260 return new(_tmp_allocator.allocate(sizeof(content_any))) content_any(curRule, _leafCount++);
00261 case DFA_LEAF:
00262 return new(_tmp_allocator.allocate(sizeof(content_leaf))) content_leaf(parent->_decl, _leafCount++);
00263 case DFA_CHOICE:
00264 case DFA_SEQUENCE:
00265 {
00266 content_node* left = build_children_tree(parent->_first);
00267 content_node* right = build_children_tree(parent->_last);
00268 return new(_tmp_allocator.allocate(sizeof(content_binary))) content_binary(curRule, left, right);
00269 }
00270 case DFA_QUESTION:
00271 case DFA_ASTERISK:
00272 case DFA_PLUS:
00273 return new(_tmp_allocator.allocate(sizeof(content_unary))) content_unary(curRule, build_children_tree(parent->_first));
00274 default:
00275 assert(false);
00276 }
00277
00278 return 0;
00279 }
00280
00281 size_t
00282 content_children::post_tree_build_init(content_node* nodeCur, size_t curIndex)
00283 {
00284
00285 nodeCur->init(_tmp_allocator, _leafCount);
00286
00287
00288 dfaRule curRule = nodeCur->get_rule();
00289
00290
00291 size_t newIndex = curIndex;
00292
00293
00294 switch (curRule)
00295 {
00296 case DFA_ANY:
00297 _leafList[newIndex] = new(_tmp_allocator.allocate(sizeof(content_leaf))) content_leaf(0, ((content_any*)nodeCur)->get_pos());
00298 _leafListType[newIndex] = curRule;
00299 return ++newIndex;
00300 case DFA_CHOICE:
00301 case DFA_SEQUENCE:
00302
00303 newIndex = post_tree_build_init(((content_binary*)nodeCur)->get_left(), newIndex);
00304 return post_tree_build_init(((content_binary*)nodeCur)->get_right(), newIndex);
00305 case DFA_ASTERISK:
00306 case DFA_QUESTION:
00307 case DFA_PLUS:
00308
00309 return post_tree_build_init(((content_unary*)nodeCur)->get_child(), newIndex);
00310 case DFA_LEAF:
00311
00312
00313 if (((content_leaf*)nodeCur)->get_decl())
00314 {
00315 _leafList[newIndex] = new(_tmp_allocator.allocate(sizeof(content_leaf))) content_leaf(((content_leaf*)nodeCur)->get_decl(),
00316 ((content_leaf*)nodeCur)->get_pos());
00317 _leafListType[newIndex] = curRule;
00318 return ++newIndex;
00319 }
00320 break;
00321 default:
00322 assert(false);
00323 break;
00324 }
00325
00326 return newIndex;
00327 }
00328
00329 void
00330 content_children::calc_follow_list(content_node* curNode)
00331 {
00332
00333 switch (curNode->get_rule())
00334 {
00335 case DFA_CHOICE:
00336
00337 calc_follow_list(((content_binary*)curNode)->get_left());
00338 calc_follow_list(((content_binary*)curNode)->get_right());
00339 break;
00340 case DFA_SEQUENCE:
00341 {
00342
00343 calc_follow_list(((content_binary*)curNode)->get_left());
00344 calc_follow_list(((content_binary*)curNode)->get_right());
00345
00346
00347 const _bitset& last = ((content_binary*)curNode)->get_left()->get_lastPos();
00348 const _bitset& first = ((content_binary*)curNode)->get_right()->get_firstPos();
00349
00350
00351
00352
00353 for (size_t index = 0; index < _leafCount; ++index)
00354 {
00355 if (last.get(index))
00356 _followList[index] |= first;
00357 }
00358 }
00359 break;
00360 case DFA_ASTERISK:
00361 case DFA_PLUS:
00362 {
00363
00364 calc_follow_list(((content_unary*)curNode)->get_child());
00365
00366
00367
00368 const _bitset& first = curNode->get_firstPos();
00369 const _bitset& last = curNode->get_lastPos();
00370
00371
00372
00373
00374 for (size_t index = 0; index < _leafCount; ++index)
00375 {
00376 if (last.get(index))
00377 _followList[index] |= first;
00378 }
00379 }
00380 break;
00381 case DFA_QUESTION:
00382
00383 calc_follow_list(((content_unary*)curNode)->get_child());
00384 break;
00385 default:
00386 break;
00387 }
00388 }
00389
00390 void
00391 content_children::build_dfa(const dfa_token* parent)
00392 {
00393 size_t index;
00394
00395
00396
00397
00398
00399
00400
00401
00402 elementDecl fakeDecl(0, &_tmp_allocator);
00403 content_leaf* nodeEOC = new(_tmp_allocator.allocate(sizeof(content_leaf))) content_leaf(&fakeDecl, 0xffffffff);
00404 content_node* nodeOrgContent = build_children_tree(parent);
00405 content_node* headNode = new(_tmp_allocator.allocate(sizeof(content_binary)))content_binary(DFA_SEQUENCE, nodeOrgContent, nodeEOC);
00406
00407
00408
00409
00410
00411
00412
00413 _EOCPos = _leafCount;
00414 nodeEOC->set_pos(_leafCount++);
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431 _leafList.resize(_tmp_allocator, _leafCount);
00432 _leafListType.resize(_tmp_allocator, _leafCount);
00433
00434 post_tree_build_init(headNode, 0);
00435
00436
00437
00438
00439
00440
00441 _followList.resize(_tmp_allocator, _leafCount);
00442
00443 for (index = 0; index < _leafCount; ++index)
00444 _followList[index].resize(_tmp_allocator, _leafCount);
00445
00446 calc_follow_list(headNode);
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456 _emptyOk = nodeOrgContent->is_nullable();
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469 _elemMap.resize(_tmp_allocator, _leafCount);
00470 _elemMapType.resize(_tmp_allocator, _leafCount);
00471 _elemMapSize = 0;
00472
00473 bool init_LeafNameTypeVector = false;
00474 for (size_t outIndex = 0; outIndex < _leafCount; ++outIndex)
00475 {
00476 if (_leafListType[outIndex] != DFA_LEAF)
00477 if (!init_LeafNameTypeVector)
00478 init_LeafNameTypeVector = true;
00479
00480
00481 const elementDecl* decl = _leafList[outIndex]->get_decl();
00482
00483
00484 size_t inIndex = 0;
00485
00486 for (; inIndex < _leafCount; ++inIndex)
00487 {
00488 const elementDecl* inDecl = _elemMap[inIndex];
00489 if (inDecl == decl)
00490 break;
00491 }
00492
00493
00494 if (inIndex == _leafCount)
00495 {
00496 _elemMap[_elemMapSize] = decl;
00497 _elemMapType[_elemMapSize] = _leafListType[outIndex];
00498 ++_elemMapSize;
00499 }
00500
00501 }
00502
00503 _elemMap.reduce(_elemMapSize);
00504 _elemMapType.reduce(_elemMapSize);
00505
00506
00507
00508 if (init_LeafNameTypeVector)
00509 {
00510 _leafNameTypeVector.resize(_tmp_allocator, _elemMapSize);
00511 for (size_t indexCopy = 0; indexCopy < _elemMapSize; ++indexCopy)
00512 {
00513 _leafNameTypeVector[indexCopy]._decl = _elemMap[indexCopy];
00514 _leafNameTypeVector[indexCopy]._rule = _elemMapType[indexCopy];
00515 }
00516 }
00517
00518 _vector< size_t > leafSorter;
00519 leafSorter.resize(_tmp_allocator, _leafCount + _elemMapSize);
00520 size_t sortCount = 0;
00521
00522 for (size_t elemIndex = 0; elemIndex < _elemMapSize; ++elemIndex)
00523 {
00524 const elementDecl* decl = _elemMap[elemIndex];
00525
00526 for (size_t leafIndex = 0; leafIndex < _leafCount; ++leafIndex)
00527 {
00528 const elementDecl* leaf = _leafList[leafIndex]->get_decl();
00529 if (leaf == decl)
00530 leafSorter[sortCount++] = leafIndex;
00531 }
00532
00533 leafSorter[sortCount++] = os_minus_one;
00534 }
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547 size_t curArraySize = _leafCount * 4;
00548 _vector< _bitset > statesToDo;
00549 statesToDo.resize(_tmp_allocator, curArraySize);
00550
00551 _finalStateFlags.resize(_tmp_allocator, curArraySize);
00552
00553 _transTable.resize(_tmp_allocator, curArraySize);
00554
00555
00556
00557
00558
00559 _bitset setT;
00560 setT.assign(_tmp_allocator, headNode->get_firstPos());
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570 size_t unmarkedState = 0;
00571 size_t curState = 0;
00572
00573
00574
00575
00576
00577 make_def_state_list(_transTable[curState]);
00578 statesToDo[curState].assign(_tmp_allocator, setT);
00579 ++curState;
00580
00581
00582
00583
00584
00585
00586 _map< _bitset, size_t > stateTable;
00587
00588
00589
00590
00591
00592
00593 _bitset newSet;
00594 while (unmarkedState < curState)
00595 {
00596
00597
00598
00599
00600 setT.assign(_tmp_allocator, statesToDo[unmarkedState]);
00601 _vector< size_t >& transEntry = _transTable[unmarkedState];
00602
00603
00604 _finalStateFlags[unmarkedState] = setT.get(_EOCPos);
00605
00606
00607 ++unmarkedState;
00608
00609 size_t sorterIndex = 0;
00610
00611
00612 for (size_t elemIndex = 0; elemIndex < _elemMapSize; ++elemIndex)
00613 {
00614
00615
00616
00617
00618
00619
00620 if (newSet.empty())
00621 newSet.resize(_tmp_allocator, _leafCount);
00622 else
00623 newSet.reset();
00624
00625 size_t leafIndex = leafSorter[sorterIndex++];
00626
00627 while (leafIndex != os_minus_one)
00628 {
00629
00630 if (setT.get(leafIndex))
00631 {
00632
00633
00634
00635
00636
00637 newSet |= _followList[leafIndex];
00638 }
00639
00640 leafIndex = leafSorter[sorterIndex++];
00641 }
00642
00643
00644
00645
00646
00647 if (!newSet.empty())
00648 {
00649
00650
00651
00652
00653
00654 _map< _bitset, size_t >::iterator iter = stateTable.find(newSet);
00655
00656 size_t stateIndex = curState;
00657
00658 if (iter == stateTable.end())
00659 {
00660
00661
00662
00663
00664
00665 statesToDo[curState].assign(_tmp_allocator, newSet);
00666 make_def_state_list(_transTable[curState]);
00667 iter = stateTable.insert(_tmp_allocator, newSet, curState).first;
00668
00669
00670 ++curState;
00671
00672
00673
00674
00675
00676
00677 newSet.clear();
00678 }
00679 else
00680 stateIndex = *iter;
00681
00682
00683
00684
00685
00686
00687
00688 transEntry[elemIndex] = stateIndex;
00689
00690
00691 if (curState == curArraySize)
00692 {
00693
00694
00695
00696
00697
00698 curArraySize *= 4;
00699 _finalStateFlags.resize(_tmp_allocator, curArraySize);
00700 _transTable.resize(_tmp_allocator, curArraySize);
00701 statesToDo.resize(_tmp_allocator, curArraySize);
00702 }
00703 }
00704 }
00705 }
00706
00707
00708 _transTableSize = curState;
00709 }
00710
00711 void
00712 content_children::validate(const xml_element& el)
00713 {
00714
00715
00716
00717
00718
00719 if (!el.has_children())
00720 {
00721
00722 if (!_emptyOk)
00723 {
00724 string_t ex = "Parent element: ";
00725 ex += el._decl->_name;
00726 ex += " must contain child elements";
00727 exception::_throw(ex);
00728 }
00729
00730 return;
00731 }
00732
00733
00734
00735
00736
00737
00738 size_t curState = 0;
00739 size_t nextState = 0;
00740
00741 for (const xml_tree_node* iterElement = el._first_child; iterElement; iterElement = iterElement->_right)
00742 {
00743 if (iterElement->_decl->get_type() != ELEMENT_NODE)
00744 continue;
00745
00746
00747 const elementDecl* curDecl = xml_element::cast_to_element(iterElement)->cast_decl();
00748
00749 if (curDecl->_content == CONTENT_ANY)
00750 continue;
00751
00752
00753 size_t elemIndex = 0;
00754 for (; elemIndex < _elemMapSize; ++elemIndex)
00755 {
00756 const elementDecl* inElem = _elemMap[elemIndex];
00757 if (inElem == curDecl)
00758 {
00759 nextState = _transTable[curState][elemIndex];
00760 if (nextState != os_minus_one)
00761 break;
00762 }
00763 }
00764
00765
00766 if (nextState == os_minus_one)
00767 {
00768 string_t ex("Invalid child element order: ");
00769 ex += curDecl->_name;
00770 ex += " beneath parent element: ";
00771 ex += el._decl->_name;
00772 exception::_throw(ex);
00773 }
00774
00775
00776 if (elemIndex == _elemMapSize)
00777 {
00778 string_t ex("Unexpected child element: ");
00779 ex += curDecl->_name;
00780 ex += " beneath parent element: ";
00781 ex += el._decl->_name;
00782 exception::_throw(ex);
00783 }
00784
00785 curState = nextState;
00786 nextState = 0;
00787 }
00788
00789
00790
00791
00792
00793
00794 if (!_finalStateFlags[curState])
00795 {
00796 string_t ex("Expected more child elements beneath parent element: ");
00797 ex += el._decl->_name;
00798 exception::_throw(ex);
00799 }
00800 }
00801
00802 #pragma pack()
00803 END_TERIMBER_NAMESPACE
00804