00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 #include "xml/dtdxml.h"
00029 #include "xml/declxml.hpp"
00030 #include "xml/defxml.hpp"
00031 #include "xml/mngxml.hpp"
00032 #include "xml/sxml.hpp"
00033 #include "xml/sxs.hpp"
00034 #include "xml/storexml.hpp"
00035 #include "xml/miscxml.hpp"
00036 
00037 #include "base/list.hpp"
00038 #include "base/map.hpp"
00039 #include "base/stack.hpp"
00040 #include "base/string.hpp"
00041 #include "base/common.hpp"
00042 #include "base/memory.hpp"
00043 
00044 BEGIN_TERIMBER_NAMESPACE
00045 #pragma pack(4)
00046 
00047 dtd_processor::dtd_processor(   byte_source& stream, 
00048                                                                 xml_document& doc,
00049                                                                 mem_pool_t& small_pool, 
00050                                                                 mem_pool_t& big_pool,
00051                                                                 size_t xml_size) :
00052         byte_manager(stream, doc, small_pool, big_pool, xml_size)
00053 {
00054 }
00056 
00057 
00058 
00059 
00060 void 
00061 dtd_processor::parse()
00062 {
00063         
00064         parseSubSet(!get_subset() ? ch_close_square : ch_null, get_subset());
00065         validate();
00066         
00067         _doc._standalone = 1;
00068 }
00069 
00070 void 
00071 dtd_processor::parseSubSet(char stopSymbol, bool include_allowed)
00072 {
00073         ub1_t symbol = 0;
00074         while ((symbol = pick()) && symbol != stopSymbol)
00075         {
00076                 switch (symbol)
00077                 {
00078                         
00079                         
00080                         
00081                         case ch_percent:
00082                                 
00083                                 parsePEReference(false, true);
00084                                 break;
00085                         case ch_open_angle:
00086                                 
00087                                 
00088                                 
00089                                 
00090                                 
00091                                 
00092                                 
00093                                 
00094                                 switch (pop()) 
00095                                 {
00096                                         case ch_question:
00097                                                 
00098                                                 parsePI();
00099                                                 break;
00100                                         case ch_bang:
00101                                                 
00102                                                 
00103                                                 
00104                                                 
00105                                                 
00106                                                 switch (pop()) 
00107                                                 {
00108                                                         case ch_E:
00109                                                                 switch (pop())
00110                                                                 {
00111                                                                         case ch_L: 
00112                                                                                 parseElement();
00113                                                                                 break;
00114                                                                         case ch_N: 
00115                                                                                 parseEntity();
00116                                                                                 break;
00117                                                                         default:
00118                                                                                 throw_exception("Invalid ELEMENT or ENTITY sections syntax");
00119                                                                 }
00120                                                                 break;
00121                                                         case ch_A: 
00122                                                                 parseAttrList();
00123                                                                 break;
00124                                                         case ch_N: 
00125                                                                 parseNotation();
00126                                                                 break;
00127                                                         case ch_dash: 
00128                                                                 parseComment();
00129                                                         break;
00130                                                         case ch_open_square: 
00131                                                                 
00132                                                                 if (!include_allowed)
00133                                                                         throw_exception("IGNORE or INCLUDE sections are not allowed in internal dtd subset");
00134 
00135                                                                 pop(); 
00136                                                                 skip_sign(ch_I, true, false, "Expected IGNORE or INCLUDE sections");
00137                                                                 switch (pick()) 
00138                                                                 {
00139                                                                         case ch_G:
00140                                                                                 
00141                                                                                 parseIgnore();
00142                                                                                 break;
00143                                                                         case ch_N:
00144                                                                                 
00145                                                                                 parseInclude();
00146                                                                                 break;
00147                                                                         default:
00148                                                                                 throw_exception("Invalid IGNORE or INCLUDE sections syntax");
00149                                                                                 break;
00150                                                                 } 
00151                                                                 break;
00152                                                         default:
00153                                                                 throw_exception("Invalid markup instruction syntax in the internal dtd section");
00154                                                                 break;
00155                                                 } 
00156                                                 break;
00157                                         default:
00158                                                 throw_exception("Unexpected char in dtd markup language");
00159                                                 break;
00160                                 } 
00161                                 break;
00162                         default: 
00163                                 skip_white_space(true, "White space expected");
00164                                 break;
00165                 } 
00166 
00167                 reset_all_tmp(true);
00168                 _doc.get_tmp_allocator().reset();
00169         } 
00170 
00171 }
00172 
00173 void  
00174 dtd_processor::parseIgnore()
00175 {
00176         
00177         
00178         
00179 
00180         
00181         skip_string(str_IGNORE + 1, "Invalid IGNORE section syntax");
00182         skip_sign(ch_open_square, true, true, "Expected IGNORE Bracket");
00183 
00184         
00185         
00186         
00187         
00188 
00189         
00190     
00191     
00192     size_t depth = 1;
00193         ub1_t symbol = 0;
00194 
00195     while (0 != (symbol = pick()))
00196     {
00197                 switch (symbol)
00198                 {
00199                         case ch_open_angle:
00200                                 if (pop() == ch_bang && pop() == ch_open_square)
00201                                 {
00202                                         pop();
00203                                         ++depth;        
00204                                 }
00205                                 continue;
00206                         case ch_close_square:
00207                                 if (pop() == ch_close_square && pop() == ch_close_angle)
00208                                 {
00209                                         pop();
00210                                         if (!--depth)
00211                                                 return;
00212                                 }
00213                                 continue;
00214                         default:
00215                                 break;
00216         } 
00217 
00218                 pop();
00219                 skip_white_space();
00220     } 
00221 
00222         
00223         throw_exception("Invalid IGNORE section syntax");
00224 }
00225 
00226 void  
00227 dtd_processor::parseInclude()
00228 {
00229         
00230         
00231         skip_string(str_INCLUDE + 1, "Invalid INCLUDE section syntax");
00232         skip_sign(ch_open_square, true, true, "Expected open square tag symbol after INCLUDE section");
00233         parseSubSet(ch_close_square, true);
00234         skip_sign(ch_close_square, true, false, "Expected INCLUDE close bracket");
00235         skip_sign(ch_close_square, false, false, "Expected INCLUDE second close bracket");
00236         skip_sign(ch_close_angle, false, false, "Expected INCLUDE close angle");
00237 }
00238 
00239 
00240 void  
00241 dtd_processor::parseElement()
00242 {
00243         
00244         
00245         skip_string(str_ELEMENT + 1, "Invalid ELEMENT section syntax");
00246         skip_white_space(true, "Expected white space");
00247         
00248         parsePEReference(false, true);
00249 
00250         elementDecl& decl = _doc.add_element_decl(parseName(), false, true, false);
00251         skip_white_space(true, "Expected white space");
00252         parsePEReference(false, false);
00253 
00254     
00255     parseContentSpec(decl);
00256     
00257         skip_sign(ch_close_angle, true, false, "Expected close tag");
00258         _doc.add_element_desc(decl);
00259 }
00260 
00261 void  
00262 dtd_processor::parseContentSpec(elementDecl& decl)
00263 {
00264         skip_white_space();
00265 
00266         switch (pick())
00267         {
00268                 case ch_E:
00269                         skip_string(str_EMPTY, "Invalid EMPTY declaration");
00270                         decl._content = CONTENT_EMPTY;
00271                         break;
00272                 case ch_A:
00273                         skip_string(str_ANY, "Invalid ANY declaration");
00274                         decl._content = CONTENT_ANY;
00275                         break;
00276                 case ch_open_paren:
00277                         pop();
00278                         
00279                         parsePEReference(true, true);
00280                         
00281                         
00282                         
00283                         switch (pick())
00284                         {
00285                                 case ch_pound:
00286                                         skip_string(str__PCDATA, "Invalid PCDATA declaration");
00287                                         decl._content = CONTENT_MIXED;
00288                                         decl._token = parseMixed();
00289                                         return;
00290                                 default: 
00291                                         decl._content = CONTENT_CHILDREN;
00292                                         decl._token = parseChildren();
00293                                         return;
00294                         } 
00295                 default:
00296                         throw_exception("Invalid element content");
00297     } 
00298 
00299         
00300         deterministic_model(decl._token);
00301 }
00302 
00303 dfa_token*  
00304 dtd_processor::parseMixed()
00305 {
00306     
00307     
00308     
00309     
00310         dfa_token* curToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token)))) dfa_token(DFA_LEAF, 0, 0, 0);
00311         dfa_token* orgToken = curToken;
00312         dfa_token* headToken = orgToken;
00313    
00314         _list< const elementDecl* > uniqueList;
00315         
00316     
00317     
00318     
00319     
00320         bool hasElement = false;
00321         ub1_t symbol = 0;
00322 
00323     while (0 != (symbol = pick()))
00324     {
00325         skip_white_space();   
00326                 switch (symbol = pick())
00327                 {
00328                         case ch_percent:
00329                                 parsePEReference(false, false);
00330                                 break;
00331                         case ch_pipe:   
00332                                 pop(); 
00333                                 parsePEReference(true, true);
00334 
00335                                 
00336                                 {
00337                                         const elementDecl& decl = _doc.add_element_decl(parseName(), false, false, false);
00338 
00339                                         
00340                                         if (curToken == orgToken)
00341                                         {
00342                                                 hasElement = true;
00343                                                 curToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token)))) 
00344                                                                                                         dfa_token(DFA_CHOICE, 0, curToken, new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token)))) 
00345                                                                                                                                                                                         dfa_token(DFA_LEAF, &decl, 0, 0));
00346                                                 
00347                                                 headToken = curToken;
00348                                         }
00349                                         else
00350                                         {
00351                                                 
00352                                                 for (_list< const elementDecl* >::const_iterator iter = uniqueList.begin(); iter != uniqueList.end(); ++iter)
00353                                                         if (*iter == &decl)
00354                                                                 throw_exception("Dublicate elements in Mixed model");
00355 
00356                                                 dfa_token* oldLast = curToken->_last;
00357                                                 curToken->_last = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00358                                                                                                         dfa_token(DFA_CHOICE, 0, oldLast, new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00359                                                                                                                                                                                         dfa_token(DFA_LEAF, &decl, 0, 0));
00360 
00361                                                 
00362                                                 curToken = curToken->_last;
00363                                         }
00364 
00365                                         
00366                                         uniqueList.push_back(*_tmp_allocator, &decl);
00367                                 }
00368                                 break;
00369                         case ch_close_paren:
00370                                 if (ch_asterisk == pop()) 
00371                                         skip_sign(ch_asterisk, false, false, "Expected asterisk symbol");
00372                                 
00373                 
00374                 
00375                 
00376                 
00377                 if (hasElement) 
00378                                 {
00379                     headToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00380                                                                                                         dfa_token(DFA_ASTERISK, 0, headToken, 0);
00381                 }
00382 
00383                 return headToken;
00384                         default: 
00385                                 throw_exception("Invalid element PCDATA syntax");
00386         } 
00387     } 
00388 
00389         throw_exception("Invalid element PCDATA syntax");
00390         return 0;
00391 }
00392 
00393 dfa_token*
00394 dtd_processor::checkRepeation(ub1_t symbol, dfa_token* token)
00395 {
00396         dfa_token* retVal = token;
00397 
00398         switch (symbol)
00399         {
00400                 case ch_question: 
00401                         retVal = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token)))) dfa_token(DFA_QUESTION, 0, token, 0);
00402                         pop();
00403                         break;
00404                 case ch_asterisk: 
00405                         retVal = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token)))) dfa_token(DFA_ASTERISK, 0, token, 0);
00406                         pop();
00407                         break;
00408                 case ch_plus: 
00409                         retVal = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token)))) dfa_token(DFA_PLUS, 0, token, 0);
00410                         pop();
00411                         break;
00412                 default:
00413                         break;
00414         }
00415 
00416         return retVal;
00417 }
00418 
00419 dfa_token*
00420 dtd_processor::parseChildren()
00421 {
00422         parsePEReference(true, true);
00423 
00424         ub1_t symbol = 0;
00425         ub1_t type_char = 0;
00426         dfa_token* curToken = 0;
00427         dfa_token* headToken = 0;
00428 
00429         switch (pick())
00430         {
00431                 case ch_open_paren: 
00432                         {
00433                                 pop(); 
00434                                 
00435                                 
00436                                 curToken = parseChildren();
00437                         }
00438                         break;
00439                 default: 
00440                         {
00441                                 const elementDecl& decl = _doc.add_element_decl(parseName(), false, false, false);
00442                                 
00443                                 curToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token)))) 
00444                                                                                                         dfa_token(DFA_LEAF, &decl, 0, 0);
00445 
00446                                 parsePEReference(true, true);
00447                                 curToken = checkRepeation(pick(), curToken);
00448                         }
00449         } 
00450 
00451     
00452         parsePEReference(true, true);
00453 
00454         switch (type_char = pick())
00455         {
00456                 case ch_comma:
00457                 case ch_pipe:
00458                         headToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00459                                                 dfa_token(type_char == ch_comma ? DFA_SEQUENCE : DFA_CHOICE, 0, curToken, 0);
00460                         curToken = headToken;
00461                         break;
00462                 case ch_close_paren:
00463                         headToken = curToken;
00464                         pop();
00465                         break;
00466                 default:
00467                         throw_exception("Expected CHOICE or SEQUENCE or CLOSE PAREN syntax");
00468         } 
00469 
00470         if ((type_char == ch_comma) || (type_char == ch_pipe))
00471         {
00472                 dfa_token* lastToken = 0;
00473                 dfaRule typeRule = type_char == ch_comma ? DFA_SEQUENCE : DFA_CHOICE;
00474 
00475                 while (pick())
00476                 {
00477                         parsePEReference(true, true);
00478 
00479                         symbol = pick();
00480 
00481                         if (symbol == ch_close_paren)
00482                         {
00483                                 pop(); 
00484                                 
00485                                 
00486                                 
00487                                 
00488                                 
00489                                 if (!curToken->_last)
00490                                 {
00491                                         dfa_token* oldFirst = curToken->_first;
00492                                         curToken->_first = 0;
00493                                         lastToken->_last = oldFirst;
00494                                         curToken = lastToken;
00495                                 }
00496                                 break;
00497                         }
00498                         else if (symbol == ch_comma || symbol == ch_pipe)
00499                         {
00500                                 if (symbol != type_char)
00501                                         throw_exception("Expected legal CHOICE/SEQUENCE syntax");
00502 
00503                                 pop(); 
00504                                 parsePEReference(true, true);
00505 
00506                                 if ((symbol = pick()) == ch_open_paren)
00507                                 {
00508                                         pop(); 
00509                                         
00510                                         dfa_token* newToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00511                                                                                                 dfa_token(typeRule, 0, parseChildren(), 0);
00512 
00513                                         
00514                                         curToken->_last = newToken;
00515                                         lastToken = curToken;
00516                                         curToken = newToken;
00517                                 }
00518                                 else
00519                                 {
00520                                         
00521                                         
00522                                         
00523                                         
00524                                         const elementDecl& decl = _doc.add_element_decl(parseName(), false, false, false);
00525                                         dfa_token* tmpToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00526                                                                                                 dfa_token(DFA_LEAF, &decl, 0, 0);
00527 
00528                                         parsePEReference(true, true);
00529 
00530                                         tmpToken = checkRepeation(pick(), tmpToken);
00531 
00532                                         
00533                                         
00534                                         
00535                                         
00536                                         
00537                                         
00538                                         dfa_token* newToken = new(check_pointer(_doc.get_model_allocator().allocate(sizeof(dfa_token))))
00539                                                                                                 dfa_token(typeRule, 0, tmpToken, 0);
00540 
00541                                         curToken->_last = newToken;
00542                                         lastToken = curToken;
00543                                         curToken = newToken;
00544                                 }
00545                         } 
00546                         else
00547                                 throw_exception("Expected legal CHOICE/SEQUENCE syntax");
00548                 } 
00549         } 
00550 
00551     
00552     
00553     
00554     
00555     
00556         return checkRepeation(pick(), headToken);
00557 }
00558 
00559 void  
00560 dtd_processor::parseEntity()
00561 {
00562         
00563         
00564         skip_string(str_ENTITY + 1, "Invalid ENTITY section syntax");
00565         skip_white_space(true, "Expected white space");
00566 
00567         
00568         bool bPEDecl = false;
00569         if (pick() == ch_percent)
00570         {
00571                 pop(); 
00572                 skip_white_space(true, "Expected white space");
00573                 bPEDecl = true;
00574         }
00575 
00576 
00577         
00578         bool wasAdded = false;
00579         entityDecl dummy(0, _tmp_allocator);
00580         entityDecl& entry = _doc.add_entity_decl(parseName(), wasAdded);
00581         entityDecl& entity = wasAdded ? entry : dummy;
00582 
00583         entity._is_parameter = bPEDecl;
00584 
00585         skip_white_space(true, "Expected white space");
00586         parsePEReference(false, true);
00587 
00588         
00589         parseEntityDef(entity);
00590         skip_sign(ch_close_angle, true, false, "Expected close tag");
00591         if (wasAdded)
00592                 _doc.add_entity_desc(entry);
00593 }
00594 
00595 void  
00596 dtd_processor::parseEntityDef(entityDecl& decl)
00597 {
00598         
00599         reset_all_tmp();
00600 
00601         ub1_t symbol = 0;
00602         size_t counter = 0;
00603         bool met_xD = false;
00604 
00605         
00606         if ((symbol = pick()) == ch_double_quote || symbol == ch_single_quote)
00607         {
00608                 ub1_t quote = skip_quote(0);
00609                 while (0 != (symbol = pick()))
00610                 {
00611                         
00612                         if(symbol == ch_percent) 
00613                         {
00614                                 
00615                                 
00616                                 
00617                                 if (&decl == expandPEReference(_tmp_store2))
00618                                         throw_exception("Recursive PE is not allowed");
00619 
00620                                 size_t len = 0;
00621                                 const ub1_t* ptr = _tmp_store2.persist(len);
00622                                 push(ptr, len);
00623                                 _tmp_store2.reset();
00624                                 continue;
00625                         }
00626                         else if (symbol == ch_ampersand)
00627                         {
00628                                 symbol = pop(); 
00629                                 if (symbol == ch_pound) 
00630                                         parseCharRef(_tmp_store3);
00631                                 else 
00632                                 {
00633                                         
00634                                         const char* value = parseName();
00635                                         skip_sign(ch_semicolon, false, false, "Expected semicolon after Entity Reference");
00636                                         _tmp_store3 << ch_ampersand << value << ch_semicolon;
00637                                 }
00638                         }
00639                         else if (symbol == quote)
00640                         {
00641                                 pop();
00642                                 
00643                                 if (met_xD) _tmp_store3 << ch_cr;
00644                                 decl._value = _tmp_store3.persist();
00645                                 return;
00646                         }
00647                         else
00648                         {
00649                                 
00650                                 
00651                                 
00652                                 switch (symbol)
00653                                 {
00654                                         case ch_lf: 
00655                                                 _tmp_store3 << ch_lf;
00656                                                 
00657                                                 met_xD = false; 
00658                                                 break;
00659                                         case ch_cr:     
00660                                                 
00661                                                 met_xD = true;
00662                                                 break;
00663                                         default:
00664                                                 if (met_xD) 
00665                                                 {
00666                                                         
00667                                                         _tmp_store3 << ch_lf;
00668                                                         
00669                                                         met_xD = false;
00670                                                 }
00671                         _tmp_store3 << symbol;
00672                                 } 
00673                                 pop();
00674                         }
00675                 } 
00676 
00677                 if (!symbol)
00678                         throw_exception("Invalid ENTITY syntax");
00679         } 
00680 
00681         
00682         skip_white_space();
00683         string_t value_system(_tmp_allocator);
00684         string_t value_public(_tmp_allocator);
00685         parseExternalID(value_system, value_public, true);
00686 
00687         decl._systemId = value_system;
00688         decl._publicId = value_public;
00689 
00690         if (!decl._is_parameter && is_white_space(pick())) 
00691         {
00692                 
00693                 parsePEReference(true, true);
00694 
00695                 if (pick() == ch_N)
00696                 {
00697                         skip_string(str_NDATA, "Invalid NDATA syntax");
00698                         skip_white_space(true, "Expected white space");
00699                         parsePEReference(false, true);
00700 
00701                         decl._notation = parseName();
00702                         decl._is_unparsed = true;
00703                 } 
00704         } 
00705 
00706         decl._is_in_subset = true;
00707 }
00708         
00709 void  
00710 dtd_processor::parseAttrList()
00711 {
00712         skip_string(str_ATTRLIST, "Invalid ATTRLIST section syntax");
00713         
00714         skip_white_space(true, "Expected white space");
00715         parsePEReference(false, true);
00716 
00717 
00718         elementDecl& decl = _doc.add_element_decl(parseName(), false, false, false);
00719 
00720         while (pick())
00721         {
00722                 skip_white_space();
00723 
00724                 switch (pick())
00725                 {
00726                         case ch_close_angle:
00727                                 pop(); 
00728                                 return;
00729                         case ch_percent:
00730                                 parsePEReference(false, false);
00731                                 break;
00732                         default: 
00733                                 parseAttDef(decl);
00734                                 break;
00735                 }
00736         } 
00737 
00738         throw_exception("Invalid ATTLIST syntax");
00739 }
00740 
00741 void  
00742 dtd_processor::parseAttDef(elementDecl& decl)
00743 {
00744         bool wasAdded = false;
00745         attributeDecl dummy(0, _tmp_allocator);
00746 
00747         attributeDecl& new_decl = _doc.add_attribute_decl(decl, parseName(), false, wasAdded);
00748         attributeDecl& attr_decl = wasAdded ? new_decl : dummy;
00749 
00750         skip_white_space(true, "Expected white space");
00751         
00752         parsePEReference(false, false);
00753 
00754         switch (pick())
00755         {
00756                 case ch_C: 
00757                         if (pop() == ch_D)
00758                         {
00759                                 skip_string(str_CDATA + 1, "Invalid CDATA syntax");
00760                                 attr_decl._atype = ATTR_TYPE_CDATA;
00761                                 attr_decl._ctype = vt_string;
00762                         }
00763                         else
00764                         {
00765                                 skip_string(str_CTYPE + 1, "Invalid CTYPE syntax");
00766                                 attr_decl._atype = ATTR_TYPE_CDATA;
00767                                 skip_white_space(true, "Expected white space");
00768                                 attr_decl._ctype = convert_ctype(parseName());
00769                         }
00770                         break;
00771                 case ch_I: 
00772                         skip_string(str_ID, "Invalid CDATA syntax");
00773                         if (pick() != ch_R)
00774                         {
00775                                 attr_decl._atype = ATTR_TYPE_ID;
00776                                 if (wasAdded)
00777                                 {
00778                                         
00779                                         for (attribute_decl_map_t::const_iterator iter = decl._attributes.begin(); iter != decl._attributes.end(); ++iter)
00780                                                 if (&*iter != &new_decl && iter->_atype == ATTR_TYPE_ID)
00781                                                         throw_exception("Dublicate ID type for the same element");
00782                                 }
00783                         }
00784                         else
00785                         {
00786                                 skip_string(str_REF, "Invalid IDREF syntax");
00787                                 if (pick() == ch_S)
00788                                 {
00789                                         attr_decl._atype = ATTR_TYPE_IDREFS;
00790                                         pop(); 
00791                                 }
00792                                 else
00793                                         attr_decl._atype = ATTR_TYPE_IDREF;
00794                         }
00795 
00796                         attr_decl._ctype = vt_string;
00797                         break;
00798                 case ch_E:
00799                         skip_string(str_ENTIT, "Invalid ENTITY syntax");
00800                         if (pick() == ch_Y)
00801                         {
00802                                 attr_decl._atype = ATTR_TYPE_ENTITY;
00803                                 pop(); 
00804                         }
00805                         else if (pick() == ch_I && pop() == ch_E && pop() == ch_S)
00806                         {
00807                                 attr_decl._atype = ATTR_TYPE_ENTITIES;
00808                                 pop(); 
00809                         }
00810                         else
00811                                 throw_exception("Invalid ENTITY syntax");
00812 
00813                         attr_decl._ctype = vt_string;
00814                         break;
00815                 case ch_N:
00816                         if (pop() == ch_M)
00817                         {
00818                                 skip_string(str_NMTOKEN + 1, "Invalid NMTOKEN syntax");
00819                                 if (pick() == ch_S)
00820                                 {
00821                                         attr_decl._atype = ATTR_TYPE_NMTOKENS;
00822                                         pop(); 
00823                                 }
00824                                 else
00825                                         attr_decl._atype = ATTR_TYPE_NMTOKEN;
00826 
00827                                 attr_decl._ctype = vt_string;
00828                         }
00829                         else
00830                         {
00831                                 skip_string(str_NOTATION + 1, "Invalid NOTATION syntax");
00832                                 skip_white_space(true, "Expected white space");
00833                                 attr_decl._atype = ATTR_TYPE_NOTATION;
00834                                 attr_decl._ctype = vt_enum;
00835 
00836                                 if (wasAdded)
00837                                 {
00838                                         
00839                                         if (decl._content == CONTENT_EMPTY)
00840                                                 throw_exception("An attribute of type NOTATION must not be declared on an element declared EMPTY");
00841 
00842                                         
00843                                         for (attribute_decl_map_t::const_iterator iter = decl._attributes.begin(); iter != decl._attributes.end(); ++iter)
00844                                                 if (&*iter != &new_decl && iter->_atype == ATTR_TYPE_NOTATION)
00845                                                         throw_exception("No element type may have more than one NOTATION attribute specified");
00846                                 }
00847 
00848                                 parseAttrEnumeration(attr_decl);
00849                         }
00850                         break;
00851                 case ch_open_paren:
00852                         {
00853                                 attr_decl._atype = ATTR_TYPE_ENUMERATION;
00854                                 attr_decl._ctype = vt_enum;
00855                                 parseAttrEnumeration(attr_decl);
00856                         }
00857                         break;
00858                 default:
00859                         throw_exception("Unexpected char in attribute definition");
00860     } 
00861 
00862         skip_white_space(true, "Expected white space");
00863         parsePEReference(false, true);
00864 
00865     
00866     parseDefaultDecl(attr_decl);
00867 
00868         
00869         if (!wasAdded)
00870                 return;
00871 
00872         if (attr_decl._atype == ATTR_TYPE_ID && attr_decl._rule != ATTR_RULE_IMPLIED && attr_decl._rule != ATTR_RULE_REQUIRED)
00873                 throw_exception("Invalid AttrDef syntax");
00874 
00875         
00876         
00877         
00878         
00879         if (!strcmp(str_xml_space, attr_decl._name))
00880         {
00881                 if (attr_decl._atype == ATTR_TYPE_ENUMERATION)
00882                 {
00883                         size_t count = attr_decl._enum.size();
00884 
00885                         if (count < 1 || count > 2)
00886                                 throw_exception("Invalid xml space syntax");
00887 
00888                         if (count == 1 && 
00889                                 !(      attr_decl._enum.front()._value == str_default 
00890                                         
00891                                         || attr_decl._enum.front()._value == str_preserve)
00892                                 )
00893                                 throw_exception("Invalid xml space syntax");
00894 
00895                         if (count == 2 && 
00896                                 !(attr_decl._enum.front()._value == str_default
00897                                         && attr_decl._enum.back()._value == str_preserve
00898                                         || attr_decl._enum.back()._value == str_default
00899                                         && attr_decl._enum.front()._value == str_preserve)
00900                                 )
00901                                 throw_exception("Invalid xml space syntax");
00902                 }
00903                 else if (attr_decl._atype == ATTR_TYPE_CDATA && attr_decl._rule == ATTR_RULE_FIXED)
00904                 {
00905                         if (attr_decl._defval != str_default
00906                                 && attr_decl._defval != str_preserve)
00907                                 throw_exception("Invalid xml space syntax");
00908                 }
00909                 else
00910                         throw_exception("Invalid xml space syntax");
00911         }
00912         
00913         
00914         if (attr_decl._defval.length())
00915         {
00916                 const char* value_ = attr_decl._defval;
00917                 switch (attr_decl._atype)
00918                 {
00919                         case ATTR_TYPE_ENUMERATION:
00920                                 {
00921                                         bool findDefault = false;
00922                                         for (_list< enumNodeDecl >::const_iterator iter = attr_decl._enum.begin(); iter != attr_decl._enum.end(); ++iter)
00923                                         {
00924                                                 if (!findDefault && attr_decl._defval == iter->_value)
00925                                                 {
00926                             findDefault = true;
00927                                                         break;
00928                                                 }
00929                                         }
00930 
00931                                         if (!findDefault)
00932                                                 throw_exception("Default value doesn't match the enumeration items");
00933                                 }
00934                                 break;
00935                         case ATTR_TYPE_ID:
00936                         case ATTR_TYPE_NMTOKEN:
00937                                 
00938                                 break;
00939                         case ATTR_TYPE_NMTOKENS:
00940                                 {
00941                                         _list< const char* > values;
00942                                         tokenValues(value_, values, *_tmp_allocator);
00943                                         _list< const char* > defvalues;
00944                                         tokenValues(attr_decl._defval, defvalues, *_tmp_allocator); 
00945 
00946                                         if (values.empty())
00947                                                 throw_exception("Invalid ENTITIES default value syntax");
00948 
00949                                         for (_list< const char* >::const_iterator iter = values.begin(); iter != values.end(); ++iter)
00950                                         {
00951                                                 
00952                                                 bool findDefault = false;
00953                                                 for (_list< const char* >::const_iterator defiter = defvalues.begin(); defiter != defvalues.end(); ++defiter)
00954                                                 {
00955                                                         if (!findDefault && !strcmp(*defiter, *iter))
00956                                                         {
00957                                                                 findDefault = true;
00958                                                                 break;
00959                                                         }
00960                                                 }
00961                                         
00962                                                 if (!findDefault)
00963                                                         throw_exception("Default value doesn't match the enumeration items");
00964                                         }
00965                                 }
00966                                 break;
00967                         default:
00968                                 break;
00969                 } 
00970         } 
00971 }
00972 
00973 void  
00974 dtd_processor::parseAttrEnumeration(attributeDecl& decl)
00975 {
00976         
00977         
00978 
00979         skip_sign(ch_open_paren, false, false, "Expected open paren symbol");
00980 
00981         size_t counter = 0;
00982 
00983     while (pick())
00984     {
00985                 parsePEReference(true, true);
00986 
00987                 enumNodeDecl attrEnum(&_doc.get_model_allocator());
00988                 attrEnum._id = counter++;
00989                 attrEnum._value = parseValue();
00990                 decl._enum.push_back(_doc.get_model_allocator(), attrEnum);
00991 
00992                 skip_white_space();
00993         
00994                 if (pick() == ch_close_paren)
00995                 {
00996                         pop();
00997                         return;
00998                 }
00999 
01000                 skip_sign(ch_pipe, false, false, "Expected Enum Separator");
01001     }
01002 }
01003 
01004 void  
01005 dtd_processor::parseDefaultDecl(attributeDecl& decl)
01006 {
01007         switch (pick())
01008         {
01009                 case ch_pound: 
01010                         if (pop() == ch_R)
01011                         {
01012                                 skip_string(str_REQUIRED, "Invalid REQUIRED syntax");
01013                                 decl._rule = ATTR_RULE_REQUIRED;
01014                         }
01015                         else if (pick() == ch_I)
01016                         {
01017                                 skip_string(str_IMPLIED, "Invalid IMPLIED syntax");
01018                                 decl._rule = ATTR_RULE_IMPLIED;
01019                         }
01020                         else if (pick() == ch_F) 
01021                         {
01022                                 skip_string(str_FIXED, "Invalid FIXED syntax");
01023                                 decl._rule = ATTR_RULE_FIXED;
01024 
01025                                 skip_white_space(true, "Expected white space");
01026                                 
01027                                 
01028                                 
01029                                 decl._defval = parseQuotedValue(true, false, 0, 0);
01030                         }
01031                         break;
01032                 default:
01033                         {
01034                                 
01035                                 if (decl._atype == ATTR_TYPE_ID)
01036                                         throw_exception("An ID attribute must have a declared default of #IMPLIED or #REQUIRED");
01037                                 decl._rule = ATTR_RULE_REQUIRED;
01038                                 
01039                                 
01040                                 decl._defval = parseQuotedValue(true, false, 0, 0);
01041                         }
01042                         break;
01043         } 
01044 }
01045 
01046 void  
01047 dtd_processor::parseNotation()
01048 {
01049         skip_string(str_NOTATION, "Invalid NOTATION section syntax");
01050         skip_white_space(true, "Expected white space");
01051         parsePEReference(false, true);
01052 
01053         notationDecl& decl = _doc.add_notation_decl(parseName());
01054 
01055         skip_white_space(true, "Expected white space");
01056         parsePEReference(false, true);
01057 
01058         string_t value_system(_tmp_allocator);
01059         string_t value_public(_tmp_allocator);
01060 
01061         parseExternalID(value_system, value_public, false);
01062 
01063         decl._publicId = value_public;
01064         decl._systemId = value_system;
01065         skip_sign(ch_close_angle, true, false, "Expected close tag");
01066 }
01067 
01068 
01069 void  
01070 dtd_processor::validate()
01071 {
01072 }
01073 
01074 void  
01075 dtd_processor::parsePEReference(bool skip_junk_before, bool skip_junk_after)
01076 {
01077         
01078         if (skip_junk_before && is_white_space(pick()))
01079                 skip_white_space();
01080 
01081         if (pick() == ch_percent)
01082         {
01083                 
01084                 _tmp_store2.reset();
01085                 expandPEReference(_tmp_store2);
01086                 size_t len = 0;
01087                 const ub1_t* ptr = _tmp_store2.persist(len);
01088                 push(ptr, len);
01089                 _tmp_store2.reset();
01090 
01091                 if (skip_junk_after && is_white_space(pick()))
01092                         skip_white_space();
01093         }
01094 }
01095 
01096 const entityDecl*  
01097 dtd_processor::expandPEReference(paged_buffer& buffer)
01098 {
01099         assert(pick() == ch_percent);
01100         pop(); 
01101         
01102         
01103         const entityDecl* entry = _doc.find_entity_decl(parseName());
01104         if (!entry)
01105                 throw_exception("Unresolved parameter entity");
01106 
01107         skip_sign(ch_semicolon, false, false, "Expected semicolon symbol");
01108 
01109         
01110         if (entry->_value.length())
01111                 buffer << entry->_value;
01112         else if (entry->_systemId.length())
01113                 
01114                 buffer_loader::load(_stream.get_location(), entry->_systemId, _small_pool, _big_pool, buffer, false);
01115 
01116         return entry;
01117 }
01118 
01119 void 
01120 dtd_processor::deterministic_model(const dfa_token* token)
01121 {
01122 }
01123 
01124 vt_types 
01125 dtd_processor::convert_ctype(const char* x)
01126 {
01127         if (!x)
01128                 throw_exception("Expected valid type");
01129 
01130         
01133         
01134         if (*x != ch_v || *++x != ch_t || *++x != ch_underscore)
01135                 throw_exception("Unknown ctype");
01136 
01137         switch (*++x)
01138         {
01139                 case ch_u: 
01140                         if (*++x != ch_b) throw_exception("Unknown ctype");
01141                         switch (*++x)
01142                         {
01143                                 case ch_1:
01144                                         if (*++x) throw_exception("Unknown ctype");
01145                                         return vt_ub1;
01146                                 case ch_2:
01147                                         if (*++x) throw_exception("Unknown ctype");
01148                                         return vt_ub2;
01149                                 case ch_4:
01150                                         if (*++x) throw_exception("Unknown ctype");
01151                                         return vt_ub4;
01152                                 case ch_8:
01153                                         if (*++x) throw_exception("Unknown ctype");
01154                                         return vt_ub8;
01155                                 default:
01156                                         throw_exception("Unknown ctype");
01157                         }
01158                 case ch_s: 
01159                         switch (*++x)
01160                         {
01161                                 case ch_b: 
01162                                         switch (*++x)
01163                                         {
01164                                                 case ch_1:
01165                                                         if (*++x) throw_exception("Unknown ctype");
01166                                                         return vt_sb1;
01167                                                 case ch_2:
01168                                                         if (*++x) throw_exception("Unknown ctype");
01169                                                         return vt_sb2;
01170                                                 case ch_4:
01171                                                         if (*++x) throw_exception("Unknown ctype");
01172                                                         return vt_sb4;
01173                                                 case ch_8:
01174                                                         if (*++x) throw_exception("Unknown ctype");
01175                                                         return vt_sb8;
01176                                                 default:
01177                                                         throw_exception("Unknown ctype");
01178                                         }
01179                                 case ch_t:
01180                                         if (*++x != ch_r || *++x != ch_i || *++x != ch_n || *++x != ch_g || *++x) throw_exception("Unknown ctype");
01181                                         return vt_string;
01182                                 default:
01183                                         throw_exception("Unknown ctype");
01184                         }
01185                 case ch_f: 
01186                         if (*++x != ch_l || *++x != ch_t) throw_exception("Unknown ctype");
01187                         switch (*++x)
01188                         {
01189                                 case ch_3:
01190                                         if (*++x != ch_2 || *++x) throw_exception("Unknown ctype");
01191                                         return vt_float;
01192                                 case ch_6:
01193                                         if (*++x != ch_4 || *++x) throw_exception("Unknown ctype");
01194                                         return vt_double;
01195                                 default:
01196                                         throw_exception("Unknown ctype");
01197                         }
01198                 case ch_b:
01199                         switch (*++x)
01200                         {
01201                                 case ch_o:
01202                                         if (*++x != ch_o || *++x != ch_l || *++x) throw_exception("Unknown ctype");
01203                                         return vt_bool;
01204                                 case ch_i:
01205                                         if (*++x != ch_n || *++x != ch_a || *++x != ch_r || *++x != ch_y || *++x) throw_exception("Unknown ctype");
01206                                         return vt_binary;
01207                                 default:
01208                                         throw_exception("Unknown ctype");
01209                         }
01210                 case ch_g:
01211                         if (*++x != ch_u || *++x != ch_i || *++x != ch_d || *++x) throw_exception("Unknown ctype");
01212                         return vt_guid;
01213                 case ch_d:
01214                         switch (*++x)
01215                         {
01216                                 case ch_a:
01217                                         if (*++x != ch_t || *++x != ch_e || *++x) throw_exception("Unknown ctype");
01218                                         return vt_date;
01219                                 case ch_e:
01220                                         if (*++x != ch_c || *++x != ch_i || *++x != ch_m || *++x != ch_a || *++x != ch_l || *++x) throw_exception("Unknown ctype");
01221                                         return vt_decimal;
01222                                 default:
01223                                         throw_exception("Unknown ctype");
01224                         }
01225                 case ch_w:
01226                         if (*++x != ch_s || *++x != ch_t || *++x != ch_r || *++x != ch_i || *++x != ch_n || *++x != ch_g || *++x) throw_exception("Unknown ctype");
01227                         return vt_wstring;
01228                 case ch_n:
01229                         if (*++x != ch_u || *++x != ch_m || *++x != ch_e || *++x != ch_r || *++x != ch_i || *++x != ch_c || *++x) throw_exception("Unknown ctype");
01230                         return vt_numeric;
01231                 default:
01232                         throw_exception("Unknown ctype");
01233         } 
01234 
01235         return vt_unknown;
01236 }
01237 
01238 #pragma pack()
01239 END_TERIMBER_NAMESPACE