00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include "base/memory.hpp"
00029 #include "base/list.hpp"
00030 #include "base/string.hpp"
00031 #include "base/vector.hpp"
00032 #include "base/common.hpp"
00033
00034 #include "xml/miscxml.hpp"
00035 #include "xml/declxml.hpp"
00036 #include "xml/defxml.hpp"
00037
00038 BEGIN_TERIMBER_NAMESPACE
00039 #pragma pack(4)
00040
// Scheme names, spelled with the project's ch_* character constants.
// crack_xml_request() matches them case-insensitively at the start of a URL and
// combine_url() emits them when rebuilding one.
00041 static const char protocolHTTP[] = { ch_h, ch_t, ch_t, ch_p, ch_null };
00042 static const char protocolFTP[] = { ch_f, ch_t, ch_p, ch_null };
00043 static const char protocolFILE[] = { ch_f, ch_i, ch_l, ch_e, ch_null };
// Separator (colon, forward slash, forward slash) that combine_url() appends right after a scheme name.
00044 static const char protocolTale[] = { ch_colon, ch_forward_slash, ch_forward_slash, ch_null };
// One-character, null-terminated separator strings used while assembling URLs and HTTP requests.
00045 static const char str_Colon[] = { ch_colon, ch_null };
00046 static const char str_At[] = { ch_at, ch_null };
00047 static const char str_Question[] = { ch_question, ch_null };
00048 static const char str_Pound[] = { ch_pound, ch_null };
00049 static const char str_ForwardSlash[] = { ch_forward_slash, ch_null };
00050
00051
00052 #if OS_TYPE == OS_LINUX || OS_TYPE == OS_UNIX || OS_TYPE == OS_MACOSX
// Stand-in for the Windows CRT _fullpath() on Unix-like systems: resolves
// `relative` against the current working directory.
//
//   buf      - destination buffer of at least `len` bytes, or 0 to have the
//              function malloc() `len` bytes (the caller then free()s the result).
//   relative - path to resolve; 0 yields the current directory itself. A path
//              starting with '/' or '\\' is treated as absolute and copied
//              (truncated to fit). One leading "./" and any number of leading
//              "../" components (either slash flavour) are resolved.
//   len      - capacity of the destination in bytes.
//
// Returns the destination buffer, or 0 when the current directory cannot be
// obtained, there are more "../" components than directory levels, or the
// result does not fit. A buffer allocated here is released before returning 0.
char* _fullpath(char* buf, const char* relative, size_t len)
{
    // Remember whether the buffer is ours so failure paths do not leak it.
    const bool owns_buf = !buf;
    if (owns_buf)
    {
        buf = (char*)malloc(len);
        if (!buf)
            return 0;
    }

    if (!getcwd(buf, len))
    {
        if (owns_buf)
            free(buf);
        return 0;
    }

    if (!relative)
        return buf;

    if (*relative == '/' || *relative == '\\')
    {
        // strncat() appends up to n characters *plus* a terminator,
        // so only len - 1 characters may be requested.
        *buf = 0;
        strncat(buf, relative, len - 1);
        return buf;
    }

    const char* p = relative;

    // Drop a single leading "./" (or ".\").
    if (!strncmp(p, "./", 2) || !strncmp(p, ".\\", 2))
        p += 2;

    // Every leading "../" (or "..\") strips one trailing directory from the cwd.
    size_t level = 0;
    while (!strncmp(p, "../", 3) || !strncmp(p, "..\\", 3))
    {
        ++level;
        p += 3;
    }

    for (; level; --level)
    {
        // Use whichever separator occurs last; either search may come back empty.
        char* fwd = strrchr(buf, '/');
        char* back = strrchr(buf, '\\');
        char* sep = (!back || (fwd && fwd > back)) ? fwd : back;
        if (!sep)
        {
            if (owns_buf)
                free(buf);
            return 0;
        }

        *sep = 0;
    }

    const size_t limit = strlen(buf);
    const size_t add_len = strlen(p);

    // Room is needed for the directory, one '/', the tail and the terminator.
    if (limit + add_len + 1 >= len)
    {
        if (owns_buf)
            free(buf);
        return 0;
    }

    buf[limit] = '/';
    memcpy(buf + limit + 1, p, add_len + 1);

    return buf;
}
00111
00112 #endif
00114 xml_stream_attribute::xml_stream_attribute() :
00115 _protocol(STREAM_UNKNOWN), _port(0), _query(false)
00116 {
00117 }
00118
00119 xml_stream_attribute::xml_stream_attribute(const char* url, bool full) :
00120 _protocol(STREAM_UNKNOWN), _port(0), _query(false)
00121 {
00122 if (!crack_xml_request(url, full))
00123 clear();
00124 }
00125
00126 void
00127 xml_stream_attribute::clear()
00128 {
00129 _protocol = STREAM_UNKNOWN;
00130 _host = 0;
00131 _port = 0;
00132 _user = 0;
00133 _password = 0;
00134 _path = 0;
00135 _file = 0;
00136 _extra = 0;
00137 _query = false;
00138 }
00139
00140 bool
00141 xml_stream_attribute::complete_to_full(const xml_stream_attribute& full_url)
00142 {
00143 if (_protocol != STREAM_RELATIVE)
00144 return false;
00145
00146 _protocol = full_url._protocol;
00147 _host = full_url._host;
00148 _port = full_url._port;
00149 _user = full_url._user;
00150 _password = full_url._password;
00151
00152 string_t old_path = _path;
00153 _path = full_url._path;
00154 _path += old_path;
00155 return true;
00156 }
00157
00158 bool
00159 xml_stream_attribute::combine_url(string_t& url) const
00160 {
00161 url = 0;
00162
00163 switch (_protocol)
00164 {
00165 case STREAM_UNKNOWN:
00166 return false;
00167 case STREAM_LOCAL:
00168 case STREAM_RELATIVE:
00169 break;
00170 case STREAM_HTTP:
00171 url += protocolHTTP;
00172 url += protocolTale;
00173 break;
00174 case STREAM_FTP:
00175 url += protocolFTP;
00176 url += protocolTale;
00177 break;
00178 case STREAM_FILE:
00179 url += protocolFILE;
00180 url += protocolTale;
00181 break;
00182 default:
00183 assert(false);
00184 }
00185
00186 if (_user.length())
00187 {
00188 url += _user;
00189 if (_password.length())
00190 {
00191 url += str_Colon;
00192 url += _password;
00193 }
00194
00195 url += str_At;
00196 }
00197
00198 if (_host.length())
00199 url += _host;
00200
00201 if (_path.length())
00202 url += _path;
00203
00204 if (_file.length())
00205 url += _file;
00206
00207 if (_extra.length())
00208 {
00209 url += _query ? str_Question : str_Pound;
00210 url += _extra;
00211 }
00212
00213 return true;
00214 }
00215
00216 void
00217 xml_stream_attribute::prepare_socket_request(string_t& url) const
00218 {
00219 url = "GET ";
00220
00221 if (_path.length() || _file.length())
00222 {
00223 url += _path;
00224 url += _file;
00225 }
00226 else
00227 url += "/";
00228
00229
00230
00231 if (_extra.length())
00232 {
00233 url += _query ? str_Question : str_Pound;
00234 url += _extra;
00235 }
00236
00237 url += " HTTP/1.0\r\n Host: ";
00238 url += _host;
00239 if (_port)
00240 {
00241 url += str_Colon;
00242 char buf[16];
00243 str_template::strprint(buf, 16, "%hu", _port);
00244 url += buf;
00245 }
00246
00247 url += "\r\n\r\n";
00248 }
00249
00250 void
00251 xml_stream_attribute::prepare_http_request(const char* agent, string_t& url) const
00252 {
00253 url = "GET ";
00254
00255 if (_path.length() || _file.length())
00256 {
00257 url += _path;
00258 url += _file;
00259 }
00260 else
00261 url += "/";
00262
00263
00264
00265 if (_extra.length())
00266 {
00267 url += _query ? str_Question : str_Pound;
00268 url += _extra;
00269 }
00270
00271 url += " HTTP/1.0\r\nHost: ";
00272 url += _host;
00273 if (_port)
00274 {
00275 url += str_Colon;
00276 char buf[16];
00277 str_template::strprint(buf, 16, "%hu", _port);
00278 url += buf;
00279 }
00280
00281 url += "\r\n";
00282 url += "User-Agent: ";
00283 url += agent;
00284 url += "\r\nCache-Control:no-cache\r\nAccept-Encoding:*\r\nConnection: Keep-Alive\r\nContent-Type: text/xml\r\nContent-Length: 0\r\n\r\n";
00285 }
00286
00287 bool
00288 xml_stream_attribute::crack_xml_request(const char* url, bool full)
00289 {
00290
00291
00292
00293
00294 if (!url) return false;
00295
00296 while (*url == ch_space) ++url;
00297
00298
00299 const char* url_ = url;
00300 static const char protocolDelimeter[] = { ch_colon, ch_forward_slash, ch_back_slash, ch_null };
00301 static const char hostDelimeter[] = { ch_forward_slash, ch_back_slash, ch_pound, ch_question, ch_null };
00302 static const char extraDelimeter[] = { ch_pound, ch_question, ch_null };
00303 static const char fileDelimeter[] = { ch_forward_slash, ch_back_slash, ch_null };
00304
00305
00306
00307 if (is_letter(*url_) && *(url_ + 1) == ch_colon && (*(url_ + 2) == ch_forward_slash || *(url_ + 2) == ch_back_slash))
00308 {
00309 _protocol = STREAM_LOCAL;
00310 _host.assign(url_, 2);
00311 const char* file_begin_forward = strrchr(url_ + 2, ch_forward_slash);
00312 const char* file_begin_back = strrchr(url_ + 2, ch_back_slash);
00313 const char* file_begin = __max(file_begin_forward, file_begin_back);
00314 if (file_begin)
00315 {
00316 _file.assign(file_begin);
00317 _path.assign(url_ + 2, file_begin - url_ - 2);
00318 }
00319 else
00320 _file.assign(url_ + 2);
00321
00322 return true;
00323 }
00324
00325
00326
00327 if ((*url_ == ch_forward_slash || *url_ == ch_back_slash)
00328 && (*(url_ + 1) == ch_forward_slash || *(url_ + 1) == ch_back_slash))
00329 {
00330 _protocol = STREAM_LOCAL;
00331 const char* host_end = strpbrk(url_ + 2, fileDelimeter);
00332 _host.assign(url_, host_end ? host_end - url_ : os_minus_one);
00333 if (host_end)
00334 {
00335 const char* file_begin_forward = strrchr(host_end + 1, ch_forward_slash);
00336 const char* file_begin_back = strrchr(host_end + 1, ch_back_slash);
00337 const char* file_begin = __max(file_begin_forward, file_begin_back);
00338 if (file_begin)
00339 {
00340 _file.assign(file_begin);
00341 _path.assign(host_end, file_begin - host_end);
00342 }
00343 else
00344 _file.assign(host_end);
00345 }
00346 return true;
00347 }
00348
00349
00350
00351
00352
00353 if (!str_template::strnocasecmp(url_, protocolHTTP, strlen(protocolHTTP)))
00354 {
00355 _protocol = STREAM_HTTP;
00356 _port = 80;
00357 url_ += strlen(protocolHTTP);
00358 }
00359 else if (!str_template::strnocasecmp(url_, protocolFTP, strlen(protocolFTP)))
00360 {
00361 _protocol = STREAM_FTP;
00362 _port = 21;
00363 url_ += strlen(protocolFTP);
00364 }
00365 else if (!str_template::strnocasecmp(url_, protocolFILE, strlen(protocolFILE)))
00366 {
00367 _protocol = STREAM_FILE;
00368 _port = 0;
00369 url_ += strlen(protocolFILE);
00370 }
00371 else
00372 {
00373 _protocol = STREAM_RELATIVE;
00374 #if OS_TYPE == OS_WIN32
00375 if (*url_ == ch_forward_slash || *url_ == ch_back_slash)
00376 ++url_;
00377 #endif
00378
00379 }
00380
00381 if (_protocol != STREAM_RELATIVE)
00382 {
00383
00384 if (*url_ != ch_colon
00385 || *(url_ + 1) != ch_forward_slash && *(url_ + 1) != ch_back_slash
00386 || *(url_ + 2) != ch_forward_slash && *(url_ + 2) != ch_back_slash)
00387 return false;
00388
00389
00390 url_ += 3;
00391 if (!*url_)
00392 return false;
00393
00394
00395 const char* host_end = strpbrk(url_, hostDelimeter);
00396 if (host_end != url_)
00397 {
00398
00399 const char* user_end = strchr(url_, ch_at);
00400 if (user_end)
00401 {
00402 const char* password_begin = strchr(url_, ch_colon);
00403 if (password_begin)
00404
00405 _password.assign(password_begin + 1, user_end - password_begin - 1);
00406
00407
00408 _user.assign(url_, password_begin ? password_begin - url_ : user_end - url_);
00409
00410 const char* port_begin = strchr(user_end + 1, ch_colon);
00411 if (port_begin)
00412 {
00413 if (!str_template::strscan(port_begin + 1, 32, "%hu", &_port))
00414 return false;
00415
00416 _host.assign(user_end + 1, port_begin - user_end - 1);
00417 }
00418 else
00419 _host.assign(user_end + 1, host_end ? host_end - user_end - 1 : os_minus_one);
00420 }
00421 else
00422 {
00423 const char* port_begin = strchr(url_, ch_colon);
00424 if (port_begin)
00425 {
00426 if (!str_template::strscan(port_begin + 1, 32, "%hu", &_port))
00427 return false;
00428
00429 _host.assign(url_, port_begin - url_);
00430 }
00431 else
00432 _host.assign(url_, host_end ? host_end - url_ : os_minus_one);
00433 }
00434 }
00435
00436
00437 url_ = host_end;
00438
00439 if (!url_)
00440 return true;
00441 else if (*url_ == ch_forward_slash || *url_ == ch_back_slash)
00442 ++url_;
00443 }
00444 else if (full)
00445 {
00446 char buf[_MAX_PATH + 1];
00447 if (!_fullpath(buf, url_, _MAX_PATH))
00448 return false;
00449
00450 _protocol = STREAM_LOCAL;
00451 _host.assign(buf, 2);
00452 const char* file_begin_forward = strrchr(buf + 3, ch_forward_slash);
00453 const char* file_begin_back = strrchr(buf + 3, ch_back_slash);
00454 const char* file_begin = __max(file_begin_forward, file_begin_back);
00455 if (file_begin)
00456 {
00457 _file.assign(file_begin);
00458 _path.assign(buf + 2, file_begin - buf - 2);
00459 }
00460 else
00461 _file.assign(buf + 2);
00462 return true;
00463 }
00464
00465
00466
00467 if (*url_ != ch_pound && *url_ != ch_question)
00468 {
00469 const char* file_end = strpbrk(url_, extraDelimeter);
00470 const char* file_begin_forward = strrchr(url_, ch_forward_slash);
00471 const char* file_begin_back = strrchr(url_, ch_back_slash);
00472 const char* file_begin = __max(file_begin_forward, file_begin_back);
00473 if (file_begin)
00474 {
00475 _file.append(file_begin, file_end ? file_end - file_begin : os_minus_one);
00476 _path = str_ForwardSlash;
00477 _path.append(url_, file_begin - url_);
00478 }
00479 else
00480 {
00481 _file = str_ForwardSlash;
00482 _file.append(url_, file_end ? file_end - url_ : os_minus_one);
00483 }
00484
00485
00486 url_ = file_end;
00487 if (!url_)
00488 return true;
00489 }
00490
00491
00492 _extra.assign(url_ + 1);
00493 _query = *url_ == ch_question;
00494
00495 return true;
00496 }
00497
00498 bool
00499 xml_stream_attribute::complete_to_full(const char* location, const char* full_path, string_t& res)
00500 {
00501 xml_stream_attribute attr_new(location, false);
00502 *this = attr_new;
00503 if (_protocol == STREAM_UNKNOWN)
00504 {
00505 res = "Unknown url format: ";
00506 res += location;
00507 return false;
00508 }
00509
00510 if (_protocol == STREAM_RELATIVE)
00511 {
00512 xml_stream_attribute attr_full(full_path, true);
00513 if (!complete_to_full(attr_full))
00514 {
00515 res = "Can't complete relative path: ";
00516 res += location;
00517 return false;
00518 }
00519 }
00520
00521 return combine_url(res);
00522 }
00523
00525
00526 void
00527 buffer_loader::load(const char* url, const char* location, mem_pool_t& small_pool, mem_pool_t& big_pool, paged_buffer& buffer, bool subset)
00528 {
00529 string_t full_path;
00530 xml_stream_attribute attr_new;
00531 if (!attr_new.complete_to_full(location, url, full_path))
00532 {
00533 string_t ex("Can't create full path from location: ");
00534 ex += location;
00535 ex += " and url: ";
00536 ex += url;
00537 exception::_throw(ex);
00538 }
00539
00540 stream_input_common stream(small_pool, big_pool, 0, subset);
00541 if (!stream.open(attr_new))
00542 {
00543 string_t ex("Can't open external file: ");
00544 ex += location;
00545 exception::_throw(ex);
00546 }
00547
00548 ub1_t* lbuf = (ub1_t*)buffer.get_tmp_allocator().allocate(os_def_size);
00549 size_t len;
00550 while (0 != (len = stream.pull(lbuf, os_def_size)))
00551 buffer.append(lbuf, len);
00552
00553 stream.close();
00554 }
00555
00556
00558 stream_input_memory::stream_input_memory(const ub1_t* buffer, size_t size, mem_pool_t& small_pool, mem_pool_t& big_pool, size_t xml_size, bool subset) :
00559 byte_source(small_pool, big_pool, xml_size, 0, subset),
00560 _external_buffer(buffer),
00561 _external_size(size),
00562 _external_pos(0)
00563 {}
00564
00565
00566 bool
00567 stream_input_memory::data_request(ub1_t* buf, size_t& len)
00568 {
00569 if (_external_pos == _external_size)
00570 return false;
00571
00572 len = __min(len, _external_size - _external_pos);
00573 memcpy(buf, _external_buffer + _external_pos, len);
00574 _external_pos += len;
00575 return true;
00576 }
00577
00578
00581 stream_input_common::stream_input_common(mem_pool_t& small_pool, mem_pool_t& big_pool, size_t xml_size, bool subset, size_t timeout) :
00582 byte_source(small_pool, big_pool, xml_size, 0, subset),
00583 terimber_socket(),
00584 _protocol(STREAM_UNKNOWN),
00585 _desc(0),
00586 _timeout(timeout),
00587 _file_length(0)
00588 {
00589 }
00590
00591 stream_input_common::~stream_input_common()
00592 {
00593 close();
00594 }
00595
00596
00597 bool
00598 stream_input_common::data_request(ub1_t* buf, size_t& len)
00599 {
00600 switch (_protocol)
00601 {
00602 case STREAM_LOCAL:
00603 {
00604 size_t shift = 0;
00605 size_t requested_len = len;
00606 len = 0;
00607 while (requested_len)
00608 {
00609 int gotLen = (int)::fread(buf + shift, 1, requested_len, _desc);
00610 if (gotLen < 0)
00611 {
00612 len = 0;
00613 close();
00614 return false;
00615 }
00616
00617 if (gotLen == 0)
00618 break;
00619
00620 requested_len -= gotLen;
00621 len += gotLen;
00622 shift += gotLen;
00623 }
00624
00625 return len != 0;
00626 }
00627 case STREAM_HTTP:
00628 {
00629 if (!_file_length || !len) return false;
00630 len = __min(_file_length, (ub4_t)len);
00631 if (terimber_socket::receive((char*)buf, len, _timeout))
00632 {
00633 close();
00634 len = 0;
00635 return false;
00636 }
00637
00638 _file_length -= (ub4_t)len;
00639 return true;
00640 }
00641 case STREAM_FTP:
00642 case STREAM_FILE:
00643 case STREAM_UNKNOWN:
00644 case STREAM_RELATIVE:
00645 default:
00646 len = 0;
00647 return false;
00648 }
00649 }
00650
00651 bool
00652 stream_input_common::open(const xml_stream_attribute& location)
00653 {
00654 if (_protocol != STREAM_UNKNOWN) return false;
00655
00656 string_t url;
00657
00658 if (!location.combine_url(url)) return false;
00659
00660 switch (location._protocol)
00661 {
00662 case STREAM_UNKNOWN:
00663 case STREAM_RELATIVE:
00664 return false;
00665 case STREAM_LOCAL:
00666 if (!(_desc = ::fopen(url, "rb")))
00667 return false;
00668 break;
00669 case STREAM_HTTP:
00670 {
00671
00672 _file_length = 0;
00673 string_t http_request;
00674
00675 location.prepare_socket_request(http_request);
00676
00677 sockaddr_in addr;
00678 if (resolve_address(location._host, location._port, addr)) return false;
00679
00680 set_address(addr);
00681
00682 if (!connect()) return false;
00683
00684 if (terimber_socket::send(http_request, http_request.length(), _timeout)) { terimber_socket::close(); return false; }
00685
00686
00687
00688 const size_t buf_size = 256;
00689 char header[buf_size];
00690 http_request = 0;
00691 size_t shift = 0;
00692 size_t counter = 0;
00693 for (counter = 0; counter < 4;)
00694 {
00695 if (terimber_socket::receive(header + shift, 1, _timeout)) { terimber_socket::close(); return false; }
00696
00697 if (header[shift] == ch_cr && (counter == 0 || counter == 2)
00698 || header[shift] == ch_lf && (counter == 1 || counter == 3))
00699 ++counter;
00700 else
00701 counter = 0;
00702
00703
00704 if (++shift == buf_size - 1)
00705 {
00706
00707 header[shift] = 0;
00708
00709 http_request += header;
00710
00711 shift = 0;
00712 }
00713 }
00714
00715
00716 header[shift] = 0;
00717
00718 http_request += header;
00719
00720
00721
00722 const char* str = strstr(http_request, "HTTP");
00723 if (!str) { close(); return false; }
00724
00725 if (!(str = strchr(str, ' '))) { close(); return false; }
00726
00727 while (*str == ' ') ++str;
00728 if (strncmp(str, "200", 3)) { close(); return false; }
00729
00730
00731 if (!(str = strstr(str, "Content-Length:"))) { close(); return false; }
00732
00733 str += strlen("Content-Length:");
00734 while (*str == ' ') ++str;
00735 if (!str_template::strscan(str, 32, "%d", &_file_length)) { close(); return false; }
00736 }
00737 break;
00738 case STREAM_FTP:
00739 case STREAM_FILE:
00740 return false;
00741 }
00742
00743 _protocol = location._protocol;
00744 _url = url;
00745 return true;
00746 }
00747
00748 void
00749 stream_input_common::close()
00750 {
00751 switch (_protocol)
00752 {
00753 case STREAM_UNKNOWN:
00754 return;
00755 case STREAM_RELATIVE:
00756 break;
00757 case STREAM_LOCAL:
00758 fclose(_desc);
00759 _desc = 0;
00760 break;
00761 case STREAM_HTTP:
00762 terimber_socket::close();
00763 _file_length = 0;
00764 break;
00765 case STREAM_FTP:
00766 case STREAM_FILE:
00767 break;
00768 }
00769
00770 _protocol = STREAM_UNKNOWN;
00771 }
00772
00774 stream_output_file::stream_output_file(mem_pool_t& small_pool, mem_pool_t& big_pool, size_t xml_size) :
00775 byte_consumer(small_pool, big_pool, xml_size),
00776 _desc(0)
00777 {
00778 }
00779
00780 bool
00781 stream_output_file::open(const char* location)
00782 {
00783 if (_desc)
00784 return false;
00785
00786 if (!(_desc = ::fopen(location, "wb")))
00787 return false;
00788
00789 return true;
00790 }
00791
00792 void
00793 stream_output_file::close()
00794 {
00795 if (_desc)
00796 {
00797
00798 ::fclose(_desc);
00799 _desc = 0;
00800 }
00801 }
00802
00803
00804 bool
00805 stream_output_file::data_persist(const ub1_t* buf, size_t len)
00806 {
00807 if (!_desc)
00808 return false;
00809
00810 size_t shift = 0;
00811 while (len)
00812 {
00813 int putLen = (int)::fwrite(buf + shift, 1, len, _desc);
00814 if (putLen < 0)
00815 return false;
00816
00817 if (putLen == 0)
00818 break;
00819
00820 len -= putLen;
00821 shift += putLen;
00822 }
00823
00824 return !len;
00825 }
00826
00827
00828 stream_output_file::~stream_output_file()
00829 {
00830
00831 close();
00832 }
00833
00835 memory_output_stream::memory_output_stream(mem_pool_t& small_pool, mem_pool_t& big_pool, size_t xml_size, ub1_t* buf, size_t buf_size) :
00836 byte_consumer(small_pool, big_pool, xml_size), _buf(buf), _buf_size(buf_size), _filled_size(0), _required_size(0)
00837 {
00838 }
00839
00840 size_t
00841 memory_output_stream::get_required_size() const
00842 {
00843 return _required_size;
00844 }
00845
00846 bool
00847 memory_output_stream::is_overflow() const
00848 {
00849 return _required_size - _filled_size > 0;
00850 }
00851
00852
00853 bool
00854 memory_output_stream::data_persist(const ub1_t* buf, size_t len)
00855 {
00856
00857 size_t copy_len = __min(len, _buf_size - _filled_size);
00858 if (_buf && copy_len)
00859 {
00860 memcpy(_buf + _filled_size, buf, copy_len);
00861 _filled_size += copy_len;
00862 }
00863
00864 _required_size += len;
00865 return true;
00866
00867
00868 }
00869
00870 #pragma pack()
00871 END_TERIMBER_NAMESPACE
00872