00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #ifndef _terimber_string_hpp_
00029 #define _terimber_string_hpp_
00030
00031 #include "base/string.h"
00032 #include "base/memory.hpp"
00033
00034 BEGIN_TERIMBER_NAMESPACE
00035 #pragma pack(4)
00036
00037 static const ub1_t str_leadingByte[6] = {0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};
00038
00039
00040
00041
00042 template < class T >
00043 base_string< T >::base_string(byte_allocator* allocator_) :
00044 _allocator(allocator_), _handle(0), _length(0)
00045 {
00046 }
00047
00048
00049
00050
00051 template < class T >
00052 base_string< T >::~base_string()
00053 {
00054 _destroy();
00055 }
00056
00057
00058
00059 template < class T >
00060 base_string< T >::base_string(const T* x, byte_allocator* allocator_) :
00061 _allocator(allocator_), _handle(0), _length(0)
00062 {
00063 *this = x;
00064 }
00065
00066
00067
00068 template < class T >
00069 base_string< T >::base_string(const base_string< T >& x) :
00070 _allocator(x._allocator), _handle(0), _length(0)
00071 {
00072 *this = x;
00073 }
00074
00075
00076
00077 template < class T >
00078 inline
00079 base_string< T >&
00080 base_string< T >::operator=(const T* x)
00081 {
00082 _assign(x);
00083 return *this;
00084 }
00085
00086
00087
00088 template < class T >
00089 inline
00090 base_string< T >&
00091 base_string< T >::operator=(const base_string< T >& x)
00092 {
00093 _assign(x._handle, x._length);
00094 return *this;
00095 }
00096
00097
00098
00099 template < class T >
00100 inline
00101 bool
00102 base_string< T >::operator==(const base_string< T >& x) const
00103 {
00104 return _compare(x._handle, x._length) == 0;
00105 }
00106
00107 template < class T >
00108 inline
00109 bool
00110 base_string< T >::operator!=(const base_string< T >& x) const
00111 {
00112 return _compare(x._handle, x._length) != 0;
00113 }
00114
00115 template < class T >
00116 inline
00117 bool
00118 base_string< T >::operator<(const base_string< T >& x) const
00119 {
00120 return _compare(x._handle, x._length) < 0;
00121 }
00122
00123 template < class T >
00124 inline
00125 bool
00126 base_string< T >::operator<=(const base_string< T >& x) const
00127 {
00128 return _compare(x._handle, x._length) <= 0;
00129 }
00130
00131 template < class T >
00132 inline
00133 bool
00134 base_string< T >::operator>(const base_string< T >& x) const
00135 {
00136 return _compare(x._handle, x._length) > 0;
00137 }
00138
00139 template < class T >
00140 inline
00141 bool
00142 base_string< T >::operator>=(const base_string< T >& x) const
00143 {
00144 return _compare(x._handle, x._length) >= 0;
00145 }
00146
00147
00148
00149 template < class T >
00150 inline
00151 bool
00152 base_string< T >::operator==(const T* x) const
00153 {
00154 return _compare(x) == 0;
00155 }
00156
00157 template < class T >
00158 inline
00159 bool
00160 base_string< T >::operator!=(const T* x) const
00161 {
00162 return _compare(x) != 0;
00163 }
00164
00165 template < class T >
00166 inline
00167 bool
00168 base_string< T >::operator<(const T* x) const
00169 {
00170 return _compare(x) < 0;
00171 }
00172
00173 template < class T >
00174 inline
00175 bool
00176 base_string< T >::operator<=(const T* x) const
00177 {
00178 return _compare(x) <= 0;
00179 }
00180
00181 template < class T >
00182 inline
00183 bool
00184 base_string< T >::operator>(const T* x) const
00185 {
00186 return _compare(x) > 0;
00187 }
00188
00189 template < class T >
00190 inline
00191 bool
00192 base_string< T >::operator>=(const T* x) const
00193 {
00194 return _compare(x) >= 0;
00195 }
00196
00197 template < class T >
00198 inline
00199 int
00200 base_string< T >::compare(const T* x, size_t size) const
00201 {
00202 return _compare(x, size);
00203 }
00204
00205
00206
00207 template < class T >
00208 inline
00209 base_string< T >&
00210 base_string< T >::operator+=(const T* x)
00211 {
00212 _add(x);
00213 return *this;
00214 }
00215
00216 template < class T >
00217 inline
00218 base_string< T >&
00219 base_string< T >::operator+=(const base_string< T >& x)
00220 {
00221 _add(x._handle, x._length);
00222 return *this;
00223 }
00224
00225
00226
00227 template < class T >
00228 inline
00229 base_string< T >&
00230 base_string< T >::append(const T* x, size_t size)
00231 {
00232 _add(x, size);
00233 return *this;
00234 }
00235
00236
00237
00238 template < class T >
00239 inline
00240 base_string< T >&
00241 base_string< T >::assign(const T* x, size_t size)
00242 {
00243 _assign(x, size);
00244 return *this;
00245 }
00246
00247
00248
00249 template < class T >
00250 inline
00251 size_t
00252 base_string< T >::length() const
00253 {
00254 return _length;
00255 }
00256
00257
00258
00259 template < class T >
00260 inline
00261 T*
00262 base_string< T >::reserve(size_t size)
00263 {
00264 if (_length < size)
00265 _destroy(), _length = size;
00266 else if (_length > size)
00267 {
00268 _length = size;
00269 _handle[_length] = 0;
00270 }
00271
00272 if (_length >= 0) _create();
00273 return _handle;
00274 }
00275
00276 template < class T >
00277 inline
00278 byte_allocator*
00279 base_string< T >::get_allocator()
00280 {
00281 return _allocator;
00282 }
00283
00284
00285 template < class T >
00286 inline
00287 int
00288 base_string< T >::_compare(const T* x, size_t len) const
00289 {
00290 if (!_handle || !*_handle)
00291 return (x && *x) ? -1 : 0;
00292 if (!x || !*x)
00293 return 1;
00294
00295
00296 size_t slen = (len == os_minus_one) ? str_template::strlen(x) : len;
00297 int res = str_template::strcmp(x, _handle, __min(slen, _length));
00298 return (res != 0) ? res : (_length == slen ? 0 : (_length < slen ? -1 : 1));
00299 }
00300
00301
00302 template < class T >
00303 inline
00304 void
00305 base_string< T >::_assign(const T* x, size_t len)
00306 {
00307 if (!x)
00308 {
00309 _destroy();
00310 return;
00311 }
00312
00313
00314
00315 T* handle_ = _handle;
00316 _handle = 0;
00317 _length = 0;
00318 _add(x, len);
00319
00320 if (handle_)
00321 _allocator ? _allocator->deallocate(handle_) : delete [] handle_;
00322 }
00323
00324
00325 template < class T >
00326 inline
00327 void
00328 base_string< T >::_add(const T* x, size_t len)
00329 {
00330 if (!x)
00331 return;
00332 size_t _len = len == os_minus_one ? str_template::strlen(x) : len;
00333 if (!_handle)
00334 {
00335 _length = _len;
00336 if (_length >= 0) _create(), memcpy(_handle, x, S * _length);
00337 }
00338 else
00339 {
00340
00341 T* handle_ = _handle;
00342
00343 size_t len_ = _length;
00344
00345 _handle = 0;
00346
00347 _length = len_ + _len;
00348
00349 _create();
00350
00351 memcpy(_handle, handle_, S * len_);
00352
00353 memcpy(_handle + len_, x, S * _len);
00354
00355 _allocator ? _allocator->deallocate(handle_) : delete [] handle_;
00356 }
00357 }
00358
00359
00360 template < class T >
00361 inline
00362 void
00363 base_string< T >::_create()
00364 {
00365 if (_handle)
00366 return;
00367 _handle = _allocator ? (T*)_allocator->allocate(S * (_length + 1)) : new T[_length + 1];
00368 _handle[_length] = 0;
00369 }
00370
00371
00372 template < class T >
00373 inline
00374 void
00375 base_string< T >::_destroy()
00376 {
00377 if (!_handle)
00378 return;
00379 _allocator ? _allocator->deallocate(_handle) : delete [] _handle;
00380 _length = 0;
00381 _handle = 0;
00382 }
00383
00385 namespace str_template
00386 {
00387
// Narrow-character overload: the format string already has the target character type,
// so the scratch buffer argument is ignored and `format` is returned unchanged.
inline
const char*
strformat(char* /*unused scratch buffer*/, const char* format)
{
	const char* const result = format;
	return result;
}
00394
// Wide-character overload: widens the narrow format string into buf and returns buf.
//   buf    - destination; must have room for the length of `format` plus one terminator
//   format - NUL-terminated narrow string; null is treated as empty
// The result is now always NUL-terminated (previously the terminator was never
// written, so the caller got an unterminated string unless buf was pre-zeroed).
// Each char is widened through unsigned char so bytes above 0x7F are not sign-extended.
// Returns null when buf is null.
inline
const wchar_t*
strformat(wchar_t* buf, const char* format)
{
	if (!buf)
		return 0;

	wchar_t* out = buf;
	if (format)
	{
		while (*format)
			*out++ = static_cast< wchar_t >(static_cast< unsigned char >(*format++));
	}
	*out = 0;
	return buf;
}
00403
// Parses a single value from the narrow buffer `buf` according to `format`, storing it
// through `res`. Returns whatever the underlying scanf-family call returns.
// Under OS_WIN32 with MSVC newer than _MSC_VER 1200 the length-bounded _snscanf is used;
// everywhere else sscanf is used and `len` is ignored, so buf must be NUL-terminated.
inline
int
strscan(const char* buf, size_t len, const char* format, void* res)
{
	int fields;
#if OS_TYPE == OS_WIN32 && defined(_MSC_VER) && _MSC_VER > 1200
	fields = _snscanf(buf, len, format, res);
#else
	fields = sscanf(buf, format, res);
#endif
	return fields;
}
00414
// Wide-character counterpart of strscan: parses a single value from `buf` according to
// `format`, storing it through `res`, and returns the underlying call's result.
// Under OS_WIN32 with MSVC newer than _MSC_VER 1200 the length-bounded _snwscanf is used;
// everywhere else swscanf is used and `len` is ignored, so buf must be NUL-terminated.
inline
int
strscan(const wchar_t* buf, size_t len, const wchar_t* format, void* res)
{
	int fields;
#if OS_TYPE == OS_WIN32 && defined(_MSC_VER) && _MSC_VER > 1200
	fields = _snwscanf(buf, len, format, res);
#else
	fields = swscanf(buf, format, res);
#endif
	return fields;
}
00425
// printf-style formatting into the narrow buffer `buf` of capacity `len`.
// Returns the result of the underlying call: _vsnprintf under OS_WIN32 with MSVC older
// than _MSC_VER 1400, vsnprintf otherwise.
inline
int
strprint(char* buf, size_t len, const char* format, ...)
{
	va_list args;
	va_start(args, format);

#if OS_TYPE == OS_WIN32 && defined(_MSC_VER) && _MSC_VER < 1400
	const int written = _vsnprintf(buf, len, format, args);
#else
	const int written = vsnprintf(buf, len, format, args);
#endif

	va_end(args);
	return written;
}
00442
// printf-style formatting into the wide buffer `buf` of capacity `len`.
// Returns the result of the underlying call: _vsnwprintf under OS_WIN32 with MSVC up to
// _MSC_VER 1200, vswprintf otherwise.
inline
int
strprint(wchar_t* buf, size_t len, const wchar_t* format, ...)
{
	va_list args;
	va_start(args, format);

#if OS_TYPE == OS_WIN32 && defined(_MSC_VER) && _MSC_VER <= 1200
	const int written = _vsnwprintf(buf, len, format, args);
#else
	const int written = vswprintf(buf, len, format, args);
#endif

	va_end(args);
	return written;
}
00459
00460
00461
00462
// Character-type-generic string length: counts elements of x up to (not including)
// the first zero element. A null pointer yields 0.
template < class T >
inline
size_t
strlen(const T* x)
{
	size_t count = 0;
	if (x)
	{
		while (x[count])
			++count;
	}
	return count;
}
00473
00474
00475
// Character-type-generic bounded copy: copies at most `len` elements from x into dest,
// stopping early at x's terminator, then writes a terminator after the copied elements.
// dest therefore needs room for up to len + 1 elements. Returns dest.
template < class T >
inline
T*
strcpy(T* dest, const T* x, size_t len)
{
	size_t i = 0;
	for (; i < len && x[i]; ++i)
		dest[i] = x[i];
	dest[i] = 0;
	return dest;
}
00486
00487
00488
// Character-type-generic bounded comparison of at most `len` elements.
// Returns the difference (*dest - *x) at the first position where the sequences differ,
// or 0 when they match up to `len` elements or up to x's terminator.
template < class T >
inline
int
strcmp(const T* dest, const T* x, size_t len)
{
	int diff = 0;
	for (size_t i = 0; i < len; ++i)
	{
		diff = dest[i] - x[i];
		if (diff != 0 || !x[i])
			break;
	}
	return diff;
}
00498
00499 inline
00500 int
00501 strnocasecmp(const char* dest, const char* x, size_t len)
00502 {
00503 if (len == os_minus_one)
00504 len = __min(strlen(dest), strlen(x)) + 1;
00505 #if OS_TYPE == OS_WIN32
00506 return strnicmp(dest, x, len);
00507 #else
00508 return strncasecmp(dest, x, len);
00509 #endif
00510 }
00511
00512 inline
00513 int
00514 strnocasecmp(const wchar_t* dest, const wchar_t* x, size_t len)
00515 {
00516 if (len == os_minus_one)
00517 len = __min(strlen(dest), strlen(x)) + 1;
00518 #if OS_TYPE == OS_WIN32
00519 return wcsnicmp(dest, x, len);
00520 #elif OS_TYPE == OS_MACOSX
00521 return wcsncmp(dest, x, len);
00522 #else
00523 return wcsncasecmp(dest, x, len);
00524 #endif
00525 }
00526
// Computes how many UTF-16 code units are needed to hold the UTF-8 text in x.
//   x   - UTF-8 input; scanning stops at the first NUL byte or after `len` bytes
//   len - maximum number of bytes to read from x
// Code points of 0x10000 and above count as two units (a surrogate pair), all others
// as one. Lead bytes for sequences of up to six bytes are accepted.
// Returns 0 for null or empty input AND for malformed input (bad lead byte, bad
// continuation byte, or a sequence cut off by `len`).
// The length bound is checked before every read, so a buffer of exactly `len` bytes
// without a terminator is never overrun.
inline
size_t
multibyte_to_unicode_len(const char* x, size_t len)
{
	if (!x)
		return 0;

	size_t size = 0;	// code units required so far
	size_t pos = 0;		// read position inside x

	while (pos < len && x[pos])
	{
		unsigned long value = 0;
		unsigned char byte_count = 0;	// continuation bytes still expected
		unsigned char source = static_cast< unsigned char >(x[pos]);

		if (source <= 0x7F)
		{
			value = source;
		}
		else
		{
			// 0x80..0xBF is a stray continuation byte, 0xFE/0xFF never occur in UTF-8
			if (source < 0xC0 || source > 0xFD)
				return 0;

			if (source < 0xE0)
				byte_count = 1;
			else if (source < 0xF0)
				byte_count = 2;
			else if (source < 0xF8)
				byte_count = 3;
			else if (source < 0xFC)
				byte_count = 4;
			else
				byte_count = 5;

			// payload bits of the lead byte: 5 bits for a 2-byte sequence down to 1 bit for 6 bytes
			value |= source & ((2 << (5 - byte_count)) - 1);
			while (byte_count)
			{
				++pos;
				if (pos >= len)
					return 0;	// sequence truncated by len

				source = static_cast< unsigned char >(x[pos]);
				if (source > 0xBF || source < 0x80)
					return 0;	// not a continuation byte (covers an early NUL too)

				value <<= 6;
				value |= source & 0x3F;
				--byte_count;
			}
		}

		size += (value >= 0x10000) ? 2 : 1;
		++pos;
	}

	return size;
}
00596
// Converts UTF-8 text to UTF-16 code units.
//   dest     - output buffer
//   dest_len - capacity of dest in wchar_t elements
//   x        - UTF-8 input; conversion stops at the first NUL byte or after src_len bytes
//   src_len  - maximum number of bytes to read from x
// Code points of 0x10000 and above are written as a surrogate pair.
// No terminator is written; the caller is expected to have placed one.
// Returns dest on success, or 0 when dest/x is null, the input is malformed, or dest
// is too small (dest may have been partially written in the last two cases).
// The length bound is checked before every read, so a buffer of exactly src_len bytes
// without a terminator is never overrun.
inline
const wchar_t*
multibyte_to_unicode(wchar_t* dest, size_t dest_len, const char* x, size_t src_len)
{
	if (!dest || !x)
		return 0;

	wchar_t* const retVal = dest;
	size_t pos = 0;		// read position inside x

	while (pos < src_len && x[pos])
	{
		unsigned long value = 0;
		unsigned char byte_count = 0;	// continuation bytes still expected
		unsigned char source = static_cast< unsigned char >(x[pos]);

		if (source <= 0x7F)
		{
			value = source;
		}
		else
		{
			// 0x80..0xBF is a stray continuation byte, 0xFE/0xFF never occur in UTF-8
			if (source < 0xC0 || source > 0xFD)
				return 0;

			if (source < 0xE0)
				byte_count = 1;
			else if (source < 0xF0)
				byte_count = 2;
			else if (source < 0xF8)
				byte_count = 3;
			else if (source < 0xFC)
				byte_count = 4;
			else
				byte_count = 5;

			// payload bits of the lead byte: 5 bits for a 2-byte sequence down to 1 bit for 6 bytes
			value |= source & ((2 << (5 - byte_count)) - 1);
			while (byte_count)
			{
				++pos;
				if (pos >= src_len)
					return 0;	// sequence truncated by src_len

				source = static_cast< unsigned char >(x[pos]);
				if (source > 0xBF || source < 0x80)
					return 0;	// not a continuation byte (covers an early NUL too)

				value <<= 6;
				value |= source & 0x3F;
				--byte_count;
			}
		}

		if (value >= 0x10000)
		{
			// needs a surrogate pair: high unit first, then low unit
			if (dest_len < 2)
				return 0;

			value -= 0x10000;
			*dest++ = static_cast< wchar_t >(((value >> 10) + 0xD800) & 0xFFFF);
			*dest++ = static_cast< wchar_t >((value & 0x03FF) + 0xDC00);
			dest_len -= 2;
		}
		else
		{
			if (dest_len < 1)
				return 0;

			*dest++ = static_cast< wchar_t >(value);
			--dest_len;
		}

		++pos;
	}

	return retVal;
}
00684
00685 inline
00686 const wchar_t*
00687 multibyte_to_unicode(wstring_t& dest, const char* x, size_t len)
00688 {
00689 dest = 0;
00690
00691 if (!x)
00692 return 0;
00693
00694 size_t src_len = len == os_minus_one ? strlen(x) : len;
00695 size_t _len = multibyte_to_unicode_len(x, src_len);
00696 return multibyte_to_unicode(dest.reserve(_len), _len, x, src_len);
00697 }
00698
00699 inline
00700 const wchar_t*
00701 multibyte_to_unicode(byte_allocator& _allocator, const char* x, size_t len)
00702 {
00703 wchar_t* dest = 0;
00704
00705 if (!x)
00706 return 0;
00707
00708 size_t src_len = len == os_minus_one ? strlen(x) : len;
00709 size_t _len = multibyte_to_unicode_len(x, src_len);
00710 dest = (wchar_t*)_allocator.allocate((_len + 1) * sizeof(ub2_t));
00711 dest[_len] = 0;
00712 return multibyte_to_unicode(dest, _len, x, src_len);
00713 }
00714
// Computes how many bytes are needed to encode the wide text in x as UTF-8.
//   x   - wide input; scanning stops at the first zero element or after `len` elements
//   len - maximum number of elements to read from x
// A high surrogate (0xD800..0xDBFF) must be followed, within `len`, by a low surrogate
// (0xDC00..0xDFFF); the pair is combined into one code point. Code points up to
// 0x7FFFFFFF are sized as sequences of 1 to 6 bytes.
// Returns 0 for null or empty input AND for invalid input (value above 0x7FFFFFFF,
// or a high surrogate without a valid low surrogate inside the bound).
// The length bound is checked before every read.
inline
size_t
unicode_to_multibyte_len(const wchar_t* x, size_t len)
{
	if (!x)
		return 0;

	size_t size = 0;	// bytes required so far
	size_t pos = 0;		// read position inside x

	while (pos < len && x[pos])
	{
		unsigned long value = static_cast< unsigned long >(x[pos]);

		if (value >= 0xD800 && value <= 0xDBFF)
		{
			// high surrogate: the low half must exist inside the bound
			++pos;
			if (pos >= len || !x[pos])
				return 0;

			const unsigned long low = static_cast< unsigned long >(x[pos]);
			if (low < 0xDC00 || low > 0xDFFF)
				return 0;	// unpaired high surrogate

			value = ((value - 0xD800) << 10) + (low - 0xDC00) + 0x10000;
		}

		unsigned char byte_count = 0;

		if (value < 0x80)
			byte_count = 1;
		else if (value < 0x800)
			byte_count = 2;
		else if (value < 0x10000)
			byte_count = 3;
		else if (value < 0x200000)
			byte_count = 4;
		else if (value < 0x4000000)
			byte_count = 5;
		else if (value <= 0x7FFFFFFF)
			byte_count = 6;
		else
			return 0;

		size += byte_count;
		++pos;
	}

	return size;
}
00765
00766 inline
00767 const char*
00768 unicode_to_multibyte(char* dest, size_t dest_len, const wchar_t* x, size_t src_len)
00769 {
00770
00771 char* retVal = dest;
00772
00773 size_t len_ = 0;
00774
00775 while (x[len_] && len_ < src_len)
00776 {
00777
00778 ub4_t value = x[len_];
00779
00780 if (value >= 0xD800 && value <= 0xDBFF)
00781 {
00782
00783 ++len_;
00784 if (!x[len_])
00785 return 0;
00786
00787 ub4_t surrogate = value;
00788
00789 value = x[len_];
00790 value = ((surrogate - 0xD800) << 10) + ((value - 0xDC00) + 0x10000);
00791 }
00792
00793 ub1_t byte_count = 0;
00794
00795 if (value < 0x80)
00796 byte_count = 1;
00797 else if (value < 0x800)
00798 byte_count = 2;
00799 else if (value < 0x10000)
00800 byte_count = 3;
00801 else if (value < 0x200000)
00802 byte_count = 4;
00803 else if (value < 0x4000000)
00804 byte_count = 5;
00805 else if (value <= 0x7FFFFFFF)
00806 byte_count = 6;
00807 else
00808 return 0;;
00809
00810 if (dest_len < byte_count)
00811 return 0;
00812
00813
00814 dest += byte_count;
00815
00816 dest_len -= byte_count;
00817 switch (byte_count)
00818 {
00819 case 6 : *--dest = (ub1_t)((value | 0x80UL) & 0xBFUL); value >>= 6;
00820 case 5 : *--dest = (ub1_t)((value | 0x80UL) & 0xBFUL); value >>= 6;
00821 case 4 : *--dest = (ub1_t)((value | 0x80UL) & 0xBFUL); value >>= 6;
00822 case 3 : *--dest = (ub1_t)((value | 0x80UL) & 0xBFUL); value >>= 6;
00823 case 2 : *--dest = (ub1_t)((value | 0x80UL) & 0xBFUL); value >>= 6;
00824 case 1 : *--dest = (ub1_t)(value | str_leadingByte[byte_count - 1]);
00825 }
00826
00827
00828 dest += byte_count;
00829 ++len_;
00830 }
00831
00832 return retVal;
00833 }
00834
00835 inline
00836 const char*
00837 unicode_to_multibyte(string_t& dest, const wchar_t* x, size_t len)
00838 {
00839 dest = 0;
00840
00841 if (!x)
00842 return 0;
00843
00844 size_t src_len = len == os_minus_one ? wcslen(x) : len;
00845 size_t _len = unicode_to_multibyte_len(x, src_len);
00846 return unicode_to_multibyte(dest.reserve(_len), _len, x, src_len);
00847 }
00848
00849 inline
00850 const char*
00851 unicode_to_multibyte(byte_allocator& _allocator, const wchar_t* x, size_t len)
00852 {
00853 char* dest = 0;
00854
00855 if (!x)
00856 return 0;
00857
00858 size_t src_len = len == os_minus_one ? wcslen(x) : len;
00859 size_t _len = unicode_to_multibyte_len(x, src_len);
00860 dest = (char*)_allocator.allocate(_len + 1);
00861 dest[_len] = 0;
00862 return unicode_to_multibyte(dest, _len, x, src_len);
00863 }
00864 }
00865
00866 #pragma pack()
00867 END_TERIMBER_NAMESPACE
00868
00869 #endif // _terimber_string_hpp_
00870