00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #ifndef _terimber_defxml_h_
00029 #define _terimber_defxml_h_
00030
00031 #include "xml/declxml.h"
00032 #include "base/common.h"
00033
00034 BEGIN_TERIMBER_NAMESPACE
00035 #pragma pack(4)
00036
00037
// Null-terminated name assembled from the character constants ch_x, ch_m, ch_l
// followed by ch_null. The ch_* constants are declared outside this file;
// presumably this spells "xml" - confirm against their definitions.
00038 const char str_xml[] = { ch_x, ch_m, ch_l, ch_null };
00039
// Keyword and literal tables used by the XML/DTD code. Each array is built
// element by element from ch_* character constants (declared outside this
// file) and ends with ch_null, so each can be used as a C string.
// NOTE(review): the spelled-out text of every array depends on the ch_*
// definitions; the readings given in the notes below assume each ch_<c>
// constant holds the character <c> - confirm.

// Presumably the URI "http://www.w3.org/XML/1998/namespace" (the namespace
// bound to the "xml" prefix) - confirm against the ch_* definitions.
00040 const char str_w3c_xml[] = { ch_h, ch_t, ch_t, ch_p, ch_colon, ch_forward_slash, ch_forward_slash, ch_w, ch_w, ch_w, ch_period, ch_w, ch_3, ch_period, ch_o, ch_r, ch_g, ch_forward_slash, ch_X, ch_M, ch_L, ch_forward_slash, ch_1, ch_9, ch_9, ch_8, ch_forward_slash, ch_n, ch_a, ch_m, ch_e, ch_s, ch_p, ch_a, ch_c, ch_e, ch_null };
00041 const char str_xmlns[] = { ch_x, ch_m, ch_l, ch_n, ch_s, ch_null };
00042 const char str_version[] = { ch_v, ch_e, ch_r, ch_s, ch_i, ch_o, ch_n, ch_null };
00043 const char str_encoding[] = { ch_e, ch_n, ch_c, ch_o, ch_d, ch_i, ch_n, ch_g, ch_null };
00044 const char str_standalone[] = { ch_s, ch_t, ch_a, ch_n, ch_d, ch_a, ch_l, ch_o, ch_n, ch_e, ch_null };
00045 const char str_SYSTEM[] = { ch_S, ch_Y, ch_S, ch_T, ch_E, ch_M, ch_null };
00046 const char str_PUBLIC[] = { ch_P, ch_U, ch_B, ch_L, ch_I, ch_C, ch_null };
00047 const char str_DOCTYPE[] = { ch_D, ch_O, ch_C, ch_T, ch_Y, ch_P, ch_E, ch_null };
00048 const char str_ELEMENT[] = { ch_E, ch_L, ch_E, ch_M, ch_E, ch_N, ch_T, ch_null };
00049 const char str_ENTITY[] = { ch_E, ch_N, ch_T, ch_I, ch_T, ch_Y, ch_null };
00050 const char str_ENTITIES[] = { ch_E, ch_N, ch_T, ch_I, ch_T, ch_I, ch_E, ch_S, ch_null };
// The five characters shared by the two arrays above (E, N, T, I, T);
// presumably used as a common prefix when matching them - confirm with callers.
00051 const char str_ENTIT[] = { ch_E, ch_N, ch_T, ch_I, ch_T, ch_null };
// Note: the identifier says ATTRLIST, but the elements are A, T, T, L, I, S, T
// (no R), which matches the DTD keyword "ATTLIST".
00052 const char str_ATTRLIST[] = { ch_A, ch_T, ch_T, ch_L, ch_I, ch_S, ch_T, ch_null };
00053 const char str_NOTATION[] = { ch_N, ch_O, ch_T, ch_A, ch_T, ch_I, ch_O, ch_N, ch_null };
00054 const char str_EMPTY[] = { ch_E, ch_M, ch_P, ch_T, ch_Y, ch_null };
00055 const char str_ANY[] = { ch_A, ch_N, ch_Y, ch_null };
// Starts with ch_pound; the double underscore in the name stands in for that
// leading character (presumably "#PCDATA"). Compare str_PCDATA below, which
// has the same letters without the leading ch_pound.
00056 const char str__PCDATA[] = { ch_pound, ch_P, ch_C, ch_D, ch_A, ch_T, ch_A, ch_null };
00057 const char str_yes[] = { ch_y, ch_e, ch_s, ch_null };
00058 const char str_no[] = { ch_n, ch_o, ch_null };
00059 const char str_CDATA[] = { ch_C, ch_D, ch_A, ch_T, ch_A, ch_null };
00060 const char str_CTYPE[] = { ch_C, ch_T, ch_Y, ch_P, ch_E, ch_null };
00061 const char str_PCDATA[] = { ch_P, ch_C, ch_D, ch_A, ch_T, ch_A, ch_null };
00062 const char str_IGNORE[] = { ch_I, ch_G, ch_N, ch_O, ch_R, ch_E, ch_null };
00063 const char str_INCLUDE[] = { ch_I, ch_N, ch_C, ch_L, ch_U, ch_D, ch_E, ch_null };
00064 const char str_ID[] = { ch_I, ch_D, ch_null };
00065 const char str_IDREF[] = { ch_I, ch_D, ch_R, ch_E, ch_F, ch_null };
00066 const char str_IDREFS[] = { ch_I, ch_D, ch_R, ch_E, ch_F, ch_S, ch_null };
00067 const char str_REF[] = { ch_R, ch_E, ch_F, ch_null };
00068 const char str_NMTOKEN[] = { ch_N, ch_M, ch_T, ch_O, ch_K, ch_E, ch_N, ch_null };
00069 const char str_NMTOKENS[] = { ch_N, ch_M, ch_T, ch_O, ch_K, ch_E, ch_N, ch_S, ch_null };
00070 const char str_REQUIRED[] = { ch_R, ch_E, ch_Q, ch_U, ch_I, ch_R, ch_E, ch_D, ch_null };
00071 const char str_IMPLIED[] = { ch_I, ch_M, ch_P, ch_L, ch_I, ch_E, ch_D, ch_null };
00072 const char str_FIXED[] = { ch_F, ch_I, ch_X, ch_E, ch_D, ch_null };
00073 const char str_NDATA[] = { ch_N, ch_D, ch_A, ch_T, ch_A, ch_null };
// Contains ch_colon between the "xml" and "space" letters (presumably the
// attribute name "xml:space"); the two arrays after it presumably hold that
// attribute's permitted values - confirm with the code that reads them.
00074 const char str_xml_space[] = { ch_x, ch_m, ch_l, ch_colon, ch_s, ch_p, ch_a, ch_c, ch_e, ch_null };
00075 const char str_default[] = { ch_d, ch_e, ch_f, ch_a, ch_u, ch_l, ch_t, ch_null };
00076 const char str_preserve[] = { ch_p, ch_r, ch_e, ch_s, ch_e, ch_r, ch_v, ch_e, ch_null };
00077
// Five short null-terminated names built from ch_* constants. Presumably the
// names of the XML predefined entities (apos, quot, amp, lt, gt) without the
// surrounding '&' and ';' - confirm against the ch_* definitions and callers.
00078 const char str_apos[] = { ch_a, ch_p, ch_o, ch_s, ch_null };
// Note: the identifier says "quote" but the elements are q, u, o, t (no e).
00079 const char str_quote[] = { ch_q, ch_u, ch_o, ch_t, ch_null };
00080 const char str_amp[] = { ch_a, ch_m, ch_p, ch_null };
00081 const char str_lt[] = { ch_l, ch_t, ch_null };
00082 const char str_gt[] = { ch_g, ch_t, ch_null };
00083
// One-character null-terminated strings: each array holds a single ch_*
// punctuation constant followed by ch_null. The first five names parallel
// str_apos/str_quote/str_amp/str_lt/str_gt, so these are presumably the
// literal characters those entity names expand to - confirm with callers.
00084 const char str_ch_apos[] = { ch_single_quote, ch_null };
00085 const char str_ch_quote[] = { ch_double_quote, ch_null };
00086 const char str_ch_amp[] = { ch_ampersand, ch_null };
00087 const char str_ch_lt[] = { ch_open_angle, ch_null };
00088 const char str_ch_gt[] = { ch_close_angle, ch_null };
00089 const char str_ch_colon[] = { ch_colon, ch_null };
00090
00091
// Raw byte patterns for the five characters "<?xml" in several encodings.
// These are plain byte arrays with NO terminator - take their length with
// sizeof, never with string functions.
// Presumably compared against the first bytes of a document to guess its
// encoding when no byte order mark is present - confirm with callers.

// "<?xml" as single ASCII/UTF-8 bytes: 3C 3F 78 6D 6C.
00092 const ub1_t UTF8Pre[] = { 0x3C, 0x3F, 0x78, 0x6D, 0x6C };
// "<?xml" in EBCDIC: 4C 6F A7 94 93.
00093 const ub1_t EBCDICPre[] = { 0x4C, 0x6F, 0xA7, 0x94, 0x93 };
// Same five characters as 2-byte units, high-order byte first.
00094 const ub1_t UTF16BPre[] = { 0x00, 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C };
// Same five characters as 2-byte units, low-order byte first.
00095 const ub1_t UTF16LPre[] = { 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C, 0x00 };
// Same five characters as 4-byte units, high-order byte first.
00096 const ub1_t UCS4BPre[] = { 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x6C };
// Same five characters as 4-byte units, low-order byte first.
00097 const ub1_t UCS4LPre[] = { 0x3C, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00 };
00098
// Byte order mark (BOM) byte sequences: U+FEFF serialized in UTF-8, in
// 2-byte units (both byte orders) and in 4-byte units (both byte orders).
// Plain byte arrays with no terminator - use sizeof for their length.
// Caution: UCS4LBOM begins with the same two bytes (FF FE) as UTF16LBOM, so
// code that tests these in sequence must try the 4-byte pattern first.
00099 const ub1_t UTF8BOM[] = { 0xEF, 0xBB, 0xBF };
00100 const ub1_t UTF16BBOM[] = { 0xFE, 0xFF };
00101 const ub1_t UTF16LBOM[] = { 0xFF, 0xFE };
00102 const ub1_t UCS4BBOM[] = { 0x00, 0x00, 0xFE, 0xFF };
00103 const ub1_t UCS4LBOM[] = { 0xFF, 0xFE, 0x00, 0x00 };
00104
// Six bit masks: 0x00, then 0xC0, 0xE0, 0xF0, 0xF8, 0xFC (two to six leading
// one-bits). These match the high-bit markers of a UTF-8 lead byte for
// sequences of 1 to 6 bytes. Presumably indexed by (sequence length - 1) and
// OR-ed into the first output byte when encoding - confirm with the
// *_to_utf8 implementations.
00105 const ub1_t s_leadingByte[6] = {0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};
00106
// Declaration only; the definition is not in this file.
// NOTE(review): roles below are inferred from the names and types alone -
// confirm against the implementation.
//   in    - 32-bit input value (presumably a single character code).
//   out   - destination byte buffer; required capacity is not visible here.
//   count - in/out reference (presumably the number of bytes written).
// Returns bool; the meaning of false is not visible here.
00108 xml_forceinline
00109 bool
00110 usascii_to_utf8( ub4_t in,
00111 ub1_t* out,
00112 size_t& count
00113 );
// Declaration only; the definition is not in this file.
// NOTE(review): roles below are inferred from the names and types alone -
// confirm against the implementation.
//   schema    - encodingSchema value selecting the source encoding
//               (presumably one of the fixed-width encodings).
//   in, count - source bytes and their number.
//   out       - destination byte buffer; required capacity is not visible here.
//   converted - by-reference result (presumably bytes written to out).
//   processed - by-reference result (presumably bytes consumed from in).
//   more      - by-reference result (presumably bytes still needed to finish
//               a partial trailing character).
// Returns bool; the meaning of false is not visible here.
00115 xml_forceinline
00116 bool
00117 fixedN_to_utf8( encodingSchema schema,
00118 const ub1_t* in,
00119 size_t count,
00120 ub1_t* out,
00121 size_t& converted,
00122 size_t& processed,
00123 size_t& more
00124 );
// Declaration only; the definition is not in this file.
// Takes no output buffer, so it can only inspect the input.
// NOTE(review): roles below are inferred from the names and types alone -
// presumably this validates/measures a UTF-8 byte run; confirm.
//   in, count - source bytes and their number.
//   processed - by-reference result (presumably bytes accepted).
//   more      - by-reference result (presumably bytes still needed to finish
//               a partial trailing sequence).
// Returns bool; the meaning of false is not visible here.
00126 xml_forceinline
00127 bool
00128 utf8_to_utf8( const ub1_t* in,
00129 size_t count,
00130 size_t& processed,
00131 size_t& more
00132 );
// Declaration only; the definition is not in this file.
// Returns void, so no failure is reported through the return value.
// NOTE(review): roles below are inferred from the names and types alone -
// confirm against the implementation.
//   schema    - encodingSchema value selecting the source encoding
//               (presumably one of the Windows code pages).
//   in, count - source bytes and their number.
//   out       - destination byte buffer; required capacity is not visible here.
//   converted - by-reference result (presumably bytes written to out).
//   processed - by-reference result (presumably bytes consumed from in).
00134 xml_forceinline
00135 void
00136 windowsN_to_utf8( encodingSchema schema,
00137 const ub1_t* in,
00138 size_t count,
00139 ub1_t* out,
00140 size_t& converted,
00141 size_t& processed
00142 );
// Declaration only; the definition is not in this file.
// Same parameter list and void return as windowsN_to_utf8.
// NOTE(review): roles below are inferred from the names and types alone -
// confirm against the implementation.
//   schema    - encodingSchema value selecting the source encoding
//               (presumably one of the ISO-8859 family).
//   in, count - source bytes and their number.
//   out       - destination byte buffer; required capacity is not visible here.
//   converted - by-reference result (presumably bytes written to out).
//   processed - by-reference result (presumably bytes consumed from in).
00144 xml_forceinline
00145 void
00146 isoN_to_utf8( encodingSchema schema,
00147 const ub1_t* in,
00148 size_t count,
00149 ub1_t* out,
00150 size_t& converted,
00151 size_t& processed
00152 );
// Declaration only; the definition is not in this file. Unlike the
// converters above it is not marked xml_forceinline.
// NOTE(review): roles below are inferred from the names and types alone -
// confirm against the implementation.
//   x          - input C string (presumably a delimited list of tokens).
//   values     - list of C-string pointers passed by non-const reference
//                (presumably receives one entry per token).
//   allocator_ - byte_allocator passed by non-const reference (presumably
//                supplies the storage for the entries added to values, which
//                would tie their lifetime to the allocator).
00154 void
00155 tokenValues( const char* x,
00156 _list< const char* >& values,
00157 byte_allocator& allocator_
00158 );
00159
// External 256-entry table of 16-bit values, defined in another translation
// unit. Presumably maps each Windows-1251 byte value to a 16-bit character
// code - confirm against the definition.
// NOTE(review): the identifier misspells "windows" as "winodws"; it is left
// unchanged here because renaming it would break every existing reference.
00161 extern
00162 const ub2_t
00163 encoding_table_winodws_1251[256];
00164
00165 #pragma pack()
00166 END_TERIMBER_NAMESPACE
00167
00168 #endif // _terimber_defxml_h_