Home / Open source / Terimber 2.0
defxml.h File Reference
#include "xml/declxml.h"
#include "base/common.h"
Go to the source code of this file.
|
Functions |
xml_forceinline bool | usascii_to_utf8 (ub4_t in, ub1_t *out, size_t &count) |
| converts ascii char to utf-8 char sequence
|
xml_forceinline bool | fixedN_to_utf8 (encodingSchema schema, const ub1_t *in, size_t count, ub1_t *out, size_t &converted, size_t &processed, size_t &more) |
| converts fixed width char buffer into utf-8 buffer
|
xml_forceinline bool | utf8_to_utf8 (const ub1_t *in, size_t count, size_t &processed, size_t &more) |
| checks if the input buffer is valid utf-8 string
|
xml_forceinline void | windowsN_to_utf8 (encodingSchema schema, const ub1_t *in, size_t count, ub1_t *out, size_t &converted, size_t &processed) |
| converts the Windows encoding to utf-8
|
xml_forceinline void | isoN_to_utf8 (encodingSchema schema, const ub1_t *in, size_t count, ub1_t *out, size_t &converted, size_t &processed) |
| converts the ISO encoding to utf-8
|
void | tokenValues (const char *x, _list< const char * > &values, byte_allocator &allocator_) |
| tokenizes the string into list of tokens
|
Variables |
BEGIN_TERIMBER_NAMESPACE const char | str_xml [] = { ch_x, ch_m, ch_l, ch_null } |
const char | str_w3c_xml [] = { ch_h, ch_t, ch_t, ch_p, ch_colon, ch_forward_slash, ch_forward_slash, ch_w, ch_w, ch_w, ch_period, ch_w, ch_3, ch_period, ch_o, ch_r, ch_g, ch_forward_slash, ch_X, ch_M, ch_L, ch_forward_slash, ch_1, ch_9, ch_9, ch_8, ch_forward_slash, ch_n, ch_a, ch_m, ch_e, ch_s, ch_p, ch_a, ch_c, ch_e, ch_null } |
const char | str_xmlns [] = { ch_x, ch_m, ch_l, ch_n, ch_s, ch_null } |
const char | str_version [] = { ch_v, ch_e, ch_r, ch_s, ch_i, ch_o, ch_n, ch_null } |
const char | str_encoding [] = { ch_e, ch_n, ch_c, ch_o, ch_d, ch_i, ch_n, ch_g, ch_null } |
const char | str_standalone [] = { ch_s, ch_t, ch_a, ch_n, ch_d, ch_a, ch_l, ch_o, ch_n, ch_e, ch_null } |
const char | str_SYSTEM [] = { ch_S, ch_Y, ch_S, ch_T, ch_E, ch_M, ch_null } |
const char | str_PUBLIC [] = { ch_P, ch_U, ch_B, ch_L, ch_I, ch_C, ch_null } |
const char | str_DOCTYPE [] = { ch_D, ch_O, ch_C, ch_T, ch_Y, ch_P, ch_E, ch_null } |
const char | str_ELEMENT [] = { ch_E, ch_L, ch_E, ch_M, ch_E, ch_N, ch_T, ch_null } |
const char | str_ENTITY [] = { ch_E, ch_N, ch_T, ch_I, ch_T, ch_Y, ch_null } |
const char | str_ENTITIES [] = { ch_E, ch_N, ch_T, ch_I, ch_T, ch_I, ch_E, ch_S, ch_null } |
const char | str_ENTIT [] = { ch_E, ch_N, ch_T, ch_I, ch_T, ch_null } |
const char | str_ATTRLIST [] = { ch_A, ch_T, ch_T, ch_L, ch_I, ch_S, ch_T, ch_null } |
const char | str_NOTATION [] = { ch_N, ch_O, ch_T, ch_A, ch_T, ch_I, ch_O, ch_N, ch_null } |
const char | str_EMPTY [] = { ch_E, ch_M, ch_P, ch_T, ch_Y, ch_null } |
const char | str_ANY [] = { ch_A, ch_N, ch_Y, ch_null } |
const char | str__PCDATA [] = { ch_pound, ch_P, ch_C, ch_D, ch_A, ch_T, ch_A, ch_null } |
const char | str_yes [] = { ch_y, ch_e, ch_s, ch_null } |
const char | str_no [] = { ch_n, ch_o, ch_null } |
const char | str_CDATA [] = { ch_C, ch_D, ch_A, ch_T, ch_A, ch_null } |
const char | str_CTYPE [] = { ch_C, ch_T, ch_Y, ch_P, ch_E, ch_null } |
const char | str_PCDATA [] = { ch_P, ch_C, ch_D, ch_A, ch_T, ch_A, ch_null } |
const char | str_IGNORE [] = { ch_I, ch_G, ch_N, ch_O, ch_R, ch_E, ch_null } |
const char | str_INCLUDE [] = { ch_I, ch_N, ch_C, ch_L, ch_U, ch_D, ch_E, ch_null } |
const char | str_ID [] = { ch_I, ch_D, ch_null } |
const char | str_IDREF [] = { ch_I, ch_D, ch_R, ch_E, ch_F, ch_null } |
const char | str_IDREFS [] = { ch_I, ch_D, ch_R, ch_E, ch_F, ch_S, ch_null } |
const char | str_REF [] = { ch_R, ch_E, ch_F, ch_null } |
const char | str_NMTOKEN [] = { ch_N, ch_M, ch_T, ch_O, ch_K, ch_E, ch_N, ch_null } |
const char | str_NMTOKENS [] = { ch_N, ch_M, ch_T, ch_O, ch_K, ch_E, ch_N, ch_S, ch_null } |
const char | str_REQUIRED [] = { ch_R, ch_E, ch_Q, ch_U, ch_I, ch_R, ch_E, ch_D, ch_null } |
const char | str_IMPLIED [] = { ch_I, ch_M, ch_P, ch_L, ch_I, ch_E, ch_D, ch_null } |
const char | str_FIXED [] = { ch_F, ch_I, ch_X, ch_E, ch_D, ch_null } |
const char | str_NDATA [] = { ch_N, ch_D, ch_A, ch_T, ch_A, ch_null } |
const char | str_xml_space [] = { ch_x, ch_m, ch_l, ch_colon, ch_s, ch_p, ch_a, ch_c, ch_e, ch_null } |
const char | str_default [] = { ch_d, ch_e, ch_f, ch_a, ch_u, ch_l, ch_t, ch_null } |
const char | str_preserve [] = { ch_p, ch_r, ch_e, ch_s, ch_e, ch_r, ch_v, ch_e, ch_null } |
const char | str_apos [] = { ch_a, ch_p, ch_o, ch_s, ch_null } |
const char | str_quote [] = { ch_q, ch_u, ch_o, ch_t, ch_null } |
const char | str_amp [] = { ch_a, ch_m, ch_p, ch_null } |
const char | str_lt [] = { ch_l, ch_t, ch_null } |
const char | str_gt [] = { ch_g, ch_t, ch_null } |
const char | str_ch_apos [] = { ch_single_quote, ch_null } |
const char | str_ch_quote [] = { ch_double_quote, ch_null } |
const char | str_ch_amp [] = { ch_ampersand, ch_null } |
const char | str_ch_lt [] = { ch_open_angle, ch_null } |
const char | str_ch_gt [] = { ch_close_angle, ch_null } |
const char | str_ch_colon [] = { ch_colon, ch_null } |
const ub1_t | UTF8Pre [] = { 0x3C, 0x3F, 0x78, 0x6D, 0x6C } |
const ub1_t | EBCDICPre [] = { 0x4C, 0x6F, 0xA7, 0x94, 0x93 } |
const ub1_t | UTF16BPre [] = { 0x00, 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C } |
const ub1_t | UTF16LPre [] = { 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C, 0x00 } |
const ub1_t | UCS4BPre [] = { 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x6C } |
const ub1_t | UCS4LPre [] = { 0x3C, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00 } |
const ub1_t | UTF8BOM [] = { 0xEF, 0xBB, 0xBF } |
const ub1_t | UTF16BBOM [] = { 0xFE, 0xFF } |
const ub1_t | UTF16LBOM [] = { 0xFF, 0xFE } |
const ub1_t | UCS4BBOM [] = { 0x00, 0x00, 0xFE, 0xFF } |
const ub1_t | UCS4LBOM [] = { 0xFF, 0xFE, 0x00, 0x00 } |
const ub1_t | s_leadingByte [6] = {0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC} |
const ub2_t | encoding_table_winodws_1251 [256] |
| windows 1251 encoding table
|
Function Documentation
xml_forceinline bool fixedN_to_utf8 |
( |
encodingSchema |
schema, |
|
|
const ub1_t * |
in, |
|
|
size_t |
count, |
|
|
ub1_t * |
out, |
|
|
size_t & |
converted, |
|
|
size_t & |
processed, |
|
|
size_t & |
more | |
|
) |
| | |
converts fixed width char buffer into utf-8 buffer
- Parameters:
-
schema |
encoding schema |
in |
input buffer |
count |
input buffer length |
out |
output buffer |
converted |
input chars converted |
processed |
output chars processed |
more |
number of additional input chars required to complete the output utf-8 char when the input chars are cropped |
Definition at line 98 of file defxml.hpp.
References ch_cr, ch_hor_tab, ch_lf, s_leadingByte, UCS_4B, UCS_4BS, UCS_4L, UCS_4LS, UTF_16B, and UTF_16L.
Referenced by byte_source::convert_chars(), and byte_source::taste_buffer().
xml_forceinline void isoN_to_utf8 |
( |
encodingSchema |
schema, |
|
|
const ub1_t * |
in, |
|
|
size_t |
count, |
|
|
ub1_t * |
out, |
|
|
size_t & |
converted, |
|
|
size_t & |
processed | |
|
) |
| | |
converts the ISO encoding to utf-8
- Parameters:
-
schema |
encoding schema |
in |
input ISO encoding buffer |
count |
input buffer length |
out |
output buffer |
converted |
input chars converted |
processed |
output chars processed |
Definition at line 375 of file defxml.hpp.
References s_leadingByte, and UTF_ISO88591.
Referenced by byte_source::convert_chars().
void tokenValues |
( |
const char * |
x, |
|
|
_list< const char * > & |
values, |
|
|
byte_allocator & |
allocator_ | |
|
) |
| | |
xml_forceinline bool usascii_to_utf8 |
( |
ub4_t |
in, |
|
|
ub1_t * |
out, |
|
|
size_t & |
count | |
|
) |
| | |
xml_forceinline bool utf8_to_utf8 |
( |
const ub1_t * |
in, |
|
|
size_t |
count, |
|
|
size_t & |
processed, |
|
|
size_t & |
more | |
|
) |
| | |
xml_forceinline void windowsN_to_utf8 |
( |
encodingSchema |
schema, |
|
|
const ub1_t * |
in, |
|
|
size_t |
count, |
|
|
ub1_t * |
out, |
|
|
size_t & |
converted, |
|
|
size_t & |
processed | |
|
) |
| | |
Variable Documentation
const char str_standalone[] = { ch_s, ch_t, ch_a, ch_n, ch_d, ch_a, ch_l, ch_o, ch_n, ch_e, ch_null } |
const char str_w3c_xml[] = { ch_h, ch_t, ch_t, ch_p, ch_colon, ch_forward_slash, ch_forward_slash, ch_w, ch_w, ch_w, ch_period, ch_w, ch_3, ch_period, ch_o, ch_r, ch_g, ch_forward_slash, ch_X, ch_M, ch_L, ch_forward_slash, ch_1, ch_9, ch_9, ch_8, ch_forward_slash, ch_n, ch_a, ch_m, ch_e, ch_s, ch_p, ch_a, ch_c, ch_e, ch_null } |
const ub1_t UCS4BPre[] = { 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x6C } |
const ub1_t UCS4LPre[] = { 0x3C, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00 } |
const ub1_t UTF16BPre[] = { 0x00, 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C } |
const ub1_t UTF16LPre[] = { 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C, 0x00 } |
|
|