From 79079942de10abb77e39d2075bf606d864774fb9 Mon Sep 17 00:00:00 2001 From: Seven Du Date: Mon, 18 Nov 2019 12:20:49 +0800 Subject: [PATCH] [core] add option to toggle ampersand entities on/off when serialize xml string --- src/include/switch_xml.h | 20 ++++++++++++---- src/switch_xml.c | 44 ++++++++++++++++------------------- tests/unit/switch_xml.c | 50 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 29 deletions(-) diff --git a/src/include/switch_xml.h b/src/include/switch_xml.h index 7362a587ee..25aa2002c5 100644 --- a/src/include/switch_xml.h +++ b/src/include/switch_xml.h @@ -57,6 +57,8 @@ #define FREESWITCH_XML_H #include +/* Use UTF-8 as the general encoding */ +#define USE_UTF_8_ENCODING SWITCH_TRUE struct switch_xml_binding; @@ -213,10 +215,15 @@ SWITCH_DECLARE(switch_xml_t) switch_xml_get(_In_ switch_xml_t xml,...); ///\ must be freed. ///\param xml the xml node ///\param prn_header add header too +///\param use_utf8_encoding encoding into ampersand entities for UTF-8 chars ///\return the ampersanded html text string to display xml -SWITCH_DECLARE(char *) switch_xml_toxml(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header); -SWITCH_DECLARE(char *) switch_xml_toxml_nolock(switch_xml_t xml, _In_ switch_bool_t prn_header); -SWITCH_DECLARE(char *) switch_xml_tohtml(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header); +#define switch_xml_toxml(xml, prn_header) switch_xml_toxml_ex(xml, prn_header, USE_UTF_8_ENCODING) +#define switch_xml_toxml_nolock(xml, prn_header) switch_xml_toxml_nolock_ex(xml, prn_header, USE_UTF_8_ENCODING) +#define switch_xml_tohtml(xml, prn_header) switch_xml_tohtml_ex(xml, prn_header, USE_UTF_8_ENCODING) + +SWITCH_DECLARE(char *) switch_xml_toxml_ex(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding); +SWITCH_DECLARE(char *) switch_xml_toxml_nolock_ex(switch_xml_t xml, _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding); +SWITCH_DECLARE(char *) 
switch_xml_tohtml_ex(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding); ///\brief Converts an switch_xml structure back to xml using the buffer passed in the parameters. ///\param xml the xml node @@ -224,9 +231,12 @@ SWITCH_DECLARE(char *) switch_xml_tohtml(_In_ switch_xml_t xml, _In_ switch_bool ///\param buflen size of buffer ///\param offset offset to start at ///\param prn_header add header too +///\param use_utf8_encoding encoding into ampersand entities for UTF-8 chars ///\return the xml text string -SWITCH_DECLARE(char *) switch_xml_toxml_buf(_In_ switch_xml_t xml, _In_z_ char *buf, _In_ switch_size_t buflen, _In_ switch_size_t offset, - _In_ switch_bool_t prn_header); +#define switch_xml_toxml_buf(xml, buf, buflen, offset, prn_header) switch_xml_toxml_buf_ex(xml, buf, buflen, offset, prn_header, USE_UTF_8_ENCODING) +SWITCH_DECLARE(char *) switch_xml_toxml_buf_ex(_In_ switch_xml_t xml, _In_z_ char *buf, _In_ switch_size_t buflen, _In_ switch_size_t offset, + _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding); + ///\brief returns a NULL terminated array of processing instructions for the given ///\ target diff --git a/src/switch_xml.c b/src/switch_xml.c index 596fa6a6bf..f794d6f744 100644 --- a/src/switch_xml.c +++ b/src/switch_xml.c @@ -103,9 +103,6 @@ void globfree(glob_t *); #define SWITCH_XML_WS "\t\r\n " /* whitespace */ #define SWITCH_XML_ERRL 128 /* maximum error string length */ -/* Use UTF-8 as the general encoding */ -static switch_bool_t USE_UTF_8_ENCODING = SWITCH_TRUE; - static void preprocess_exec_set(char *keyval) { char *key = keyval; @@ -2478,7 +2475,7 @@ SWITCH_DECLARE(switch_xml_t) switch_xml_open_cfg(const char *file_path, switch_x /* Encodes ampersand sequences appending the results to *dst, reallocating *dst if length exceeds max. a is non-zero for attribute encoding. 
Returns *dst */ -static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst, switch_size_t *dlen, switch_size_t *max, short a) +static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst, switch_size_t *dlen, switch_size_t *max, short a, switch_bool_t use_utf8_encoding) { const char *e = NULL; int immune = 0; @@ -2533,7 +2530,7 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst, *dlen += sprintf(*dst + *dlen, " "); break; default: - if (USE_UTF_8_ENCODING && expecting_x_utf_8_char == 0 && ((*s >> 8) & 0x01)) { + if (use_utf8_encoding && expecting_x_utf_8_char == 0 && ((*s >> 8) & 0x01)) { int num = 1; for (;num<4;num++) { if (! ((*s >> (7-num)) & 0x01)) { @@ -2557,7 +2554,7 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst, } expecting_x_utf_8_char = num - 1; - } else if (USE_UTF_8_ENCODING && expecting_x_utf_8_char > 0) { + } else if (use_utf8_encoding && expecting_x_utf_8_char > 0) { if (((*s >> 6) & 0x03) == 0x2) { unicode_char = unicode_char << 6; @@ -2584,7 +2581,7 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst, /* Recursively converts each tag to xml appending it to *s. Reallocates *s if its length exceeds max. start is the location of the previous tag in the parent tag's character content. Returns *s. 
*/ -static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, switch_size_t *max, switch_size_t start, char ***attr, uint32_t *count, int isroot) +static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, switch_size_t *max, switch_size_t start, char ***attr, uint32_t *count, int isroot, switch_bool_t use_utf8_encoding) { int i, j; char *txt; @@ -2606,7 +2603,7 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, } /* parent character content up to this tag */ - *s = switch_xml_ampencode(txt + start, xml->off - start, s, len, max, 0); + *s = switch_xml_ampencode(txt + start, xml->off - start, s, len, max, 0, use_utf8_encoding); while (*len + strlen(xml->name) + 5 + (strlen(XML_INDENT) * (*count)) + 1 > *max) { /* reallocate s */ *s = (char *) switch_must_realloc(*s, *max += SWITCH_XML_BUFSIZE); @@ -2628,7 +2625,7 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, } *len += sprintf(*s + *len, " %s=\"", xml->attr[i]); - switch_xml_ampencode(xml->attr[i + 1], 0, s, len, max, 1); + switch_xml_ampencode(xml->attr[i + 1], 0, s, len, max, 1, use_utf8_encoding); *len += sprintf(*s + *len, "\""); } @@ -2641,7 +2638,7 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, } *len += sprintf(*s + *len, " %s=\"", attr[i][j]); - switch_xml_ampencode(attr[i][j + 1], 0, s, len, max, 1); + switch_xml_ampencode(attr[i][j + 1], 0, s, len, max, 1, use_utf8_encoding); *len += sprintf(*s + *len, "\""); } @@ -2649,10 +2646,10 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, if (xml->child) { (*count)++; - *s = switch_xml_toxml_r(xml->child, s, len, max, 0, attr, count, 0); + *s = switch_xml_toxml_r(xml->child, s, len, max, 0, attr, count, 0, use_utf8_encoding); } else { - *s = switch_xml_ampencode(xml->txt, 0, s, len, max, 0); /* data */ + *s = switch_xml_ampencode(xml->txt, 0, s, len, max, 0, use_utf8_encoding); /* data 
*/ } while (*len + strlen(xml->name) + 5 + (strlen(XML_INDENT) * (*count)) > *max) { /* reallocate s */ @@ -2676,35 +2673,34 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, start = off; goto tailrecurse; /* - return switch_xml_toxml_r(xml->ordered, s, len, max, off, attr, count); + return switch_xml_toxml_r(xml->ordered, s, len, max, off, attr, count, use_utf8_encoding); */ } else { if (*count > 0) (*count)--; - return switch_xml_ampencode(txt + off, 0, s, len, max, 0); + return switch_xml_ampencode(txt + off, 0, s, len, max, 0, use_utf8_encoding); } } -SWITCH_DECLARE(char *) switch_xml_toxml_nolock(switch_xml_t xml, switch_bool_t prn_header) +SWITCH_DECLARE(char *) switch_xml_toxml_nolock_ex(switch_xml_t xml, switch_bool_t prn_header, switch_bool_t use_utf8_encoding) { char *s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE); - return switch_xml_toxml_buf(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header); + return switch_xml_toxml_buf_ex(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header, use_utf8_encoding); } - -SWITCH_DECLARE(char *) switch_xml_toxml(switch_xml_t xml, switch_bool_t prn_header) +SWITCH_DECLARE(char *) switch_xml_toxml_ex(switch_xml_t xml, switch_bool_t prn_header, switch_bool_t use_utf8_encoding) { char *r, *s; s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE); - r = switch_xml_toxml_buf(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header); + r = switch_xml_toxml_buf_ex(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header, use_utf8_encoding); return r; } -SWITCH_DECLARE(char *) switch_xml_tohtml(switch_xml_t xml, switch_bool_t prn_header) +SWITCH_DECLARE(char *) switch_xml_tohtml_ex(switch_xml_t xml, switch_bool_t prn_header, switch_bool_t use_utf8_encoding) { char *r, *s, *h; switch_size_t rlen = 0; @@ -2713,15 +2709,15 @@ SWITCH_DECLARE(char *) switch_xml_tohtml(switch_xml_t xml, switch_bool_t prn_hea s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE); h = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE); - r = switch_xml_toxml_buf(xml, s, 
SWITCH_XML_BUFSIZE, 0, prn_header); - h = switch_xml_ampencode(r, 0, &h, &rlen, &len, 1); + r = switch_xml_toxml_buf_ex(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header, use_utf8_encoding); + h = switch_xml_ampencode(r, 0, &h, &rlen, &len, 1, use_utf8_encoding); switch_safe_free(r); return h; } /* converts a switch_xml structure back to xml, returning a string of xml data that must be freed */ -SWITCH_DECLARE(char *) switch_xml_toxml_buf(switch_xml_t xml, char *buf, switch_size_t buflen, switch_size_t offset, switch_bool_t prn_header) +SWITCH_DECLARE(char *) switch_xml_toxml_buf_ex(switch_xml_t xml, char *buf, switch_size_t buflen, switch_size_t offset, switch_bool_t prn_header, switch_bool_t use_utf8_encoding) { switch_xml_t p = (xml) ? xml->parent : NULL; switch_xml_root_t root = (switch_xml_root_t) xml; @@ -2759,7 +2755,7 @@ SWITCH_DECLARE(char *) switch_xml_toxml_buf(switch_xml_t xml, char *buf, switch_ } } - s = switch_xml_toxml_r(xml, &s, &len, &max, 0, root->attr, &count, 1); + s = switch_xml_toxml_r(xml, &s, &len, &max, 0, root->attr, &count, 1, use_utf8_encoding); for (i = 0; !p && root->pi[i]; i++) { /* post-root processing instructions */ for (k = 2; root->pi[i][k - 1]; k++); diff --git a/tests/unit/switch_xml.c b/tests/unit/switch_xml.c index f6e28c11e5..3af029ce4d 100644 --- a/tests/unit/switch_xml.c +++ b/tests/unit/switch_xml.c @@ -68,6 +68,56 @@ FST_MINCORE_BEGIN() switch_xml_free(xml); } FST_TEST_END() + + FST_TEST_BEGIN(test_utf_8) + { + const char *text = "Voulez-Vous Parler Français"; + switch_xml_t xml = switch_xml_parse_str_dynamic((char *)text, SWITCH_TRUE); + char *xml_string = NULL; + + fst_requires(xml); + xml_string = switch_xml_toxml(xml, SWITCH_FALSE); + fst_requires(xml_string); + fst_check_string_equals(xml_string, "Voulez-Vous Parler Français\n"); + free(xml_string); + + xml_string = switch_xml_toxml_ex(xml, SWITCH_FALSE, SWITCH_FALSE); + fst_requires(xml_string); + fst_check_string_equals(xml_string, "Voulez-Vous Parler Français\n"); + 
switch_xml_free(xml); + free(xml_string); + + text = "你好,中文"; + xml = switch_xml_parse_str_dynamic((char *)text, SWITCH_TRUE); + + fst_requires(xml); + xml_string = switch_xml_toxml(xml, SWITCH_FALSE); + fst_requires(xml_string); + fst_check_string_equals(xml_string, "你好,中文\n"); + free(xml_string); + + xml_string = switch_xml_toxml_ex(xml, SWITCH_FALSE, SWITCH_FALSE); + fst_requires(xml_string); + fst_check_string_equals(xml_string, "你好,中文\n"); + switch_xml_free(xml); + free(xml_string); + + text = ""; + + xml = switch_xml_parse_str_dynamic((char *)text, SWITCH_TRUE); + fst_requires(xml); + xml_string = switch_xml_toxml(xml, SWITCH_FALSE); + fst_requires(xml_string); + fst_check_string_equals(xml_string, "\n Voulez-Vous Parler Français\n\n"); + free(xml_string); + + xml_string = switch_xml_toxml_ex(xml, SWITCH_FALSE, SWITCH_FALSE); + fst_requires(xml_string); + fst_check_string_equals(xml_string, "\n Voulez-Vous Parler Français\n\n"); + switch_xml_free(xml); + free(xml_string); + } + FST_TEST_END() } FST_SUITE_END() }