[core] add option to toggle ampersand entity encoding on/off when serializing an XML string

This commit is contained in:
Seven Du 2019-11-18 12:20:49 +08:00 committed by Andrey Volk
parent 917d850b04
commit 79079942de
3 changed files with 85 additions and 29 deletions

View File

@ -57,6 +57,8 @@
#define FREESWITCH_XML_H #define FREESWITCH_XML_H
#include <switch.h> #include <switch.h>
/* Use UTF-8 as the general encoding */
#define USE_UTF_8_ENCODING SWITCH_TRUE
struct switch_xml_binding; struct switch_xml_binding;
@ -213,10 +215,15 @@ SWITCH_DECLARE(switch_xml_t) switch_xml_get(_In_ switch_xml_t xml,...);
///\ must be freed. ///\ must be freed.
///\param xml the xml node ///\param xml the xml node
///\param prn_header add <?xml version..> header too ///\param prn_header add <?xml version..> header too
///\param use_utf8_encoding if true, encode non-ASCII UTF-8 characters as numeric ampersand entities (e.g. &#xE7;); if false, emit them unchanged
///\return the ampersanded html text string to display xml ///\return the ampersanded html text string to display xml
SWITCH_DECLARE(char *) switch_xml_toxml(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header); #define switch_xml_toxml(xml, prn_header) switch_xml_toxml_ex(xml, prn_header, USE_UTF_8_ENCODING)
SWITCH_DECLARE(char *) switch_xml_toxml_nolock(switch_xml_t xml, _In_ switch_bool_t prn_header); #define switch_xml_toxml_nolock(xml, prn_header) switch_xml_toxml_nolock_ex(xml, prn_header, USE_UTF_8_ENCODING)
SWITCH_DECLARE(char *) switch_xml_tohtml(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header); #define switch_xml_tohtml(xml, prn_header) switch_xml_tohtml_ex(xml, prn_header, USE_UTF_8_ENCODING)
SWITCH_DECLARE(char *) switch_xml_toxml_ex(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding);
SWITCH_DECLARE(char *) switch_xml_toxml_nolock_ex(switch_xml_t xml, _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding);
SWITCH_DECLARE(char *) switch_xml_tohtml_ex(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding);
///\brief Converts an switch_xml structure back to xml using the buffer passed in the parameters. ///\brief Converts an switch_xml structure back to xml using the buffer passed in the parameters.
///\param xml the xml node ///\param xml the xml node
@ -224,9 +231,12 @@ SWITCH_DECLARE(char *) switch_xml_tohtml(_In_ switch_xml_t xml, _In_ switch_bool
///\param buflen size of buffer ///\param buflen size of buffer
///\param offset offset to start at ///\param offset offset to start at
///\param prn_header add <?xml version..> header too ///\param prn_header add <?xml version..> header too
///\param use_utf8_encoding if true, encode non-ASCII UTF-8 characters as numeric ampersand entities (e.g. &#xE7;); if false, emit them unchanged
///\return the xml text string ///\return the xml text string
SWITCH_DECLARE(char *) switch_xml_toxml_buf(_In_ switch_xml_t xml, _In_z_ char *buf, _In_ switch_size_t buflen, _In_ switch_size_t offset, #define switch_xml_toxml_buf(xml, buf, buflen, offset, prn_header) switch_xml_toxml_buf(xml, buf, buflen, offset, prn_header, USE_UTF_8_ENCODING);
_In_ switch_bool_t prn_header); SWITCH_DECLARE(char *) switch_xml_toxml_buf_ex(_In_ switch_xml_t xml, _In_z_ char *buf, _In_ switch_size_t buflen, _In_ switch_size_t offset,
_In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding);
///\brief returns a NULL terminated array of processing instructions for the given ///\brief returns a NULL terminated array of processing instructions for the given
///\ target ///\ target

View File

@ -103,9 +103,6 @@ void globfree(glob_t *);
#define SWITCH_XML_WS "\t\r\n " /* whitespace */ #define SWITCH_XML_WS "\t\r\n " /* whitespace */
#define SWITCH_XML_ERRL 128 /* maximum error string length */ #define SWITCH_XML_ERRL 128 /* maximum error string length */
/* Use UTF-8 as the general encoding */
static switch_bool_t USE_UTF_8_ENCODING = SWITCH_TRUE;
static void preprocess_exec_set(char *keyval) static void preprocess_exec_set(char *keyval)
{ {
char *key = keyval; char *key = keyval;
@ -2478,7 +2475,7 @@ SWITCH_DECLARE(switch_xml_t) switch_xml_open_cfg(const char *file_path, switch_x
/* Encodes ampersand sequences appending the results to *dst, reallocating *dst /* Encodes ampersand sequences appending the results to *dst, reallocating *dst
if length exceeds max. a is non-zero for attribute encoding. Returns *dst */ if length exceeds max. a is non-zero for attribute encoding. Returns *dst */
static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst, switch_size_t *dlen, switch_size_t *max, short a) static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst, switch_size_t *dlen, switch_size_t *max, short a, switch_bool_t use_utf8_encoding)
{ {
const char *e = NULL; const char *e = NULL;
int immune = 0; int immune = 0;
@ -2533,7 +2530,7 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
*dlen += sprintf(*dst + *dlen, "&#xD;"); *dlen += sprintf(*dst + *dlen, "&#xD;");
break; break;
default: default:
if (USE_UTF_8_ENCODING && expecting_x_utf_8_char == 0 && ((*s >> 8) & 0x01)) { if (use_utf8_encoding && expecting_x_utf_8_char == 0 && ((*s >> 8) & 0x01)) {
int num = 1; int num = 1;
for (;num<4;num++) { for (;num<4;num++) {
if (! ((*s >> (7-num)) & 0x01)) { if (! ((*s >> (7-num)) & 0x01)) {
@ -2557,7 +2554,7 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
} }
expecting_x_utf_8_char = num - 1; expecting_x_utf_8_char = num - 1;
} else if (USE_UTF_8_ENCODING && expecting_x_utf_8_char > 0) { } else if (use_utf8_encoding && expecting_x_utf_8_char > 0) {
if (((*s >> 6) & 0x03) == 0x2) { if (((*s >> 6) & 0x03) == 0x2) {
unicode_char = unicode_char << 6; unicode_char = unicode_char << 6;
@ -2584,7 +2581,7 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
/* Recursively converts each tag to xml appending it to *s. Reallocates *s if /* Recursively converts each tag to xml appending it to *s. Reallocates *s if
its length exceeds max. start is the location of the previous tag in the its length exceeds max. start is the location of the previous tag in the
parent tag's character content. Returns *s. */ parent tag's character content. Returns *s. */
static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, switch_size_t *max, switch_size_t start, char ***attr, uint32_t *count, int isroot) static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, switch_size_t *max, switch_size_t start, char ***attr, uint32_t *count, int isroot, switch_bool_t use_utf8_encoding)
{ {
int i, j; int i, j;
char *txt; char *txt;
@ -2606,7 +2603,7 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
} }
/* parent character content up to this tag */ /* parent character content up to this tag */
*s = switch_xml_ampencode(txt + start, xml->off - start, s, len, max, 0); *s = switch_xml_ampencode(txt + start, xml->off - start, s, len, max, 0, use_utf8_encoding);
while (*len + strlen(xml->name) + 5 + (strlen(XML_INDENT) * (*count)) + 1 > *max) { /* reallocate s */ while (*len + strlen(xml->name) + 5 + (strlen(XML_INDENT) * (*count)) + 1 > *max) { /* reallocate s */
*s = (char *) switch_must_realloc(*s, *max += SWITCH_XML_BUFSIZE); *s = (char *) switch_must_realloc(*s, *max += SWITCH_XML_BUFSIZE);
@ -2628,7 +2625,7 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
} }
*len += sprintf(*s + *len, " %s=\"", xml->attr[i]); *len += sprintf(*s + *len, " %s=\"", xml->attr[i]);
switch_xml_ampencode(xml->attr[i + 1], 0, s, len, max, 1); switch_xml_ampencode(xml->attr[i + 1], 0, s, len, max, 1, use_utf8_encoding);
*len += sprintf(*s + *len, "\""); *len += sprintf(*s + *len, "\"");
} }
@ -2641,7 +2638,7 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
} }
*len += sprintf(*s + *len, " %s=\"", attr[i][j]); *len += sprintf(*s + *len, " %s=\"", attr[i][j]);
switch_xml_ampencode(attr[i][j + 1], 0, s, len, max, 1); switch_xml_ampencode(attr[i][j + 1], 0, s, len, max, 1, use_utf8_encoding);
*len += sprintf(*s + *len, "\""); *len += sprintf(*s + *len, "\"");
} }
@ -2649,10 +2646,10 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
if (xml->child) { if (xml->child) {
(*count)++; (*count)++;
*s = switch_xml_toxml_r(xml->child, s, len, max, 0, attr, count, 0); *s = switch_xml_toxml_r(xml->child, s, len, max, 0, attr, count, 0, use_utf8_encoding);
} else { } else {
*s = switch_xml_ampencode(xml->txt, 0, s, len, max, 0); /* data */ *s = switch_xml_ampencode(xml->txt, 0, s, len, max, 0, use_utf8_encoding); /* data */
} }
while (*len + strlen(xml->name) + 5 + (strlen(XML_INDENT) * (*count)) > *max) { /* reallocate s */ while (*len + strlen(xml->name) + 5 + (strlen(XML_INDENT) * (*count)) > *max) { /* reallocate s */
@ -2676,35 +2673,34 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
start = off; start = off;
goto tailrecurse; goto tailrecurse;
/* /*
return switch_xml_toxml_r(xml->ordered, s, len, max, off, attr, count); return switch_xml_toxml_r(xml->ordered, s, len, max, off, attr, count, use_utf8_encoding);
*/ */
} else { } else {
if (*count > 0) if (*count > 0)
(*count)--; (*count)--;
return switch_xml_ampencode(txt + off, 0, s, len, max, 0); return switch_xml_ampencode(txt + off, 0, s, len, max, 0, use_utf8_encoding);
} }
} }
SWITCH_DECLARE(char *) switch_xml_toxml_nolock(switch_xml_t xml, switch_bool_t prn_header) SWITCH_DECLARE(char *) switch_xml_toxml_nolock_ex(switch_xml_t xml, switch_bool_t prn_header, switch_bool_t use_utf8_encoding)
{ {
char *s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE); char *s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE);
return switch_xml_toxml_buf(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header); return switch_xml_toxml_buf_ex(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header, use_utf8_encoding);
} }
SWITCH_DECLARE(char *) switch_xml_toxml_ex(switch_xml_t xml, switch_bool_t prn_header, switch_bool_t use_utf8_encoding)
SWITCH_DECLARE(char *) switch_xml_toxml(switch_xml_t xml, switch_bool_t prn_header)
{ {
char *r, *s; char *r, *s;
s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE); s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE);
r = switch_xml_toxml_buf(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header); r = switch_xml_toxml_buf_ex(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header, use_utf8_encoding);
return r; return r;
} }
SWITCH_DECLARE(char *) switch_xml_tohtml(switch_xml_t xml, switch_bool_t prn_header) SWITCH_DECLARE(char *) switch_xml_tohtml_ex(switch_xml_t xml, switch_bool_t prn_header, switch_bool_t use_utf8_encoding)
{ {
char *r, *s, *h; char *r, *s, *h;
switch_size_t rlen = 0; switch_size_t rlen = 0;
@ -2713,15 +2709,15 @@ SWITCH_DECLARE(char *) switch_xml_tohtml(switch_xml_t xml, switch_bool_t prn_hea
s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE); s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE);
h = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE); h = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE);
r = switch_xml_toxml_buf(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header); r = switch_xml_toxml_buf_ex(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header, use_utf8_encoding);
h = switch_xml_ampencode(r, 0, &h, &rlen, &len, 1); h = switch_xml_ampencode(r, 0, &h, &rlen, &len, 1, use_utf8_encoding);
switch_safe_free(r); switch_safe_free(r);
return h; return h;
} }
/* converts a switch_xml structure back to xml, returning a string of xml data that /* converts a switch_xml structure back to xml, returning a string of xml data that
must be freed */ must be freed */
SWITCH_DECLARE(char *) switch_xml_toxml_buf(switch_xml_t xml, char *buf, switch_size_t buflen, switch_size_t offset, switch_bool_t prn_header) SWITCH_DECLARE(char *) switch_xml_toxml_buf_ex(switch_xml_t xml, char *buf, switch_size_t buflen, switch_size_t offset, switch_bool_t prn_header, switch_bool_t use_utf8_encoding)
{ {
switch_xml_t p = (xml) ? xml->parent : NULL; switch_xml_t p = (xml) ? xml->parent : NULL;
switch_xml_root_t root = (switch_xml_root_t) xml; switch_xml_root_t root = (switch_xml_root_t) xml;
@ -2759,7 +2755,7 @@ SWITCH_DECLARE(char *) switch_xml_toxml_buf(switch_xml_t xml, char *buf, switch_
} }
} }
s = switch_xml_toxml_r(xml, &s, &len, &max, 0, root->attr, &count, 1); s = switch_xml_toxml_r(xml, &s, &len, &max, 0, root->attr, &count, 1, use_utf8_encoding);
for (i = 0; !p && root->pi[i]; i++) { /* post-root processing instructions */ for (i = 0; !p && root->pi[i]; i++) { /* post-root processing instructions */
for (k = 2; root->pi[i][k - 1]; k++); for (k = 2; root->pi[i][k - 1]; k++);

View File

@ -68,6 +68,56 @@ FST_MINCORE_BEGIN()
switch_xml_free(xml); switch_xml_free(xml);
} }
FST_TEST_END() FST_TEST_END()
/* Serializes documents containing non-ASCII UTF-8 text twice each:
 *  - switch_xml_toxml() (default encoding) must yield numeric ampersand entities;
 *  - switch_xml_toxml_ex(..., SWITCH_FALSE) must yield the original characters.
 * Every serialized string is freed, and each parsed tree is freed exactly once,
 * only after the last serialization that reads it. */
FST_TEST_BEGIN(test_utf_8)
{
	const char *text = "<xml>Voulez-Vous Parler Français</xml>";
	switch_xml_t xml = switch_xml_parse_str_dynamic((char *)text, SWITCH_TRUE);
	char *xml_string = NULL;

	/* Latin-1 range character in plain text content */
	fst_requires(xml);
	xml_string = switch_xml_toxml(xml, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>Voulez-Vous Parler Fran&#xE7;ais</xml>\n");
	free(xml_string);

	xml_string = switch_xml_toxml_ex(xml, SWITCH_FALSE, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>Voulez-Vous Parler Français</xml>\n");
	free(xml_string);
	switch_xml_free(xml);

	/* Multi-byte CJK text. The separator is the fullwidth comma U+FF0C,
	 * which is what the &#xFF0C; entity in the expected output encodes. */
	text = "<xml>你好，中文</xml>";
	xml = switch_xml_parse_str_dynamic((char *)text, SWITCH_TRUE);
	fst_requires(xml);
	xml_string = switch_xml_toxml(xml, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>&#x4F60;&#x597D;&#xFF0C;&#x4E2D;&#x6587;</xml>\n");
	free(xml_string);

	xml_string = switch_xml_toxml_ex(xml, SWITCH_FALSE, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>你好，中文</xml>\n");
	free(xml_string);
	switch_xml_free(xml);

	/* CDATA content inside a nested tag.
	 * NOTE(review): the whitespace before <tag> in the expected strings must
	 * match the serializer's XML_INDENT exactly -- confirm against switch_xml.c. */
	text = "<xml><tag><![CDATA[Voulez-Vous Parler Français]]></tag></xml>";
	xml = switch_xml_parse_str_dynamic((char *)text, SWITCH_TRUE);
	fst_requires(xml);
	xml_string = switch_xml_toxml(xml, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>\n <tag>Voulez-Vous Parler Fran&#xE7;ais</tag>\n</xml>\n");
	free(xml_string);

	/* The tree is still needed here, so it is freed only after this second pass. */
	xml_string = switch_xml_toxml_ex(xml, SWITCH_FALSE, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>\n <tag>Voulez-Vous Parler Français</tag>\n</xml>\n");
	free(xml_string);
	switch_xml_free(xml);
}
FST_TEST_END()
} }
FST_SUITE_END() FST_SUITE_END()
} }