From 4cd616cc1df627b0cc7802d6cfc67905ff95564e Mon Sep 17 00:00:00 2001
From: Marc Olivier Chouinard <mochouinard@moctel.com>
Date: Thu, 15 Dec 2011 23:36:20 -0500
Subject: [PATCH] FS-2081 --resolve

---
 src/switch_xml.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/switch_xml.c b/src/switch_xml.c
index 7790c17612..7e717846ef 100644
--- a/src/switch_xml.c
+++ b/src/switch_xml.c
@@ -101,6 +101,9 @@ void globfree(glob_t *);
 #define SWITCH_XML_WS   "\t\r\n "	/* whitespace */
 #define SWITCH_XML_ERRL 128		/* maximum error string length */
 
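+/* When true, switch_xml_ampencode() decodes UTF-8 multi-byte sequences and writes each decoded code point as a "&#x...;" hexadecimal character reference */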
+#define USE_UTF_8_ENCODING SWITCH_TRUE
+
 static int preprocess(const char *cwd, const char *file, int write_fd, int rlevel);
 
 typedef struct switch_xml_root *switch_xml_root_t;
@@ -2238,6 +2241,8 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
 {
 	const char *e = NULL;
 	int immune = 0;
+	int expecting_x_utf_8_char = 0;
+	int unicode_char = 0x000000;
 
 	if (!(s && *s))
 		return *dst;
@@ -2290,7 +2295,47 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
 				*dlen += sprintf(*dst + *dlen, "&#xD;");
 				break;
 			default:
-				(*dst)[(*dlen)++] = *s;
+				if (USE_UTF_8_ENCODING && expecting_x_utf_8_char == 0 && ((*s >> 8) & 0x01)) {
+					int num = 1;
+					for (;num<4;num++) {
+						if (! ((*s >> (7-num)) & 0x01)) {
+							break;
+						}
+					}
+					switch (num) {
+						case 2:
+							unicode_char = *s & 0x1f;
+							break;
+						case 3:
+							unicode_char = *s & 0x0f;
+							break;
+						case 4:
+							unicode_char = *s & 0x07;
+							break;
+						default:
+							switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Invalid UTF-8 Initial charactere, skip it\n");
+							/* Not a valid lead byte (num == 1): nothing is written for it and no continuation bytes will be expected */
+							break;
+					}
+					expecting_x_utf_8_char = num - 1;
+
+				} else if (USE_UTF_8_ENCODING && expecting_x_utf_8_char > 0) {
+					if (((*s >> 6) & 0x03) == 0x2) {
+
+						unicode_char = unicode_char << 6;
+						unicode_char = unicode_char | (*s & 0x3f);
+					} else {
+						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Invalid UTF-8 character to ampersand, skip it\n");
+						expecting_x_utf_8_char = 0;
+						break;
+					}
+					expecting_x_utf_8_char--;
+					if (expecting_x_utf_8_char == 0) {
+						*dlen += sprintf(*dst + *dlen, "&#x%X;", unicode_char);
+					}
+				} else {
+					(*dst)[(*dlen)++] = *s;
+				}
 			}
 		s++;
 	}