mirror of
https://github.com/signalwire/freeswitch.git
synced 2025-08-13 01:26:58 +00:00
FS-2746 --resolve large xmlrpc update thanks garmt
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
###############################################################################
|
||||
# This directory builds libxmlrpc_util, which contains utility
|
||||
# functions that are used by the Xmlprc-c # libraries, and also
|
||||
# directly by Xmlrpc-c programs.
|
||||
# functions that are used by the Xmlrpc-c libraries, and also
|
||||
# directly by Xmlrpc-c programs. Some of them are documented for use
|
||||
# by Xmlrpc-c users, as facilities of the libxmlrpc library (which
|
||||
# prerequires libxmlrpc_util).
|
||||
#
|
||||
# The functions in this library are characterized by being general purpose
|
||||
# programming functions, such as one might wish were in the standard C
|
||||
@@ -29,11 +31,13 @@ SHARED_LIBS_TO_INSTALL := libxmlrpc_util
|
||||
|
||||
TARGET_MODS = \
|
||||
asprintf \
|
||||
base64 \
|
||||
error \
|
||||
make_printable \
|
||||
memblock \
|
||||
select \
|
||||
sleep \
|
||||
string_number \
|
||||
time \
|
||||
utf8 \
|
||||
|
||||
@@ -43,8 +47,6 @@ MAJ=3
|
||||
|
||||
include $(SRCDIR)/common.mk
|
||||
|
||||
CFLAGS = $(CFLAGS_COMMON) $(CFLAGS_PERSONAL) $(CADD)
|
||||
|
||||
INCLUDES = -I$(BLDDIR) -Isrcdir \
|
||||
-I$(BLDDIR)/include -Isrcdir/include -Isrcdir/lib/util/include
|
||||
|
||||
@@ -53,17 +55,13 @@ UTIL_SHLIB = $(call shlibfn,libxmlrpc_util)
|
||||
UTIL_SHLIBLE = $(call shliblefn,libxmlrpc_util)
|
||||
#UTIL_SHLIBLE is e.g. libxmlrpc_util.so
|
||||
|
||||
ifneq ($(SHARED_LIB_TYPE),NONE)
|
||||
TARGET_SHARED_LIBS := $(UTIL_SHLIB) $(UTIL_SHLIBLE)
|
||||
endif
|
||||
|
||||
# This 'common.mk' dependency makes sure the symlinks get built before
|
||||
# this make file is used for anything.
|
||||
|
||||
$(SRCDIR)/common.mk: srcdir blddir
|
||||
|
||||
.PHONY: all
|
||||
all: libxmlrpc_util.a $(TARGET_SHARED_LIBS) $(TARGET_SHARED_LE_LIBS)
|
||||
all: libxmlrpc_util.a $(TARGET_SHARED_LIBRARIES) $(TARGET_SHARED_LE_LIBS)
|
||||
|
||||
# Rule for this is in common.mk, courtesy of TARGET_LIBRARY_NAMES:
|
||||
$(UTIL_SHLIB): $(TARGET_MODS:%=%.osh)
|
||||
@@ -92,4 +90,4 @@ distclean: clean distclean-common
|
||||
.PHONY: dep
|
||||
dep: dep-common
|
||||
|
||||
include Makefile.depend
|
||||
include depend.mk
|
||||
|
@@ -1,10 +1,61 @@
|
||||
//#define _GNU_SOURCE
|
||||
#define _XOPEN_SOURCE 600 /* Make sure strdup() is in <string.h> */
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE /* But only when HAVE_ASPRINTF */
|
||||
#endif
|
||||
#include <stdarg.h>
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "xmlrpc_config.h" /* For HAVE_ASPRINTF, __inline__ */
|
||||
#include "xmlrpc-c/string_int.h"
|
||||
#include "bool.h"
|
||||
|
||||
|
||||
|
||||
static __inline__ void
newVsnprintf(char *       const buffer,
             size_t       const bufferSize,
             const char * const fmt,
             va_list            varargs,
             size_t *     const formattedSizeP) {
/*----------------------------------------------------------------------------
   Format 'fmt' with 'varargs' into 'buffer' (capacity 'bufferSize') the way
   modern vsnprintf() does: output that does not fit is not a failure.

   Return as *formattedSizeP the size XMLRPC_VSNPRINTF reports for the
   formatted string.  When XMLRPC_VSNPRINTF reports a negative value (old
   vsnprintf() or Windows: the text did not fit and the real size is
   unknown), return twice the buffer size instead, i.e. merely "something
   larger than the buffer".

   When 'bufferSize' exceeds INT_MAX/2, format nothing and return zero as
   *formattedSizeP, because neither the int result of XMLRPC_VSNPRINTF nor
   the doubled buffer size could be trusted with a buffer that large.
-----------------------------------------------------------------------------*/
    int rc;

    if (bufferSize > INT_MAX/2) {
        /* Too big to handle safely; see above. */
        *formattedSizeP = 0;
        return;
    }

    rc = XMLRPC_VSNPRINTF(buffer, bufferSize, fmt, varargs);

    /* Negative: did not fit, size unknown.  Otherwise: the reported size,
       whether or not the text fit.
    */
    *formattedSizeP = (rc < 0) ? bufferSize * 2 : (size_t)rc;
}
|
||||
|
||||
|
||||
|
||||
@@ -15,29 +66,24 @@ simpleVasprintf(char ** const retvalP,
|
||||
/*----------------------------------------------------------------------------
|
||||
This is a poor man's implementation of vasprintf(), of GNU fame.
|
||||
-----------------------------------------------------------------------------*/
|
||||
size_t const initialSize = 4096;
|
||||
char * result;
|
||||
size_t bufferSize;
|
||||
bool outOfMemory;
|
||||
|
||||
result = malloc(initialSize);
|
||||
if (result != NULL) {
|
||||
size_t bytesNeeded;
|
||||
bytesNeeded = XMLRPC_VSNPRINTF(result, initialSize, fmt, varargs);
|
||||
if (bytesNeeded > initialSize) {
|
||||
free(result);
|
||||
result = malloc(bytesNeeded);
|
||||
if (result != NULL)
|
||||
XMLRPC_VSNPRINTF(result, bytesNeeded, fmt, varargs);
|
||||
} else if (bytesNeeded == initialSize) {
|
||||
if (result[initialSize-1] != '\0') {
|
||||
/* This is one of those old systems where vsnprintf()
|
||||
returns the number of bytes it used, instead of the
|
||||
number that it needed, and it in fact needed more than
|
||||
we gave it. Rather than mess with this highly unlikely
|
||||
case (old system and string > 4095 characters), we just
|
||||
treat this like an out of memory failure.
|
||||
*/
|
||||
for (result = NULL, bufferSize = 4096, outOfMemory = false;
|
||||
!result && !outOfMemory;
|
||||
) {
|
||||
|
||||
result = malloc(bufferSize);
|
||||
if (!result)
|
||||
outOfMemory = true;
|
||||
else {
|
||||
size_t bytesNeeded;
|
||||
newVsnprintf(result, bufferSize, fmt, varargs, &bytesNeeded);
|
||||
if (bytesNeeded > bufferSize) {
|
||||
free(result);
|
||||
result = NULL;
|
||||
bufferSize = bytesNeeded;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -46,7 +92,28 @@ simpleVasprintf(char ** const retvalP,
|
||||
|
||||
|
||||
|
||||
const char * const xmlrpc_strsol = "[insufficient memory to build string]";
|
||||
static const char * const xmlrpc_strsol =
|
||||
"[insufficient memory to build string]";
|
||||
|
||||
|
||||
|
||||
bool
|
||||
xmlrpc_strnomem(const char * const string) {
|
||||
/*----------------------------------------------------------------------------
|
||||
The string 'string' was generated by a function in this file because it
|
||||
couldn't get enough memory to generate the string that it was supposed to
|
||||
generate. I.e. a preceding call to a string function failed.
|
||||
-----------------------------------------------------------------------------*/
|
||||
return string == xmlrpc_strsol;
|
||||
}
|
||||
|
||||
|
||||
|
||||
const char *
|
||||
xmlrpc_strnomemval() {
|
||||
|
||||
return xmlrpc_strsol;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -71,7 +138,7 @@ xmlrpc_vasprintf(const char ** const retvalP,
|
||||
|
||||
|
||||
|
||||
void GNU_PRINTF_ATTR(2,3)
|
||||
void XMLRPC_PRINTF_ATTR(2,3)
|
||||
xmlrpc_asprintf(const char ** const retvalP, const char * const fmt, ...) {
|
||||
|
||||
va_list varargs; /* mysterious structure used by variable arg facility */
|
||||
@@ -85,6 +152,27 @@ xmlrpc_asprintf(const char ** const retvalP, const char * const fmt, ...) {
|
||||
|
||||
|
||||
|
||||
const char *
|
||||
xmlrpc_strdupsol(const char * const string) {
|
||||
|
||||
const char * retvalOrNull;
|
||||
|
||||
retvalOrNull = strdup(string);
|
||||
|
||||
return retvalOrNull ? retvalOrNull : xmlrpc_strsol;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
xmlrpc_strfree(const char * const string) {
|
||||
|
||||
if (string != xmlrpc_strsol)
|
||||
free((void *)string);
|
||||
}
|
||||
|
||||
|
||||
|
||||
const char *
|
||||
xmlrpc_strdupnull(const char * const string) {
|
||||
|
||||
@@ -96,15 +184,6 @@ xmlrpc_strdupnull(const char * const string) {
|
||||
|
||||
|
||||
|
||||
void
|
||||
xmlrpc_strfree(const char * const string) {
|
||||
|
||||
if (string != xmlrpc_strsol)
|
||||
free((void *)string);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
xmlrpc_strfreenull(const char * const string) {
|
||||
|
||||
|
49
libs/xmlrpc-c/lib/libutil/base64.c
Normal file
49
libs/xmlrpc-c/lib/libutil/base64.c
Normal file
@@ -0,0 +1,49 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "int.h"
|
||||
#include "xmlrpc-c/base64_int.h"
|
||||
|
||||
|
||||
|
||||
void
xmlrpc_base64Encode(const char * const chars,
                    char *       const base64) {
/*----------------------------------------------------------------------------
   Encode the NUL-terminated string 'chars' in base64 and store the result,
   NUL-terminated and padded with '=' to a multiple of 4 characters, at
   'base64'.

   The caller must supply at 'base64' room for
   4 * ((strlen(chars) + 2) / 3) + 1 bytes.

   Input bytes are treated as unsigned values 0-255, so bytes with the high
   bit set encode properly regardless of whether plain 'char' is signed.
-----------------------------------------------------------------------------*/
    /* Conversion table. */
    static const char tbl[64] = {
        'A','B','C','D','E','F','G','H',
        'I','J','K','L','M','N','O','P',
        'Q','R','S','T','U','V','W','X',
        'Y','Z','a','b','c','d','e','f',
        'g','h','i','j','k','l','m','n',
        'o','p','q','r','s','t','u','v',
        'w','x','y','z','0','1','2','3',
        '4','5','6','7','8','9','+','/'
    };

    const unsigned char * const s = (const unsigned char *)chars;
    size_t const length = strlen(chars);

    size_t i;
    char * p;

    p = &base64[0];  /* initial value */

    /* Transform the 3x8 bits to 4x6 bits, as required by base64. */
    for (i = 0; i < length; i += 3) {
        /* Bytes beyond the end of the input count as zero; we never read
           past the terminating NUL.
        */
        unsigned int const b0 = s[i];
        unsigned int const b1 = (i + 1 < length) ? s[i + 1] : 0;
        unsigned int const b2 = (i + 2 < length) ? s[i + 2] : 0;

        *p++ = tbl[b0 >> 2];
        *p++ = tbl[((b0 & 0x3) << 4) | (b1 >> 4)];
        *p++ = tbl[((b1 & 0xf) << 2) | (b2 >> 6)];
        *p++ = tbl[b2 & 0x3f];
    }

    /* Pad the result if necessary: 'i' overshoots 'length' by the number
       of input bytes that were missing from the final group of three.
    */
    if (i == length + 1)
        *(p - 1) = '=';
    else if (i == length + 2)
        *(p - 1) = *(p - 2) = '=';

    /* ...and zero-terminate it. */
    *p = '\0';
}
|
@@ -1,5 +1,7 @@
|
||||
/* Copyright information is at end of file */
|
||||
|
||||
#define _XOPEN_SOURCE 600 /* Make sure strdup() is in <string.h> */
|
||||
|
||||
#include "xmlrpc_config.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
@@ -86,7 +88,7 @@ void
|
||||
xmlrpc_set_fault_formatted_v(xmlrpc_env * const envP,
|
||||
int const code,
|
||||
const char * const format,
|
||||
va_list const args) {
|
||||
va_list args) {
|
||||
|
||||
const char * faultDescription;
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
//#define _GNU_SOURCE
|
||||
#define _XOPEN_SOURCE 600 /* Make sure strdup() is in <string.h> */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
|
@@ -6,6 +6,7 @@
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "mallocvar.h"
|
||||
#include "xmlrpc-c/util_int.h"
|
||||
#include "xmlrpc-c/util.h"
|
||||
|
||||
@@ -19,30 +20,30 @@
|
||||
|
||||
|
||||
xmlrpc_mem_block *
|
||||
xmlrpc_mem_block_new(xmlrpc_env * const env,
|
||||
xmlrpc_mem_block_new(xmlrpc_env * const envP,
|
||||
size_t const size) {
|
||||
|
||||
xmlrpc_mem_block* block;
|
||||
xmlrpc_mem_block * block;
|
||||
|
||||
XMLRPC_ASSERT_ENV_OK(env);
|
||||
XMLRPC_ASSERT_ENV_OK(envP);
|
||||
|
||||
block = (xmlrpc_mem_block*) malloc(sizeof(xmlrpc_mem_block));
|
||||
XMLRPC_FAIL_IF_NULL(block, env, XMLRPC_INTERNAL_ERROR,
|
||||
"Can't allocate memory block");
|
||||
MALLOCVAR(block);
|
||||
|
||||
if (block == NULL)
|
||||
xmlrpc_faultf(envP, "Can't allocate memory block");
|
||||
else {
|
||||
xmlrpc_mem_block_init(envP, block, size);
|
||||
|
||||
xmlrpc_mem_block_init(env, block, size);
|
||||
XMLRPC_FAIL_IF_FAULT(env);
|
||||
|
||||
cleanup:
|
||||
if (env->fault_occurred) {
|
||||
if (block)
|
||||
if (envP->fault_occurred) {
|
||||
free(block);
|
||||
return NULL;
|
||||
} else {
|
||||
return block;
|
||||
block = NULL;
|
||||
}
|
||||
}
|
||||
return block;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Destroy an existing xmlrpc_mem_block, and everything it contains. */
|
||||
void
|
||||
xmlrpc_mem_block_free(xmlrpc_mem_block * const blockP) {
|
||||
@@ -74,7 +75,7 @@ xmlrpc_mem_block_init(xmlrpc_env * const envP,
|
||||
blockP->_block = (void*) malloc(blockP->_allocated);
|
||||
if (!blockP->_block)
|
||||
xmlrpc_faultf(envP, "Can't allocate %u-byte memory block",
|
||||
blockP->_allocated);
|
||||
(unsigned)blockP->_allocated);
|
||||
}
|
||||
|
||||
|
||||
@@ -170,19 +171,15 @@ xmlrpc_mem_block_append(xmlrpc_env * const envP,
|
||||
const void * const data,
|
||||
size_t const len) {
|
||||
|
||||
int size;
|
||||
size_t const originalSize = blockP->_size;
|
||||
|
||||
XMLRPC_ASSERT_ENV_OK(envP);
|
||||
XMLRPC_ASSERT(blockP != NULL);
|
||||
|
||||
size = blockP->_size;
|
||||
xmlrpc_mem_block_resize(envP, blockP, size + len);
|
||||
XMLRPC_FAIL_IF_FAULT(envP);
|
||||
|
||||
memcpy(((unsigned char*) blockP->_block) + size, data, len);
|
||||
|
||||
cleanup:
|
||||
return;
|
||||
xmlrpc_mem_block_resize(envP, blockP, originalSize + len);
|
||||
if (!envP->fault_occurred) {
|
||||
memcpy(((unsigned char*) blockP->_block) + originalSize, data, len);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@@ -1,5 +1,7 @@
|
||||
#define _XOPEN_SOURCE 600 /* Get pselect() in <sys/select.h> */
|
||||
|
||||
#include "xmlrpc_config.h"
|
||||
|
||||
#ifdef WIN32
|
||||
#include <winsock.h>
|
||||
#else
|
||||
@@ -8,12 +10,12 @@
|
||||
in this order appears to work on all.
|
||||
*/
|
||||
#include <sys/time.h>
|
||||
#if HAVE_SYS_SELECT_H
|
||||
#include <sys/select.h>
|
||||
#endif
|
||||
#endif
|
||||
#include <signal.h>
|
||||
|
||||
#include "xmlrpc_config.h"
|
||||
|
||||
#include "xmlrpc-c/select_int.h"
|
||||
|
||||
|
||||
|
46
libs/xmlrpc-c/lib/libutil/string_number.c
Normal file
46
libs/xmlrpc-c/lib/libutil/string_number.c
Normal file
@@ -0,0 +1,46 @@
|
||||
/*============================================================================
|
||||
string_number
|
||||
==============================================================================
|
||||
This file contains utilities for dealing with text string representation
|
||||
of numbers.
|
||||
============================================================================*/
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <xmlrpc-c/base.h>
|
||||
#include <xmlrpc-c/util.h>
|
||||
#include <xmlrpc-c/string_int.h>
|
||||
#include "xmlrpc_config.h"
|
||||
#include "int.h"
|
||||
|
||||
#include <xmlrpc-c/string_number.h>
|
||||
|
||||
|
||||
|
||||
void
|
||||
xmlrpc_parse_int64(xmlrpc_env * const envP,
|
||||
const char * const str,
|
||||
xmlrpc_int64 * const i64P) {
|
||||
|
||||
xmlrpc_int64 i64val;
|
||||
|
||||
char * tail;
|
||||
|
||||
errno = 0;
|
||||
i64val = XMLRPC_STRTOLL(str, &tail, 10);
|
||||
|
||||
if (errno == ERANGE)
|
||||
xmlrpc_faultf(envP, "Number cannot be represented in 64 bits. "
|
||||
"Must be in the range "
|
||||
"[%" XMLRPC_PRId64 " - %" XMLRPC_PRId64 "]",
|
||||
XMLRPC_INT64_MIN, XMLRPC_INT64_MAX);
|
||||
else if (errno != 0)
|
||||
xmlrpc_faultf(envP, "unexpected error: "
|
||||
"strtoll() failed with errno %d (%s)",
|
||||
errno, strerror(errno));
|
||||
else if (tail[0] != '\0')
|
||||
xmlrpc_faultf(envP, "contains non-numerical junk: '%s'", tail);
|
||||
else
|
||||
*i64P = i64val;
|
||||
}
|
@@ -38,6 +38,7 @@
|
||||
** http://www.cl.cam.ac.uk/~mgk25/unicode.html
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "int.h"
|
||||
|
||||
#include "xmlrpc_config.h"
|
||||
@@ -51,31 +52,33 @@
|
||||
** UTF-8 data.
|
||||
*/
|
||||
|
||||
/* The number of bytes in a UTF-8 sequence starting with the character used
|
||||
** as the array index. A zero entry indicates an illegal initial byte.
|
||||
** This table was generated using a Perl script and information from the
|
||||
** UTF-8 standard.
|
||||
**
|
||||
** Fredrik Lundh's UTF-8 decoder Python 2.0 uses a similar table. But
|
||||
** since Python 2.0 has the icky CNRI license, I regenerated this
|
||||
** table from scratch and wrote my own decoder. */
|
||||
static unsigned char utf8_seq_length[256] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
|
||||
static const unsigned char utf8SeqLength[256] = {

    /* utf8SeqLength[B] is the number of bytes in a UTF-8 sequence that starts
       with byte B.  Except zero indicates an illegal initial byte.

       Fredrik Lundh's UTF-8 decoder Python 2.0 uses a similar table.  But since
       Python 2.0 has the icky CNRI license, I generated this table from scratch
       and wrote my own decoder.

       The table is never modified, hence 'const'.
    */

              /* 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F */
    /* 0 */      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 1 */      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 2 */      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 3 */      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 4 */      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 5 */      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 6 */      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 7 */      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 8 */      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 9 */      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* A */      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* B */      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* C */      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* D */      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* E */      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* F */      4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
};
|
||||
|
||||
/* The minimum legal character value for a UTF-8 sequence of the given
|
||||
@@ -118,14 +121,129 @@ static uint32_t const utf8_min_char_for_length[] = {
|
||||
#if HAVE_UNICODE_WCHAR
|
||||
|
||||
|
||||
static void
|
||||
decode_utf8(xmlrpc_env * const envP,
|
||||
const char * const utf8_data,
|
||||
size_t const utf8_len,
|
||||
wchar_t * const ioBuff,
|
||||
size_t * const outBuffLenP) {
|
||||
static void
|
||||
validateContinuation(xmlrpc_env * const envP,
|
||||
char const c) {
|
||||
|
||||
if (!IS_CONTINUATION(c))
|
||||
xmlrpc_env_set_fault_formatted(
|
||||
envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"UTF-8 multibyte sequence contains character 0x%02x, "
|
||||
"which does not indicate continuation.", c);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void
|
||||
validateUtf16(xmlrpc_env * const envP,
|
||||
wchar_t const wc) {
|
||||
|
||||
if (wc > UCS2_MAX_LEGAL_CHARACTER)
|
||||
xmlrpc_env_set_fault_formatted(
|
||||
envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"UCS-2 characters > U+FFFD are illegal. String contains 0x%04x",
|
||||
(unsigned)wc);
|
||||
else if (UTF16_FIRST_SURROGATE <= wc && wc <= UTF16_LAST_SURROGATE)
|
||||
xmlrpc_env_set_fault_formatted(
|
||||
envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"UTF-16 surrogates may not appear in UTF-8 data. "
|
||||
"String contains %04x", (unsigned)wc);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Microsoft Visual C in debug mode produces code that complains about
|
||||
returning an undefined value from decodeMultibyte().  It's a bogus
|
||||
complaint, because this function is defined to return nothing meaningful
|
||||
in those cases.  So we disable the check.
|
||||
*/
|
||||
#pragma runtime_checks("u", off)
|
||||
|
||||
static void
|
||||
decodeMultibyte(xmlrpc_env * const envP,
|
||||
const char * const utf8_seq,
|
||||
size_t const length,
|
||||
wchar_t * const wcP) {
|
||||
/*----------------------------------------------------------------------------
|
||||
Decode to UCS-2 (or validates as UTF-8 that can be decoded to UCS-2)
|
||||
Decode the multibyte UTF-8 sequence which is 'length' characters
|
||||
at 'utf8_data'.
|
||||
|
||||
Return the character in UTF-16 format as *wcP.
|
||||
-----------------------------------------------------------------------------*/
|
||||
wchar_t wc;
|
||||
|
||||
assert(utf8_seq[0] & 0x80); /* High bit set: this is multibyte seq */
|
||||
|
||||
switch (length) {
|
||||
case 2:
|
||||
/* 110xxxxx 10xxxxxx */
|
||||
validateContinuation(envP, utf8_seq[1]);
|
||||
|
||||
if (!envP->fault_occurred)
|
||||
wc = ((((wchar_t) (utf8_seq[0] & 0x1F)) << 6) |
|
||||
(((wchar_t) (utf8_seq[1] & 0x3F))));
|
||||
break;
|
||||
|
||||
case 3:
|
||||
/* 1110xxxx 10xxxxxx 10xxxxxx */
|
||||
validateContinuation(envP, utf8_seq[1]);
|
||||
if (!envP->fault_occurred) {
|
||||
validateContinuation(envP, utf8_seq[2]);
|
||||
if (!envP->fault_occurred)
|
||||
wc = ((((wchar_t) (utf8_seq[0] & 0x0F)) << 12) |
|
||||
(((wchar_t) (utf8_seq[1] & 0x3F)) << 6) |
|
||||
(((wchar_t) (utf8_seq[2] & 0x3F))));
|
||||
}
|
||||
break;
|
||||
|
||||
case 4:
|
||||
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||
case 5:
|
||||
/* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||
case 6:
|
||||
/* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||
/* This would require more than 16 bits in UTF-16, so
|
||||
it can't be represented in UCS-2, so it's beyond
|
||||
our capability. Characters in the BMP fit in 16
|
||||
bits.
|
||||
*/
|
||||
xmlrpc_env_set_fault_formatted(
|
||||
envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"UTF-8 string contains a character not in the "
|
||||
"Basic Multilingual Plane (first byte 0x%02x)",
|
||||
utf8_seq[0]);
|
||||
break;
|
||||
|
||||
default:
|
||||
xmlrpc_faultf(envP,
|
||||
"Internal error: Impossible UTF-8 sequence length %u",
|
||||
(unsigned)length);
|
||||
}
|
||||
|
||||
if (!envP->fault_occurred)
|
||||
validateUtf16(envP, wc);
|
||||
|
||||
if (!envP->fault_occurred)
|
||||
if ((uint32_t)wc < utf8_min_char_for_length[length])
|
||||
xmlrpc_env_set_fault_formatted(
|
||||
envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"Overlong UTF-8 sequence not allowed");
|
||||
|
||||
*wcP = wc;
|
||||
}
|
||||
|
||||
#pragma runtime_checks("u", restore)
|
||||
|
||||
|
||||
|
||||
static void
|
||||
decodeUtf8(xmlrpc_env * const envP,
|
||||
const char * const utf8_data,
|
||||
size_t const utf8_len,
|
||||
wchar_t * const ioBuff,
|
||||
size_t * const outBuffLenP) {
|
||||
/*----------------------------------------------------------------------------
|
||||
Decode to UCS-2 (or validate as UTF-8 that can be decoded to UCS-2)
|
||||
a UTF-8 string. To validate, set ioBuff and outBuffLenP to NULL.
|
||||
To decode, allocate a sufficiently large buffer, pass it as ioBuff,
|
||||
and pass a pointer as as outBuffLenP. The data will be written to
|
||||
@@ -134,132 +252,60 @@ decode_utf8(xmlrpc_env * const envP,
|
||||
We assume that wchar_t holds a single UCS-2 character in native-endian
|
||||
byte ordering.
|
||||
-----------------------------------------------------------------------------*/
|
||||
size_t i, length, out_pos;
|
||||
char init, con1, con2;
|
||||
wchar_t wc;
|
||||
size_t utf8Cursor;
|
||||
size_t outPos;
|
||||
|
||||
XMLRPC_ASSERT_ENV_OK(envP);
|
||||
XMLRPC_ASSERT_PTR_OK(utf8_data);
|
||||
XMLRPC_ASSERT((!ioBuff && !outBuffLenP) ||
|
||||
(ioBuff && outBuffLenP));
|
||||
XMLRPC_ASSERT((!ioBuff && !outBuffLenP) || (ioBuff && outBuffLenP));
|
||||
|
||||
/* Suppress GCC warning about possibly undefined variable. */
|
||||
wc = 0;
|
||||
for (utf8Cursor = 0, outPos = 0;
|
||||
utf8Cursor < utf8_len && !envP->fault_occurred;
|
||||
) {
|
||||
|
||||
char const init = utf8_data[utf8Cursor];
|
||||
/* Initial byte of the UTF-8 sequence */
|
||||
|
||||
wchar_t wc;
|
||||
|
||||
i = 0;
|
||||
out_pos = 0;
|
||||
while (i < utf8_len) {
|
||||
init = utf8_data[i];
|
||||
if ((init & 0x80) == 0x00) {
|
||||
/* Convert ASCII character to wide character. */
|
||||
wc = init;
|
||||
i++;
|
||||
++utf8Cursor;
|
||||
} else {
|
||||
/* Look up the length of this UTF-8 sequence. */
|
||||
length = utf8_seq_length[(unsigned char) init];
|
||||
|
||||
/* Check to make sure we have enough bytes to convert. */
|
||||
if (i + length > utf8_len)
|
||||
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"Truncated UTF-8 sequence");
|
||||
|
||||
/* Decode a multibyte UTF-8 sequence. */
|
||||
switch (length) {
|
||||
case 0:
|
||||
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"Invalid UTF-8 initial byte");
|
||||
|
||||
case 2:
|
||||
/* 110xxxxx 10xxxxxx */
|
||||
con1 = utf8_data[i+1];
|
||||
if (!IS_CONTINUATION(con1))
|
||||
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"UTF-8 sequence too short");
|
||||
wc = ((((wchar_t) (init & 0x1F)) << 6) |
|
||||
(((wchar_t) (con1 & 0x3F))));
|
||||
break;
|
||||
|
||||
case 3:
|
||||
/* 1110xxxx 10xxxxxx 10xxxxxx */
|
||||
con1 = utf8_data[i+1];
|
||||
con2 = utf8_data[i+2];
|
||||
if (!IS_CONTINUATION(con1) || !IS_CONTINUATION(con2))
|
||||
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"UTF-8 sequence too short");
|
||||
wc = ((((wchar_t) (init & 0x0F)) << 12) |
|
||||
(((wchar_t) (con1 & 0x3F)) << 6) |
|
||||
(((wchar_t) (con2 & 0x3F))));
|
||||
break;
|
||||
|
||||
case 4:
|
||||
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||
case 5:
|
||||
/* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||
case 6:
|
||||
/* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||
/* This would require more than 16 bits in UTF-16, so
|
||||
it can't be represented in UCS-2, so it's beyond
|
||||
our capability. Characters in the BMP fit in 16
|
||||
bits.
|
||||
*/
|
||||
size_t const length = utf8SeqLength[(unsigned char) init];
|
||||
|
||||
if (length == 0)
|
||||
xmlrpc_env_set_fault_formatted(
|
||||
envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"UTF-8 string contains a character not in the "
|
||||
"Basic Multilingual Plane (first byte %08x)",
|
||||
init);
|
||||
goto cleanup;
|
||||
|
||||
default:
|
||||
XMLRPC_ASSERT("Error in UTF-8 decoder tables");
|
||||
"Unrecognized UTF-8 initial byte value 0x%02x", init);
|
||||
else {
|
||||
/* Make sure we have enough bytes to convert. */
|
||||
if (utf8Cursor + length > utf8_len) {
|
||||
xmlrpc_env_set_fault_formatted(
|
||||
envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"Invalid UTF-8 sequence indicates a %u-byte sequence "
|
||||
"when only %u bytes are left in the string",
|
||||
(unsigned)length, (unsigned)(utf8_len - utf8Cursor));
|
||||
} else {
|
||||
decodeMultibyte(envP, &utf8_data[utf8Cursor], length, &wc);
|
||||
|
||||
/* Advance to the end of the sequence. */
|
||||
utf8Cursor += length;
|
||||
}
|
||||
}
|
||||
|
||||
/* Advance to the end of the sequence. */
|
||||
i += length;
|
||||
|
||||
/* Check for illegal UCS-2 characters. */
|
||||
if (wc > UCS2_MAX_LEGAL_CHARACTER)
|
||||
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"UCS-2 characters > U+FFFD are illegal");
|
||||
|
||||
/* Check for UTF-16 surrogates. */
|
||||
if (UTF16_FIRST_SURROGATE <= wc && wc <= UTF16_LAST_SURROGATE)
|
||||
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"UTF-16 surrogates may not appear in UTF-8 data");
|
||||
|
||||
/* Check for overlong sequences. */
|
||||
if ((uint32_t)wc < utf8_min_char_for_length[length])
|
||||
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"Overlong UTF-8 sequence not allowed");
|
||||
}
|
||||
|
||||
/* If we have a buffer, write our character to it. */
|
||||
if (ioBuff) {
|
||||
ioBuff[out_pos++] = wc;
|
||||
|
||||
if (!envP->fault_occurred) {
|
||||
/* If we have a buffer, write our character to it. */
|
||||
if (ioBuff)
|
||||
ioBuff[outPos++] = wc;
|
||||
}
|
||||
}
|
||||
|
||||
/* Record the number of characters we found. */
|
||||
|
||||
if (outBuffLenP)
|
||||
*outBuffLenP = out_pos;
|
||||
|
||||
cleanup:
|
||||
if (envP->fault_occurred) {
|
||||
if (outBuffLenP)
|
||||
*outBuffLenP = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
xmlrpc_validate_utf8(xmlrpc_env * const env,
|
||||
const char * const utf8_data,
|
||||
size_t const utf8_len) {
|
||||
/*----------------------------------------------------------------------------
|
||||
Validate that a string is valid UTF-8.
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
decode_utf8(env, utf8_data, utf8_len, NULL, NULL);
|
||||
*outBuffLenP = envP->fault_occurred ? 0 : outPos;
|
||||
}
|
||||
|
||||
|
||||
@@ -286,9 +332,9 @@ xmlrpc_utf8_to_wcs(xmlrpc_env * const envP,
|
||||
wcsP = XMLRPC_MEMBLOCK_NEW(wchar_t, envP, utf8_len);
|
||||
if (!envP->fault_occurred) {
|
||||
/* Decode the UTF-8 data. */
|
||||
decode_utf8(envP, utf8_data, utf8_len,
|
||||
XMLRPC_MEMBLOCK_CONTENTS(wchar_t, wcsP),
|
||||
&wcs_length);
|
||||
decodeUtf8(envP, utf8_data, utf8_len,
|
||||
XMLRPC_MEMBLOCK_CONTENTS(wchar_t, wcsP),
|
||||
&wcs_length);
|
||||
if (!envP->fault_occurred) {
|
||||
/* We can't have overrun our buffer. */
|
||||
XMLRPC_ASSERT(wcs_length <= utf8_len);
|
||||
@@ -329,7 +375,8 @@ xmlrpc_wcs_to_utf8(xmlrpc_env * const envP,
|
||||
|
||||
utf8P = XMLRPC_MEMBLOCK_NEW(char, envP, estimate);
|
||||
if (!envP->fault_occurred) {
|
||||
unsigned char * const buffer = XMLRPC_MEMBLOCK_CONTENTS(char, utf8P);
|
||||
unsigned char * const buffer =
|
||||
XMLRPC_MEMBLOCK_CONTENTS(unsigned char, utf8P);
|
||||
size_t bytesUsed;
|
||||
size_t i;
|
||||
|
||||
@@ -401,13 +448,12 @@ xmlrpc_force_to_utf8(char * const buffer) {
|
||||
char * p;
|
||||
|
||||
for (p = &buffer[0]; *p;) {
|
||||
uint const length = utf8_seq_length[(unsigned char) *p];
|
||||
unsigned int const length = utf8SeqLength[(unsigned char) *p];
|
||||
|
||||
bool forceDel;
|
||||
uint32_t decoded;
|
||||
|
||||
forceDel = false;
|
||||
decoded = 0; /* suppress compiler warning; valid when !forceDel */
|
||||
forceDel = false; /* initial value */
|
||||
|
||||
switch (length) {
|
||||
case 1:
|
||||
@@ -482,7 +528,7 @@ xmlrpc_force_to_xml_chars(char * const buffer) {
|
||||
char * p;
|
||||
|
||||
for (p = &buffer[0]; *p;) {
|
||||
uint const length = utf8_seq_length[(unsigned char) *p];
|
||||
unsigned int const length = utf8SeqLength[(unsigned char) *p];
|
||||
|
||||
if (length == 1) {
|
||||
if (*p < 0x20 && *p != '\r' && *p != '\n' && *p != '\t')
|
||||
@@ -505,7 +551,31 @@ xmlrpc_force_to_xml_chars(char * const buffer) {
|
||||
|
||||
|
||||
|
||||
void
|
||||
xmlrpc_validate_utf8(xmlrpc_env * const envP,
|
||||
const char * const utf8_data,
|
||||
size_t const utf8_len) {
|
||||
/*----------------------------------------------------------------------------
|
||||
Validate that a string is valid UTF-8.
|
||||
-----------------------------------------------------------------------------*/
|
||||
xmlrpc_env env;
|
||||
|
||||
xmlrpc_env_init(&env);
|
||||
|
||||
#if HAVE_UNICODE_WCHAR
|
||||
decodeUtf8(&env, utf8_data, utf8_len, NULL, NULL);
|
||||
#else
|
||||
/* We don't have a convenient way to validate, so we just fake it and
|
||||
call it valid.
|
||||
*/
|
||||
#endif
|
||||
|
||||
|
||||
if (env.fault_occurred) {
|
||||
xmlrpc_env_set_fault_formatted(
|
||||
envP, XMLRPC_INVALID_UTF8_ERROR,
|
||||
"%" XMLRPC_PRId64 "-byte "
|
||||
"supposed UTF-8 string is not valid UTF-8. %s",
|
||||
(XMLRPC_INT64)utf8_len, env.fault_string);
|
||||
}
|
||||
xmlrpc_env_clean(&env);
|
||||
}
|
||||
|
Reference in New Issue
Block a user