mirror of
https://github.com/signalwire/freeswitch.git
synced 2025-08-13 01:26:58 +00:00
update to pcre 7.9
git-svn-id: http://svn.freeswitch.org/svn/freeswitch/trunk@13706 d0543943-73ff-0310-b7d9-9358b9ac24b2
This commit is contained in:
@@ -29,6 +29,10 @@
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
@@ -37,13 +41,11 @@
|
||||
#include <errno.h>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "config.h"
|
||||
// We need this to compile the proper dll on windows/msys. This is copied
|
||||
// from pcre_internal.h. It would probably be better just to include that.
|
||||
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */
|
||||
|
||||
#include "pcrecpp_internal.h"
|
||||
#include "pcre.h"
|
||||
#include "pcre_stringpiece.h"
|
||||
#include "pcrecpp.h"
|
||||
#include "pcre_stringpiece.h"
|
||||
|
||||
|
||||
namespace pcrecpp {
|
||||
@@ -53,7 +55,23 @@ static const int kMaxArgs = 16;
|
||||
static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
|
||||
|
||||
// Special object that stands-in for no argument
|
||||
Arg no_arg((void*)NULL);
|
||||
Arg RE::no_arg((void*)NULL);
|
||||
|
||||
// This is for ABI compatibility with old versions of pcre (pre-7.6),
|
||||
// which defined a global no_arg variable instead of putting it in the
|
||||
// RE class. This works on GCC >= 3, at least. It definitely works
|
||||
// for ELF, but may not for other object formats (Mach-O, for
|
||||
// instance, does not support aliases.) We could probably have a more
|
||||
// inclusive test if we ever needed it. (Note that not only the
|
||||
// __attribute__ syntax, but also __USER_LABEL_PREFIX__, are
|
||||
// gnu-specific.)
|
||||
#if defined(__GNUC__) && __GNUC__ >= 3 && defined(__ELF__)
|
||||
# define ULP_AS_STRING(x) ULP_AS_STRING_INTERNAL(x)
|
||||
# define ULP_AS_STRING_INTERNAL(x) #x
|
||||
# define USER_LABEL_PREFIX_STR ULP_AS_STRING(__USER_LABEL_PREFIX__)
|
||||
extern Arg no_arg
|
||||
__attribute__((alias(USER_LABEL_PREFIX_STR "_ZN7pcrecpp2RE6no_argE")));
|
||||
#endif
|
||||
|
||||
// If a regular expression has no error, its error_ field points here
|
||||
static const string empty_string;
|
||||
@@ -61,7 +79,7 @@ static const string empty_string;
|
||||
// If the user doesn't ask for any options, we just use this one
|
||||
static RE_Options default_options;
|
||||
|
||||
void RE::Init(const char* pat, const RE_Options* options) {
|
||||
void RE::Init(const string& pat, const RE_Options* options) {
|
||||
pattern_ = pat;
|
||||
if (options == NULL) {
|
||||
options_ = default_options;
|
||||
@@ -74,27 +92,22 @@ void RE::Init(const char* pat, const RE_Options* options) {
|
||||
|
||||
re_partial_ = Compile(UNANCHORED);
|
||||
if (re_partial_ != NULL) {
|
||||
// Check for complicated patterns. The following change is
|
||||
// conservative in that it may treat some "simple" patterns
|
||||
// as "complex" (e.g., if the vertical bar is in a character
|
||||
// class or is escaped). But it seems good enough.
|
||||
if (strchr(pat, '|') == NULL) {
|
||||
// Simple pattern: we can use position-based checks to perform
|
||||
// fully anchored matches
|
||||
re_full_ = re_partial_;
|
||||
} else {
|
||||
// We need a special pattern for anchored matches
|
||||
re_full_ = Compile(ANCHOR_BOTH);
|
||||
}
|
||||
re_full_ = Compile(ANCHOR_BOTH);
|
||||
}
|
||||
}
|
||||
|
||||
RE::~RE() {
|
||||
if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_);
|
||||
if (re_partial_ != NULL) (*pcre_free)(re_partial_);
|
||||
if (error_ != &empty_string) delete error_;
|
||||
void RE::Cleanup() {
|
||||
if (re_full_ != NULL) (*pcre_free)(re_full_);
|
||||
if (re_partial_ != NULL) (*pcre_free)(re_partial_);
|
||||
if (error_ != &empty_string) delete error_;
|
||||
}
|
||||
|
||||
|
||||
RE::~RE() {
|
||||
Cleanup();
|
||||
}
|
||||
|
||||
|
||||
pcre* RE::Compile(Anchor anchor) {
|
||||
// First, convert RE_Options into pcre options
|
||||
int pcre_options = 0;
|
||||
@@ -334,13 +347,17 @@ bool RE::Replace(const StringPiece& rewrite,
|
||||
|
||||
// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
|
||||
// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.
|
||||
// Modified by PH to add PCRE_NEWLINE_ANY and PCRE_NEWLINE_ANYCRLF.
|
||||
|
||||
static int NewlineMode(int pcre_options) {
|
||||
// TODO: if we can make it threadsafe, cache this var
|
||||
int newline_mode = 0;
|
||||
/* if (newline_mode) return newline_mode; */ // do this once it's cached
|
||||
if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) {
|
||||
if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|
|
||||
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)) {
|
||||
newline_mode = (pcre_options &
|
||||
(PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF));
|
||||
(PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|
|
||||
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF));
|
||||
} else {
|
||||
int newline;
|
||||
pcre_config(PCRE_CONFIG_NEWLINE, &newline);
|
||||
@@ -350,8 +367,12 @@ static int NewlineMode(int pcre_options) {
|
||||
newline_mode = PCRE_NEWLINE_CR;
|
||||
else if (newline == 3338)
|
||||
newline_mode = PCRE_NEWLINE_CRLF;
|
||||
else if (newline == -1)
|
||||
newline_mode = PCRE_NEWLINE_ANY;
|
||||
else if (newline == -2)
|
||||
newline_mode = PCRE_NEWLINE_ANYCRLF;
|
||||
else
|
||||
assert("" == "Unexpected return value from pcre_config(NEWLINE)");
|
||||
assert(NULL == "Unexpected return value from pcre_config(NEWLINE)");
|
||||
}
|
||||
return newline_mode;
|
||||
}
|
||||
@@ -364,7 +385,7 @@ int RE::GlobalReplace(const StringPiece& rewrite,
|
||||
int start = 0;
|
||||
int lastend = -1;
|
||||
|
||||
for (; start <= static_cast<int>(str->length()); count++) {
|
||||
while (start <= static_cast<int>(str->length())) {
|
||||
int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);
|
||||
if (matches <= 0)
|
||||
break;
|
||||
@@ -379,9 +400,13 @@ int RE::GlobalReplace(const StringPiece& rewrite,
|
||||
// Note it's better to call pcre_fullinfo() than to examine
|
||||
// all_options(), since options_ could have changed bewteen
|
||||
// compile-time and now, but this is simpler and safe enough.
|
||||
// Modified by PH to add ANY and ANYCRLF.
|
||||
if (start+1 < static_cast<int>(str->length()) &&
|
||||
(*str)[start] == '\r' && (*str)[start+1] == '\n' &&
|
||||
NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) {
|
||||
(NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||
|
||||
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY ||
|
||||
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)
|
||||
) {
|
||||
matchend++;
|
||||
}
|
||||
// We also need to advance more than one char if we're in utf8 mode.
|
||||
@@ -424,6 +449,40 @@ bool RE::Extract(const StringPiece& rewrite,
|
||||
return Rewrite(out, rewrite, text, vec, matches);
|
||||
}
|
||||
|
||||
/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
|
||||
string result;
|
||||
|
||||
// Escape any ascii character not in [A-Za-z_0-9].
|
||||
//
|
||||
// Note that it's legal to escape a character even if it has no
|
||||
// special meaning in a regular expression -- so this function does
|
||||
// that. (This also makes it identical to the perl function of the
|
||||
// same name; see `perldoc -f quotemeta`.) The one exception is
|
||||
// escaping NUL: rather than doing backslash + NUL, like perl does,
|
||||
// we do '\0', because pcre itself doesn't take embedded NUL chars.
|
||||
for (int ii = 0; ii < unquoted.size(); ++ii) {
|
||||
// Note that using 'isalnum' here raises the benchmark time from
|
||||
// 32ns to 58ns:
|
||||
if (unquoted[ii] == '\0') {
|
||||
result += "\\0";
|
||||
} else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
|
||||
(unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
|
||||
(unquoted[ii] < '0' || unquoted[ii] > '9') &&
|
||||
unquoted[ii] != '_' &&
|
||||
// If this is the part of a UTF8 or Latin1 character, we need
|
||||
// to copy this byte without escaping. Experimentally this is
|
||||
// what works correctly with the regexp library.
|
||||
!(unquoted[ii] & 128)) {
|
||||
result += '\\';
|
||||
result += unquoted[ii];
|
||||
} else {
|
||||
result += unquoted[ii];
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/***** Actual matching and rewriting code *****/
|
||||
|
||||
int RE::TryMatch(const StringPiece& text,
|
||||
@@ -437,7 +496,7 @@ int RE::TryMatch(const StringPiece& text,
|
||||
return 0;
|
||||
}
|
||||
|
||||
pcre_extra extra = { 0 };
|
||||
pcre_extra extra = { 0, 0, 0, 0, 0, 0 };
|
||||
if (options_.match_limit() > 0) {
|
||||
extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
|
||||
extra.match_limit = options_.match_limit();
|
||||
@@ -470,13 +529,6 @@ int RE::TryMatch(const StringPiece& text,
|
||||
rc = vecsize / 2;
|
||||
}
|
||||
|
||||
if ((anchor == ANCHOR_BOTH) && (re_full_ == re_partial_)) {
|
||||
// We need an extra check to make sure that the match extended
|
||||
// to the end of the input string
|
||||
assert(vec[0] == 0); // PCRE_ANCHORED forces starting match
|
||||
if (vec[1] != text.size()) return 0; // Did not get ending match
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -553,14 +605,14 @@ bool RE::Rewrite(string *out, const StringPiece &rewrite,
|
||||
if (start >= 0)
|
||||
out->append(text.data() + start, vec[2 * n + 1] - start);
|
||||
} else if (c == '\\') {
|
||||
out->push_back('\\');
|
||||
*out += '\\';
|
||||
} else {
|
||||
//fprintf(stderr, "invalid rewrite pattern: %.*s\n",
|
||||
// rewrite.size(), rewrite.data());
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
out->push_back(c);
|
||||
*out += c;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
@@ -588,23 +640,27 @@ bool Arg::parse_null(const char* str, int n, void* dest) {
|
||||
}
|
||||
|
||||
bool Arg::parse_string(const char* str, int n, void* dest) {
|
||||
if (dest == NULL) return true;
|
||||
reinterpret_cast<string*>(dest)->assign(str, n);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_stringpiece(const char* str, int n, void* dest) {
|
||||
if (dest == NULL) return true;
|
||||
reinterpret_cast<StringPiece*>(dest)->set(str, n);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_char(const char* str, int n, void* dest) {
|
||||
if (n != 1) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<char*>(dest)) = str[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_uchar(const char* str, int n, void* dest) {
|
||||
if (n != 1) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned char*>(dest)) = str[0];
|
||||
return true;
|
||||
}
|
||||
@@ -653,6 +709,7 @@ bool Arg::parse_long_radix(const char* str,
|
||||
long r = strtol(str, &end, radix);
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<long*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
@@ -670,6 +727,7 @@ bool Arg::parse_ulong_radix(const char* str,
|
||||
unsigned long r = strtoul(str, &end, radix);
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned long*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
@@ -681,7 +739,8 @@ bool Arg::parse_short_radix(const char* str,
|
||||
long r;
|
||||
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range
|
||||
*(reinterpret_cast<short*>(dest)) = r;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<short*>(dest)) = static_cast<short>(r);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -692,7 +751,8 @@ bool Arg::parse_ushort_radix(const char* str,
|
||||
unsigned long r;
|
||||
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (r > USHRT_MAX) return false; // Out of range
|
||||
*(reinterpret_cast<unsigned short*>(dest)) = r;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -703,6 +763,7 @@ bool Arg::parse_int_radix(const char* str,
|
||||
long r;
|
||||
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (r < INT_MIN || r > INT_MAX) return false; // Out of range
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<int*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
@@ -714,6 +775,7 @@ bool Arg::parse_uint_radix(const char* str,
|
||||
unsigned long r;
|
||||
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (r > UINT_MAX) return false; // Out of range
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned int*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
@@ -734,11 +796,14 @@ bool Arg::parse_longlong_radix(const char* str,
|
||||
long long r = strtoq(str, &end, radix);
|
||||
#elif defined HAVE_STRTOLL
|
||||
long long r = strtoll(str, &end, radix);
|
||||
#elif defined HAVE__STRTOI64
|
||||
long long r = _strtoi64(str, &end, radix);
|
||||
#else
|
||||
#error parse_longlong_radix: cannot convert input to a long-long
|
||||
#endif
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<long long*>(dest)) = r;
|
||||
return true;
|
||||
#endif /* HAVE_LONG_LONG */
|
||||
@@ -761,11 +826,14 @@ bool Arg::parse_ulonglong_radix(const char* str,
|
||||
unsigned long long r = strtouq(str, &end, radix);
|
||||
#elif defined HAVE_STRTOLL
|
||||
unsigned long long r = strtoull(str, &end, radix);
|
||||
#elif defined HAVE__STRTOI64
|
||||
unsigned long long r = _strtoui64(str, &end, radix);
|
||||
#else
|
||||
#error parse_ulonglong_radix: cannot convert input to a long-long
|
||||
#endif
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned long long*>(dest)) = r;
|
||||
return true;
|
||||
#endif /* HAVE_UNSIGNED_LONG_LONG */
|
||||
@@ -783,6 +851,7 @@ bool Arg::parse_double(const char* str, int n, void* dest) {
|
||||
double r = strtod(buf, &end);
|
||||
if (end != buf + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<double*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
@@ -790,6 +859,7 @@ bool Arg::parse_double(const char* str, int n, void* dest) {
|
||||
bool Arg::parse_float(const char* str, int n, void* dest) {
|
||||
double r;
|
||||
if (!parse_double(str, n, &r)) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<float*>(dest)) = static_cast<float>(r);
|
||||
return true;
|
||||
}
|
||||
@@ -809,14 +879,14 @@ bool Arg::parse_float(const char* str, int n, void* dest) {
|
||||
return parse_##name##_radix(str, n, dest, 0); \
|
||||
}
|
||||
|
||||
DEFINE_INTEGER_PARSERS(short);
|
||||
DEFINE_INTEGER_PARSERS(ushort);
|
||||
DEFINE_INTEGER_PARSERS(int);
|
||||
DEFINE_INTEGER_PARSERS(uint);
|
||||
DEFINE_INTEGER_PARSERS(long);
|
||||
DEFINE_INTEGER_PARSERS(ulong);
|
||||
DEFINE_INTEGER_PARSERS(longlong);
|
||||
DEFINE_INTEGER_PARSERS(ulonglong);
|
||||
DEFINE_INTEGER_PARSERS(short) /* */
|
||||
DEFINE_INTEGER_PARSERS(ushort) /* */
|
||||
DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */
|
||||
DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */
|
||||
DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */
|
||||
DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */
|
||||
DEFINE_INTEGER_PARSERS(longlong) /* */
|
||||
DEFINE_INTEGER_PARSERS(ulonglong) /* */
|
||||
|
||||
#undef DEFINE_INTEGER_PARSERS
|
||||
|
||||
|
Reference in New Issue
Block a user