Merge in change to require system libpcre
libpcre is now a hard system dependency for building and running FreeSWITCH. FS-353
This commit is contained in:
commit
aec04d474c
12
Makefile.am
12
Makefile.am
|
@ -84,7 +84,6 @@ CORE_CFLAGS = $(AM_LIBAPR_CFLAGS) $(AM_LIBAPR_CPPFLAGS)
|
|||
CORE_CFLAGS += $(AM_LIBAPU_CPPFLAGS)
|
||||
CORE_CFLAGS += -I$(switch_srcdir)/libs/libtpl-1.5/src
|
||||
CORE_CFLAGS += -I$(switch_builddir)/libs/sqlite
|
||||
CORE_CFLAGS += -I$(switch_srcdir)/libs/pcre
|
||||
CORE_CFLAGS += -I$(switch_srcdir)/libs/speex/include -Ilibs/speex/include
|
||||
CORE_CFLAGS += -I$(switch_srcdir)/libs/srtp/include
|
||||
CORE_CFLAGS += -I$(switch_srcdir)/libs/srtp/crypto/include -Ilibs/srtp/crypto/include
|
||||
|
@ -93,7 +92,7 @@ CORE_CFLAGS += -I$(switch_builddir)/libs/tiff-4.0.2/libtiff -I$(switch_srcdir)/l
|
|||
|
||||
APR_LIBS = $(AM_LIBAPU_LIBS) $(AM_LIBAPR_LIBS)
|
||||
CORE_LIBS = libs/apr-util/libaprutil-1.la libs/apr/libapr-1.la
|
||||
CORE_LIBS += libs/sqlite/libsqlite3.la libs/pcre/libpcre.la libs/speex/libspeex/libspeexdsp.la libs/speex/libspeex/libspeex.la
|
||||
CORE_LIBS += libs/sqlite/libsqlite3.la libs/speex/libspeex/libspeexdsp.la libs/speex/libspeex/libspeex.la
|
||||
|
||||
if ENABLE_SRTP
|
||||
CORE_CFLAGS += -DENABLE_SRTP
|
||||
|
@ -125,9 +124,9 @@ libfreeswitch_spandsp_la_SOURCES = libs/spandsp/src/plc.c libs/spandsp/src/alloc
|
|||
libfreeswitch_spandsp_la_CFLAGS = -Ilibs/spandsp/src $(CORE_CFLAGS) $(AM_CFLAGS)
|
||||
CORE_LIBS+=libfreeswitch_spandsp.la
|
||||
lib_LTLIBRARIES = libfreeswitch.la
|
||||
libfreeswitch_la_CFLAGS = $(CORE_CFLAGS) $(AM_CFLAGS)
|
||||
libfreeswitch_la_CFLAGS = $(CORE_CFLAGS) $(PCRE_CFLAGS) $(AM_CFLAGS)
|
||||
libfreeswitch_la_LDFLAGS = -version-info 1:0:0 $(AM_LDFLAGS) $(PLATFORM_CORE_LDFLAGS) -shared -no-undefined
|
||||
libfreeswitch_la_LIBADD = $(CORE_LIBS) $(APR_LIBS) $(PLATFORM_CORE_LIBS)
|
||||
libfreeswitch_la_LIBADD = $(CORE_LIBS) $(APR_LIBS) $(PCRE_LIBS) $(PLATFORM_CORE_LIBS)
|
||||
libfreeswitch_la_DEPENDENCIES = $(BUILT_SOURCES)
|
||||
|
||||
if CURL_BUILTIN
|
||||
|
@ -482,11 +481,6 @@ libs/sqlite/Makefile: libs/sqlite/configure.ac
|
|||
cd libs/sqlite && ./config.status
|
||||
@$(TOUCH_TARGET)
|
||||
|
||||
libs/pcre/libpcre.la: libs/pcre libs/pcre/.update
|
||||
touch $(switch_srcdir)/src/include/switch.h
|
||||
@cd libs/pcre && $(MAKE)
|
||||
@$(TOUCH_TARGET)
|
||||
|
||||
SRTP_SRC = libs/srtp/srtp/srtp.c libs/srtp/srtp/ekt.c libs/srtp/crypto/cipher/cipher.c libs/srtp/crypto/cipher/null_cipher.c \
|
||||
libs/srtp/crypto/cipher/aes.c libs/srtp/crypto/cipher/aes_icm.c \
|
||||
libs/srtp/crypto/cipher/aes_cbc.c \
|
||||
|
|
|
@ -1102,6 +1102,7 @@ AM_CONDITIONAL([CURL_BUILTIN],[test "${ac_cv_use_system_curl}" != "yes"])
|
|||
|
||||
AC_SUBST(LIBCURL_DEPS)
|
||||
|
||||
PKG_CHECK_MODULES([PCRE], [libpcre >= 7.9])
|
||||
|
||||
AC_ARG_ENABLE(core-libedit-support,
|
||||
[AS_HELP_STRING([--disable-core-libedit-support], [Compile without libedit Support])], [enable_core_libedit_support="$enableval"], [enable_core_libedit_support="yes"])
|
||||
|
@ -1492,7 +1493,6 @@ if test "$enable_core_libedit_support" = "yes" ; then
|
|||
AC_CONFIG_SUBDIRS([libs/libedit])
|
||||
fi
|
||||
|
||||
AC_CONFIG_SUBDIRS([libs/pcre])
|
||||
AC_CONFIG_SUBDIRS([libs/apr])
|
||||
AC_CONFIG_SUBDIRS([libs/apr-util])
|
||||
AC_CONFIG_SUBDIRS([libs/ilbc])
|
||||
|
|
|
@ -283,6 +283,7 @@ Build-Depends:
|
|||
# core build
|
||||
dpkg-dev (>= 1.15.8.12), gcc (>= 4:4.4.5), g++ (>= 4:4.4.5),
|
||||
libc6-dev (>= 2.11.3), make (>= 3.81),
|
||||
libpcre3-dev,
|
||||
wget, pkg-config,
|
||||
# core codecs
|
||||
libogg-dev,
|
||||
|
|
|
@ -1821,14 +1821,6 @@ Copyright: 2008-2010, Eric des Courtis <eric.des.courtis@benbria.com>
|
|||
Benbria.
|
||||
License: MPL-1.1
|
||||
|
||||
Files: libs/pcre/*
|
||||
Copyright: 1997-2009 University of Cambridge
|
||||
2003 and onwards Google Inc.
|
||||
2005-2006, Google Inc
|
||||
2001 Alexander Tokarev <dwalin@dwalin.ru>
|
||||
2001 Peter S. Voronov aka Chem O'Dun <petervrn@yahoo.com>
|
||||
License: BSD-3-clause
|
||||
|
||||
Files: libs/silk/*
|
||||
Copyright: 2006-2011, Skype Limited.
|
||||
License: BSD-2-clause
|
||||
|
|
|
@ -1243,7 +1243,7 @@ SEARCH_INCLUDES = YES
|
|||
INCLUDE_PATH =../libs/apr ../libs/apr-util ../libs/curl \
|
||||
../libs/iksemel ../libs/ilbc ../libs/js \
|
||||
../libs/libedit ../libs/libg722_1 ../libs/libnatpmp \
|
||||
../libs/libsndfile ../libs/miniupnpc ../libs/pcre \
|
||||
../libs/libsndfile ../libs/miniupnpc \
|
||||
../libs/portaudio ../libs/sofia-sip ../libs/spandsp \
|
||||
../libs/speex ../libs/sqlite ../libs/srtp \
|
||||
../libs/tiff-4.0.2 ../libs/udns \
|
||||
|
|
|
@ -408,20 +408,6 @@ opal
|
|||
/openzap/INSTALL
|
||||
/openzap/Makefile.in
|
||||
/opus-*/
|
||||
/pcre/config.h
|
||||
/pcre/Makefile
|
||||
/pcre/Makefile.in
|
||||
/pcre/pcre_chartables.c
|
||||
/pcre/pcre-config
|
||||
/pcre/pcrecpparg.h
|
||||
/pcre/pcrecpp_unittest
|
||||
/pcre/pcregrep
|
||||
/pcre/pcre_scanner_unittest
|
||||
/pcre/pcre_stringpiece.h
|
||||
/pcre/pcre_stringpiece_unittest
|
||||
/pcre/pcretest
|
||||
/pcre/stamp-h1
|
||||
/pcre-*/
|
||||
/pocketsphinx-*/
|
||||
/portaudio/bin/
|
||||
/portaudio/bin-stamp
|
||||
|
@ -1012,7 +998,6 @@ opal
|
|||
!/libdingaling/missing
|
||||
!/libg722_1/config/depcomp
|
||||
!/libg722_1/config/missing
|
||||
!/pcre/depcomp
|
||||
!/portaudio/bindings/cpp/build/gnu/aclocal.m4
|
||||
!/portaudio/bindings/cpp/build/gnu/config.guess
|
||||
!/portaudio/bindings/cpp/build/gnu/config.sub
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
Mon Jun 8 19:51:53 EDT 2009
|
|
@ -1,296 +0,0 @@
|
|||
#! /usr/bin/perl -w
|
||||
|
||||
# Script to turn PCRE man pages into HTML
|
||||
|
||||
|
||||
# Subroutine to handle font changes and other escapes
|
||||
|
||||
sub do_line {
    # Convert one line of man-page text to HTML: escape the HTML-special
    # characters < and >, translate nroff font changes and the \e escape,
    # and turn "Copyright (c)" into the copyright entity.
    #
    # Takes the raw text as its only argument and returns the converted
    # string; the caller's variable is not modified.
    my($s) = $_[0];

    # Escape < and > first, so that the tags inserted below are left alone.
    $s =~ s/</&lt;/g;
    $s =~ s/>/&gt;/g;

    # \fI...\fR (or \fP) is italic; \fB...\fR (or \fP) is bold.
    $s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
    $s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;

    # \e is the nroff spelling of a literal backslash.
    $s =~ s"\\e"\\"g;

    # Only a "(c)" that directly follows "Copyright " becomes the entity.
    $s =~ s/(?<=Copyright )\(c\)/&copy;/g;

    return $s;
}
|
||||
|
||||
# Subroutine to ensure not in a paragraph
|
||||
|
||||
sub end_para {
    # Close the paragraph that is currently open on TEMP, if any (including
    # a literal section still open inside it), then reset the paragraph,
    # literal and "text written" flags.
    if ($inpara) {
        my $closing = $inpre ? "</PRE>\n</P>\n" : "</P>\n";
        print TEMP $closing;
    }
    ($inpara, $inpre) = (0, 0);
    $wrotetext = 0;
}
|
||||
|
||||
# Subroutine to start a new paragraph
|
||||
|
||||
sub new_para {
    # Finish whatever paragraph is open, then open a new one on TEMP and
    # record that we are now inside a paragraph.
    end_para();
    print TEMP "<P>\n";
    $inpara = 1;
}
|
||||
|
||||
|
||||
# Main program
|
||||
|
||||
use strict;

# State shared with end_para() and new_para(), hence package variables.

our $innf = 0;        # Inside a .nf/.fi section
our $inpara = 0;      # A paragraph is open in the body output
our $inpre = 0;       # A literal (<pre>) section is open in the body output
our $wrotetext = 0;   # Text has been written since the last paragraph reset
our $toc = 0;         # -toc was given: also write a table of contents
our $ref = 1;         # Number used for the next TOC/SEC anchor pair

while (@ARGV && $ARGV[0] =~ /^-/) {
    $toc = 1 if $ARGV[0] eq "-toc";
    shift @ARGV;
}

# Initial output to STDOUT

print <<End ;
<html>
<head>
<title>$ARGV[0] specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$ARGV[0] man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
End

print "<ul>\n" if ($toc);

# The table of contents goes straight to STDOUT while the input is read, so
# the page body has to be held back until the end. It is collected in memory
# through the TEMP handle rather than in a predictably named file in /tmp.

my $body = '';
open(TEMP, '>', \$body) || die "Can't open in-memory buffer for output\n";

while (<STDIN>) {
    # Handle lines beginning with a dot

    if (/^\./) {
        # Some of the PCRE man pages used to contain instances of .br. However,
        # they should have all been removed because they cause trouble in some
        # (other) automated systems that translate man pages to HTML. Complain if
        # we find .br or .in (another macro that is deprecated).

        if (/^\.br/ || /^\.in/) {
            print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
            print STDERR "*** $_\n";
            die "*** Processing abandoned\n";
        }

        # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.

        elsif (/^\.nf/) {
            $innf = 1;
        }

        elsif (/^\.fi/) {
            $innf = 0;
        }

        # Handling .sp is subtle. If it is inside a literal section, do nothing if
        # the next line is a non literal text line; similarly, if not inside a
        # literal section, do nothing if a literal follows. The point being that
        # the <pre> and </pre> that delimit literal sections will do the spacing.
        # Always skip if no previous output.

        elsif (/^\.sp/) {
            if ($wrotetext) {
                $_ = <STDIN>;
                $_ = '' unless defined $_;    # At end of input, act as for a blank line
                if ($inpre) {
                    print TEMP "\n" if (/^[\s.]/);
                }
                else {
                    print TEMP "<br>\n<br>\n" if (!/^[\s.]/);
                }
                redo;    # Now process the lookahead line we just read
            }
        }
        elsif (/^\.TP/ || /^\.PP/ || /^\.P/) {
            new_para();
        }
        elsif (/^\.SH\s*("?)(.*)\1/) {
            my $heading = $2;

            # Ignore the NAME section, and discard the line that follows it
            if ($heading =~ /^NAME\b/) {
                <STDIN>;
                next;
            }

            end_para();
            my $title = do_line($heading);
            if ($toc) {
                # The title is passed as an argument, not interpolated into the
                # format, so that a % in a heading is output literally.
                printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
                    $ref, $ref, $title);
                printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
                    $ref, $title);
                $ref++;
            }
            else {
                print TEMP "<br><b>\n$title\n</b><br>\n";
            }
        }
        elsif (/^\.SS\s*("?)(.*)\1/) {
            my $heading = $2;
            end_para();
            my $title = do_line($heading);
            print TEMP "<br><b>\n$title\n</b><br>\n";
        }
        elsif (/^\.B\s*(.*)/) {
            my $text = $1;
            new_para() if (!$inpara);
            $_ = do_line($text);
            s/"(.*?)"/$1/g;
            print TEMP "<b>$_</b>\n";
            $wrotetext = 1;
        }
        elsif (/^\.I\s*(.*)/) {
            my $text = $1;
            new_para() if (!$inpara);
            $_ = do_line($text);
            s/"(.*?)"/$1/g;
            print TEMP "<i>$_</i>\n";
            $wrotetext = 1;
        }

        # A comment that starts "HREF" takes the next line as a name that
        # is turned into a hyperlink, using the text given, which might be
        # in a special font. If it ends in () or (digits) or punctuation, they
        # aren't part of the link.

        elsif (/^\.\\"\s*HREF/) {
            $_ = <STDIN>;
            $_ = '' unless defined $_;
            chomp;
            $_ = do_line($_);
            $_ =~ s/\s+$//;
            if (/^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/) {
                print TEMP "<a href=\"$1.html\">$_</a>\n";
            }
            else {
                # No page name can be extracted: report it and output the text
                # unlinked instead of building a link from a stale capture.
                print STDERR "*** Cannot derive a link target from \"$_\"\n";
                print TEMP "$_\n";
            }
        }

        # A comment that starts "HTML" inserts literal HTML

        elsif (/^\.\\"\s*HTML\s*(.*)/) {
            print TEMP $1;
        }

        # A comment that starts < inserts that HTML at the end of the
        # *next* input line - so as not to get a newline between them.

        elsif (/^\.\\"\s*(<.*>)/) {
            my($markup) = $1;
            $_ = <STDIN>;
            $_ = '' unless defined $_;
            chomp;
            $_ = do_line($_);
            $_ =~ s/\s+$//;
            print TEMP "$_$markup\n";
        }

        # A comment that starts JOIN joins the next two lines together, with one
        # space between them. Then that line is processed. This is used in some
        # displays where two lines are needed for the "man" version. JOINSH works
        # the same, except that it assumes this is a shell command, so removes
        # continuation backslashes.

        elsif (/^\.\\"\s*JOIN(SH)?/) {
            my $is_shell = defined($1);
            my($one,$two);
            $one = <STDIN>;
            $two = <STDIN>;
            $one = '' unless defined $one;    # Input ended early
            $two = '' unless defined $two;
            $one =~ s/\s*\\e\s*$// if ($is_shell);
            chomp($one);
            $two =~ s/^\s+//;
            $_ = "$one $two";
            redo;    # Process the joined lines
        }

        # Ignore anything not recognized

        next;
    }

    # Line does not begin with a dot. Replace blank lines with new paragraphs

    if (/^\s*$/) {
        end_para() if ($wrotetext);
        next;
    }

    # Convert fonts changes and output an ordinary line. Ensure that indented
    # lines are marked as literal.

    $_ = do_line($_);
    new_para() if (!$inpara);

    if (/^\s/) {
        if (!$inpre) {
            print TEMP "<pre>\n";
            $inpre = 1;
        }
    }
    elsif ($inpre) {
        print TEMP "</pre>\n";
        $inpre = 0;
    }

    # Add <br> to the end of a non-literal line if we are within .nf/.fi

    $_ .= "<br>\n" if (!$inpre && $innf);

    print TEMP;
    $wrotetext = 1;
}

# The TOC, if present, will have been written - terminate it

print "</ul>\n" if ($toc);

# Copy the collected body to the standard output

close(TEMP);
print $body;

print <<End ;
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
End
|
||||
|
||||
# End
|
|
@ -1,23 +0,0 @@
|
|||
THE MAIN PCRE LIBRARY
|
||||
---------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2009 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
THE C++ WRAPPER LIBRARY
|
||||
-----------------------
|
||||
|
||||
Written by: Google Inc.
|
||||
|
||||
Copyright (c) 2007-2008 Google Inc
|
||||
All rights reserved
|
||||
|
||||
####
|
|
@ -1,578 +0,0 @@
|
|||
# CMakeLists.txt
|
||||
#
|
||||
#
|
||||
# This file allows building PCRE with the CMake configuration and build
|
||||
# tool. Download CMake in source or binary form from http://www.cmake.org/
|
||||
#
|
||||
# Original listfile by Christian Ehrlicher <Ch.Ehrlicher@gmx.de>
|
||||
# Refined and expanded by Daniel Richard G. <skunk@iSKUNK.ORG>
|
||||
# 2007-09-14 mod by Sheri so 7.4 supported configuration options can be entered
|
||||
# 2007-09-19 Adjusted by PH to retain previous default settings
|
||||
# 2007-12-26 (a) On UNIX, use names libpcre instead of just pcre
|
||||
# (b) Ensure pcretest and pcregrep link with the local library,
|
||||
# not a previously-installed one.
|
||||
# (c) Add PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, and
|
||||
# PCRE_SUPPORT_LIBBZ2.
|
||||
# 2008-01-20 Brought up to date to include several new features by Christian
|
||||
# Ehrlicher.
|
||||
# 2008-01-22 Sheri added options for backward compatibility of library names
|
||||
# when building with minGW:
|
||||
# if "ON", NON_STANDARD_LIB_PREFIX causes shared libraries to
|
||||
# be built without "lib" as prefix. (The libraries will be named
|
||||
# pcre.dll, pcreposix.dll and pcrecpp.dll).
|
||||
# if "ON", NON_STANDARD_LIB_SUFFIX causes shared libraries to
|
||||
# be built with suffix of "-0.dll". (The libraries will be named
|
||||
# libpcre-0.dll, libpcreposix-0.dll and libpcrecpp-0.dll - same names
|
||||
# built by default with Configure and Make.
|
||||
# 2008-01-23 PH removed the automatic build of pcredemo.
|
||||
# 2008-04-22 PH modified READLINE support so it finds NCURSES when needed.
|
||||
# 2008-07-03 PH updated for revised UCP property support (change of files)
|
||||
# 2009-03-23 PH applied Steven Van Ingelgem's patch to change the name
|
||||
# CMAKE_BINARY_DIR to PROJECT_BINARY_DIR so that it works when PCRE
|
||||
# is included within another project.
|
||||
# 2009-03-23 PH applied a modified version of Steven Van Ingelgem's patches to
|
||||
# add options to stop the building of pcregrep and the tests, and
|
||||
# to disable the final configuration report.
|
||||
# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
|
||||
# are set by specifying a release type.
|
||||
|
||||
PROJECT(PCRE C CXX)
|
||||
|
||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.4.6)
|
||||
|
||||
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
|
||||
|
||||
# external packages
|
||||
FIND_PACKAGE( BZip2 )
|
||||
FIND_PACKAGE( ZLIB )
|
||||
FIND_PACKAGE( Readline )
|
||||
|
||||
# Configuration checks
|
||||
|
||||
INCLUDE(CheckIncludeFile)
|
||||
INCLUDE(CheckIncludeFileCXX)
|
||||
INCLUDE(CheckFunctionExists)
|
||||
INCLUDE(CheckTypeSize)
|
||||
|
||||
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
|
||||
CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H)
|
||||
CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
|
||||
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
|
||||
CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
|
||||
|
||||
CHECK_INCLUDE_FILE_CXX(type_traits.h HAVE_TYPE_TRAITS_H)
|
||||
CHECK_INCLUDE_FILE_CXX(bits/type_traits.h HAVE_BITS_TYPE_TRAITS_H)
|
||||
|
||||
CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
|
||||
CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
|
||||
CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
|
||||
CHECK_FUNCTION_EXISTS(strtoll HAVE_STRTOLL)
|
||||
CHECK_FUNCTION_EXISTS(strtoq HAVE_STRTOQ)
|
||||
CHECK_FUNCTION_EXISTS(_strtoi64 HAVE__STRTOI64)
|
||||
|
||||
CHECK_TYPE_SIZE("long long" LONG_LONG)
|
||||
CHECK_TYPE_SIZE("unsigned long long" UNSIGNED_LONG_LONG)
|
||||
|
||||
# User-configurable options
|
||||
#
|
||||
# (Note: CMakeSetup displays these in alphabetical order, regardless of
|
||||
# the order we use here)
|
||||
|
||||
SET(BUILD_SHARED_LIBS OFF CACHE BOOL
|
||||
"Build shared libraries instead of static ones.")
|
||||
|
||||
OPTION(PCRE_BUILD_PCRECPP "Build the PCRE C++ library (pcrecpp)." ON)
|
||||
|
||||
SET(PCRE_EBCDIC OFF CACHE BOOL
|
||||
"Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems)")
|
||||
|
||||
SET(PCRE_LINK_SIZE "2" CACHE STRING
|
||||
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
|
||||
|
||||
SET(PCRE_MATCH_LIMIT "10000000" CACHE STRING
|
||||
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
||||
|
||||
SET(PCRE_MATCH_LIMIT_RECURSION "MATCH_LIMIT" CACHE STRING
|
||||
"Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.")
|
||||
|
||||
SET(PCRE_NEWLINE "LF" CACHE STRING
|
||||
"What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).")
|
||||
|
||||
SET(PCRE_NO_RECURSE OFF CACHE BOOL
|
||||
"If ON, then don't use stack recursion when matching. See NO_RECURSE in config.h.in for details.")
|
||||
|
||||
SET(PCRE_POSIX_MALLOC_THRESHOLD "10" CACHE STRING
|
||||
"Threshold for malloc() usage. See POSIX_MALLOC_THRESHOLD in config.h.in for details.")
|
||||
|
||||
SET(PCRE_SUPPORT_UNICODE_PROPERTIES OFF CACHE BOOL
|
||||
"Enable support for Unicode properties. (If set, UTF-8 support will be enabled as well)")
|
||||
|
||||
SET(PCRE_SUPPORT_UTF8 OFF CACHE BOOL
|
||||
"Enable support for the Unicode UTF-8 encoding.")
|
||||
|
||||
SET(PCRE_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
||||
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
|
||||
|
||||
OPTION(PCRE_SHOW_REPORT "Show the final configuration report" ON)
|
||||
OPTION(PCRE_BUILD_PCREGREP "Build pcregrep" ON)
|
||||
OPTION(PCRE_BUILD_TESTS "Build the tests" ON)
|
||||
|
||||
IF (PCRE_BUILD_TESTS)
|
||||
IF (NOT PCRE_BUILD_PCREGREP)
|
||||
MESSAGE(STATUS "** Building tests requires pcregrep: PCRE_BUILD_PCREGREP forced ON")
|
||||
SET(PCRE_BUILD_PCREGREP ON)
|
||||
ENDIF(NOT PCRE_BUILD_PCREGREP)
|
||||
ENDIF(PCRE_BUILD_TESTS)
|
||||
|
||||
IF (MINGW)
|
||||
OPTION(NON_STANDARD_LIB_PREFIX
|
||||
"ON=Shared libraries built in mingw will be named pcre.dll, etc., instead of libpcre.dll, etc."
|
||||
OFF)
|
||||
|
||||
OPTION(NON_STANDARD_LIB_SUFFIX
|
||||
"ON=Shared libraries built in mingw will be named libpcre-0.dll, etc., instead of libpcre.dll, etc."
|
||||
OFF)
|
||||
ENDIF(MINGW)
|
||||
|
||||
# bzip2 lib
|
||||
IF(BZIP2_FOUND)
|
||||
OPTION (PCRE_SUPPORT_LIBBZ2 "Enable support for linking pcregrep with libbz2." ON)
|
||||
ENDIF(BZIP2_FOUND)
|
||||
IF(PCRE_SUPPORT_LIBBZ2)
|
||||
INCLUDE_DIRECTORIES(${BZIP2_INCLUDE_DIR})
|
||||
ENDIF(PCRE_SUPPORT_LIBBZ2)
|
||||
|
||||
# zlib
|
||||
IF(ZLIB_FOUND)
|
||||
OPTION (PCRE_SUPPORT_LIBZ "Enable support for linking pcregrep with libz." ON)
|
||||
ENDIF(ZLIB_FOUND)
|
||||
IF(PCRE_SUPPORT_LIBZ)
|
||||
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
|
||||
ENDIF(PCRE_SUPPORT_LIBZ)
|
||||
|
||||
# readline lib
|
||||
IF(READLINE_FOUND)
|
||||
OPTION (PCRE_SUPPORT_LIBREADLINE "Enable support for linking pcretest with libreadline." ON)
|
||||
ENDIF(READLINE_FOUND)
|
||||
IF(PCRE_SUPPORT_LIBREADLINE)
|
||||
INCLUDE_DIRECTORIES(${READLINE_INCLUDE_DIR})
|
||||
ENDIF(PCRE_SUPPORT_LIBREADLINE)
|
||||
|
||||
# Prepare build configuration
|
||||
|
||||
SET(pcre_have_type_traits 0)
|
||||
SET(pcre_have_bits_type_traits 0)
|
||||
|
||||
IF(HAVE_TYPE_TRAITS_H)
|
||||
SET(pcre_have_type_traits 1)
|
||||
ENDIF(HAVE_TYPE_TRAITS_H)
|
||||
|
||||
IF(HAVE_BITS_TYPE_TRAITS_H)
|
||||
SET(pcre_have_bits_type_traits 1)
|
||||
ENDIF(HAVE_BITS_TYPE_TRAITS_H)
|
||||
|
||||
SET(pcre_have_long_long 0)
|
||||
SET(pcre_have_ulong_long 0)
|
||||
|
||||
IF(HAVE_LONG_LONG)
|
||||
SET(pcre_have_long_long 1)
|
||||
ENDIF(HAVE_LONG_LONG)
|
||||
|
||||
IF(HAVE_UNSIGNED_LONG_LONG)
|
||||
SET(pcre_have_ulong_long 1)
|
||||
ENDIF(HAVE_UNSIGNED_LONG_LONG)
|
||||
|
||||
IF(NOT BUILD_SHARED_LIBS)
|
||||
SET(PCRE_STATIC 1)
|
||||
ENDIF(NOT BUILD_SHARED_LIBS)
|
||||
|
||||
IF(PCRE_SUPPORT_BSR_ANYCRLF)
|
||||
SET(BSR_ANYCRLF 1)
|
||||
ENDIF(PCRE_SUPPORT_BSR_ANYCRLF)
|
||||
|
||||
IF(PCRE_SUPPORT_UTF8 OR PCRE_SUPPORT_UNICODE_PROPERTIES)
|
||||
SET(SUPPORT_UTF8 1)
|
||||
ENDIF(PCRE_SUPPORT_UTF8 OR PCRE_SUPPORT_UNICODE_PROPERTIES)
|
||||
|
||||
IF(PCRE_SUPPORT_UNICODE_PROPERTIES)
|
||||
SET(SUPPORT_UCP 1)
|
||||
ENDIF(PCRE_SUPPORT_UNICODE_PROPERTIES)
|
||||
|
||||
# This next one used to contain
|
||||
# SET(PCRETEST_LIBS ${READLINE_LIBRARY})
|
||||
# but I was advised to add the NCURSES test as well, along with
|
||||
# some modifications to cmake/FindReadline.cmake which should
|
||||
# make it possible to override the default if necessary. PH
|
||||
|
||||
IF(PCRE_SUPPORT_LIBREADLINE)
|
||||
SET(SUPPORT_LIBREADLINE 1)
|
||||
SET(PCRETEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY})
|
||||
ENDIF(PCRE_SUPPORT_LIBREADLINE)
|
||||
|
||||
IF(PCRE_SUPPORT_LIBZ)
|
||||
SET(SUPPORT_LIBZ 1)
|
||||
SET(PCREGREP_LIBS ${PCREGREP_LIBS} ${ZLIB_LIBRARIES})
|
||||
ENDIF(PCRE_SUPPORT_LIBZ)
|
||||
|
||||
IF(PCRE_SUPPORT_LIBBZ2)
|
||||
SET(SUPPORT_LIBBZ2 1)
|
||||
SET(PCREGREP_LIBS ${PCREGREP_LIBS} ${BZIP2_LIBRARIES})
|
||||
ENDIF(PCRE_SUPPORT_LIBBZ2)
|
||||
|
||||
SET(NEWLINE "")
|
||||
|
||||
IF(PCRE_NEWLINE STREQUAL "LF")
|
||||
SET(NEWLINE "10")
|
||||
ENDIF(PCRE_NEWLINE STREQUAL "LF")
|
||||
IF(PCRE_NEWLINE STREQUAL "CR")
|
||||
SET(NEWLINE "13")
|
||||
ENDIF(PCRE_NEWLINE STREQUAL "CR")
|
||||
IF(PCRE_NEWLINE STREQUAL "CRLF")
|
||||
SET(NEWLINE "3338")
|
||||
ENDIF(PCRE_NEWLINE STREQUAL "CRLF")
|
||||
IF(PCRE_NEWLINE STREQUAL "ANY")
|
||||
SET(NEWLINE "-1")
|
||||
ENDIF(PCRE_NEWLINE STREQUAL "ANY")
|
||||
IF(PCRE_NEWLINE STREQUAL "ANYCRLF")
|
||||
SET(NEWLINE "-2")
|
||||
ENDIF(PCRE_NEWLINE STREQUAL "ANYCRLF")
|
||||
|
||||
IF(NEWLINE STREQUAL "")
|
||||
MESSAGE(FATAL_ERROR "The PCRE_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\".")
|
||||
ENDIF(NEWLINE STREQUAL "")
|
||||
|
||||
IF(PCRE_EBCDIC)
|
||||
SET(EBCDIC 1)
|
||||
ENDIF(PCRE_EBCDIC)
|
||||
|
||||
IF(PCRE_NO_RECURSE)
|
||||
SET(NO_RECURSE 1)
|
||||
ENDIF(PCRE_NO_RECURSE)
|
||||
|
||||
# Output files
|
||||
CONFIGURE_FILE(config-cmake.h.in
|
||||
${PROJECT_BINARY_DIR}/config.h
|
||||
@ONLY)
|
||||
|
||||
CONFIGURE_FILE(pcre.h.generic
|
||||
${PROJECT_BINARY_DIR}/pcre.h
|
||||
COPYONLY)
|
||||
|
||||
# What about pcre-config and libpcre.pc?
|
||||
|
||||
IF(PCRE_BUILD_PCRECPP)
|
||||
CONFIGURE_FILE(pcre_stringpiece.h.in
|
||||
${PROJECT_BINARY_DIR}/pcre_stringpiece.h
|
||||
@ONLY)
|
||||
|
||||
CONFIGURE_FILE(pcrecpparg.h.in
|
||||
${PROJECT_BINARY_DIR}/pcrecpparg.h
|
||||
@ONLY)
|
||||
ENDIF(PCRE_BUILD_PCRECPP)
|
||||
|
||||
# Character table generation
|
||||
|
||||
OPTION(PCRE_REBUILD_CHARTABLES "Rebuild char tables" OFF)
|
||||
IF(PCRE_REBUILD_CHARTABLES)
|
||||
ADD_EXECUTABLE(dftables dftables.c)
|
||||
|
||||
GET_TARGET_PROPERTY(DFTABLES_EXE dftables LOCATION)
|
||||
|
||||
ADD_CUSTOM_COMMAND(
|
||||
COMMENT "Generating character tables (pcre_chartables.c) for current locale"
|
||||
DEPENDS dftables
|
||||
COMMAND ${DFTABLES_EXE}
|
||||
ARGS ${PROJECT_BINARY_DIR}/pcre_chartables.c
|
||||
OUTPUT ${PROJECT_BINARY_DIR}/pcre_chartables.c
|
||||
)
|
||||
ELSE(PCRE_REBUILD_CHARTABLES)
|
||||
CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/pcre_chartables.c.dist
|
||||
${PROJECT_BINARY_DIR}/pcre_chartables.c
|
||||
COPYONLY)
|
||||
ENDIF(PCRE_REBUILD_CHARTABLES)
|
||||
|
||||
# Source code
|
||||
|
||||
SET(PCRE_HEADERS ${PROJECT_BINARY_DIR}/pcre.h)
|
||||
|
||||
SET(PCRE_SOURCES
|
||||
${PROJECT_BINARY_DIR}/pcre_chartables.c
|
||||
pcre_compile.c
|
||||
pcre_config.c
|
||||
pcre_dfa_exec.c
|
||||
pcre_exec.c
|
||||
pcre_fullinfo.c
|
||||
pcre_get.c
|
||||
pcre_globals.c
|
||||
pcre_info.c
|
||||
pcre_newline.c
|
||||
pcre_maketables.c
|
||||
pcre_ord2utf8.c
|
||||
pcre_refcount.c
|
||||
pcre_study.c
|
||||
pcre_tables.c
|
||||
pcre_try_flipped.c
|
||||
pcre_ucd.c
|
||||
pcre_valid_utf8.c
|
||||
pcre_version.c
|
||||
pcre_xclass.c
|
||||
)
|
||||
|
||||
SET(PCREPOSIX_HEADERS pcreposix.h)
|
||||
|
||||
SET(PCREPOSIX_SOURCES pcreposix.c)
|
||||
|
||||
SET(PCRECPP_HEADERS
|
||||
pcrecpp.h
|
||||
pcre_scanner.h
|
||||
${PROJECT_BINARY_DIR}/pcrecpparg.h
|
||||
${PROJECT_BINARY_DIR}/pcre_stringpiece.h
|
||||
)
|
||||
|
||||
SET(PCRECPP_SOURCES
|
||||
pcrecpp.cc
|
||||
pcre_scanner.cc
|
||||
pcre_stringpiece.cc
|
||||
)
|
||||
|
||||
# Build setup
|
||||
|
||||
ADD_DEFINITIONS(-DHAVE_CONFIG_H)
|
||||
|
||||
IF(MSVC)
|
||||
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE)
|
||||
ENDIF(MSVC)
|
||||
|
||||
SET(CMAKE_INCLUDE_CURRENT_DIR 1)
|
||||
# needed to make sure to not link debug libs
|
||||
# against release libs and vice versa
|
||||
IF(WIN32)
|
||||
SET(CMAKE_DEBUG_POSTFIX "d")
|
||||
ENDIF(WIN32)
|
||||
|
||||
SET(targets)
|
||||
|
||||
# Libraries
|
||||
# pcre
|
||||
ADD_LIBRARY(pcre ${PCRE_HEADERS} ${PCRE_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
SET(targets ${targets} pcre)
|
||||
ADD_LIBRARY(pcreposix ${PCREPOSIX_HEADERS} ${PCREPOSIX_SOURCES})
|
||||
SET(targets ${targets} pcreposix)
|
||||
TARGET_LINK_LIBRARIES(pcreposix pcre)
|
||||
IF(MINGW AND NOT PCRE_STATIC)
|
||||
IF(NON_STANDARD_LIB_PREFIX)
|
||||
SET_TARGET_PROPERTIES(pcre pcreposix
|
||||
PROPERTIES PREFIX ""
|
||||
)
|
||||
ENDIF(NON_STANDARD_LIB_PREFIX)
|
||||
|
||||
IF(NON_STANDARD_LIB_SUFFIX)
|
||||
SET_TARGET_PROPERTIES(pcre pcreposix
|
||||
PROPERTIES SUFFIX "-0.dll"
|
||||
)
|
||||
ENDIF(NON_STANDARD_LIB_SUFFIX)
|
||||
ENDIF(MINGW AND NOT PCRE_STATIC)
|
||||
|
||||
|
||||
# pcrecpp
|
||||
IF(PCRE_BUILD_PCRECPP)
|
||||
ADD_LIBRARY(pcrecpp ${PCRECPP_HEADERS} ${PCRECPP_SOURCES})
|
||||
SET(targets ${targets} pcrecpp)
|
||||
TARGET_LINK_LIBRARIES(pcrecpp pcre)
|
||||
|
||||
IF(MINGW AND NOT PCRE_STATIC)
|
||||
IF(NON_STANDARD_LIB_PREFIX)
|
||||
SET_TARGET_PROPERTIES(pcrecpp
|
||||
PROPERTIES PREFIX ""
|
||||
)
|
||||
ENDIF(NON_STANDARD_LIB_PREFIX)
|
||||
|
||||
IF(NON_STANDARD_LIB_SUFFIX)
|
||||
SET_TARGET_PROPERTIES(pcrecpp
|
||||
PROPERTIES SUFFIX "-0.dll"
|
||||
)
|
||||
ENDIF(NON_STANDARD_LIB_SUFFIX)
|
||||
ENDIF(MINGW AND NOT PCRE_STATIC)
|
||||
ENDIF(PCRE_BUILD_PCRECPP)
|
||||
|
||||
|
||||
# Executables
|
||||
|
||||
# Removed by PH (2008-01-23) because pcredemo shouldn't really be built
|
||||
# automatically, and it gave trouble in some environments anyway.
|
||||
# ADD_EXECUTABLE(pcredemo pcredemo.c)
|
||||
# TARGET_LINK_LIBRARIES(pcredemo pcreposix)
|
||||
# IF(NOT BUILD_SHARED_LIBS)
|
||||
# # make sure to not use declspec(dllimport) in static mode on windows
|
||||
# SET_TARGET_PROPERTIES(pcredemo PROPERTIES COMPILE_FLAGS "-DPCRE_STATIC")
|
||||
# ENDIF(NOT BUILD_SHARED_LIBS)
|
||||
|
||||
IF(PCRE_BUILD_PCREGREP)
|
||||
ADD_EXECUTABLE(pcregrep pcregrep.c)
|
||||
SET(targets ${targets} pcregrep)
|
||||
TARGET_LINK_LIBRARIES(pcregrep pcreposix ${PCREGREP_LIBS})
|
||||
ENDIF(PCRE_BUILD_PCREGREP)
|
||||
|
||||
|
||||
# Testing
|
||||
# Build the test programs and register them with CTest. The C++ unit tests
# are built and registered only when the C++ wrapper library is built.
IF(PCRE_BUILD_TESTS)
  ENABLE_TESTING()

  ADD_EXECUTABLE(pcretest pcretest.c)
  SET(targets ${targets} pcretest)
  TARGET_LINK_LIBRARIES(pcretest pcreposix ${PCRETEST_LIBS})

  IF(PCRE_BUILD_PCRECPP)
    ADD_EXECUTABLE(pcrecpp_unittest pcrecpp_unittest.cc)
    SET(targets ${targets} pcrecpp_unittest)
    TARGET_LINK_LIBRARIES(pcrecpp_unittest pcrecpp)
    # The option that controls the library prefix is NON_STANDARD_LIB_PREFIX;
    # NON_STANDARD_LIB_NAMES is not defined anywhere in this file.
    IF(MINGW AND NON_STANDARD_LIB_PREFIX AND NOT PCRE_STATIC)
      SET_TARGET_PROPERTIES(pcrecpp
                            PROPERTIES PREFIX ""
      )
    ENDIF(MINGW AND NON_STANDARD_LIB_PREFIX AND NOT PCRE_STATIC)

    ADD_EXECUTABLE(pcre_scanner_unittest pcre_scanner_unittest.cc)
    SET(targets ${targets} pcre_scanner_unittest)
    TARGET_LINK_LIBRARIES(pcre_scanner_unittest pcrecpp)

    ADD_EXECUTABLE(pcre_stringpiece_unittest pcre_stringpiece_unittest.cc)
    SET(targets ${targets} pcre_stringpiece_unittest)
    TARGET_LINK_LIBRARIES(pcre_stringpiece_unittest pcrecpp)
  ENDIF(PCRE_BUILD_PCRECPP)

  GET_TARGET_PROPERTY(PCREGREP_EXE pcregrep DEBUG_LOCATION)
  GET_TARGET_PROPERTY(PCRETEST_EXE pcretest DEBUG_LOCATION)

  # Write out a CTest configuration file that sets some needed environment
  # variables for the test scripts.
  #
  FILE(WRITE ${PROJECT_BINARY_DIR}/CTestCustom.ctest
"# This is a generated file.
SET(ENV{srcdir} ${PROJECT_SOURCE_DIR})
SET(ENV{pcregrep} ${PCREGREP_EXE})
SET(ENV{pcretest} ${PCRETEST_EXE})
")

  IF(UNIX)
    ADD_TEST(pcre_test ${PROJECT_SOURCE_DIR}/RunTest)
    ADD_TEST(pcre_grep_test ${PROJECT_SOURCE_DIR}/RunGrepTest)
  ENDIF(UNIX)
  IF(WIN32)
    ADD_TEST(pcre_test cmd /C ${PROJECT_SOURCE_DIR}/RunTest.bat)
  ENDIF(WIN32)

  # The unit-test targets exist only when the C++ library is built, so their
  # locations may be queried, and the tests registered, only in that case.
  IF(PCRE_BUILD_PCRECPP)
    GET_TARGET_PROPERTY(PCRECPP_UNITTEST_EXE
                        pcrecpp_unittest
                        DEBUG_LOCATION)

    GET_TARGET_PROPERTY(PCRE_SCANNER_UNITTEST_EXE
                        pcre_scanner_unittest
                        DEBUG_LOCATION)

    GET_TARGET_PROPERTY(PCRE_STRINGPIECE_UNITTEST_EXE
                        pcre_stringpiece_unittest
                        DEBUG_LOCATION)

    ADD_TEST(pcrecpp_test ${PCRECPP_UNITTEST_EXE})
    ADD_TEST(pcre_scanner_test ${PCRE_SCANNER_UNITTEST_EXE})
    ADD_TEST(pcre_stringpiece_test ${PCRE_STRINGPIECE_UNITTEST_EXE})
  ENDIF(PCRE_BUILD_PCRECPP)
ENDIF(PCRE_BUILD_TESTS)
|
||||
|
||||
# Installation
|
||||
SET(CMAKE_INSTALL_ALWAYS 1)
|
||||
|
||||
INSTALL(TARGETS ${targets}
|
||||
RUNTIME DESTINATION bin
|
||||
LIBRARY DESTINATION lib
|
||||
ARCHIVE DESTINATION lib)
|
||||
|
||||
INSTALL(FILES ${PCRE_HEADERS} ${PCREPOSIX_HEADERS} DESTINATION include)
|
||||
|
||||
FILE(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html)
|
||||
FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1)
|
||||
FILE(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3)
|
||||
|
||||
# Install the C++ headers when the C++ wrapper is built; otherwise drop the
# C++ wrapper's man page from the list of section 3 pages to install.
IF(PCRE_BUILD_PCRECPP)
  INSTALL(FILES ${PCRECPP_HEADERS} DESTINATION include)
ELSE(PCRE_BUILD_PCRECPP)
  # Remove pcrecpp.3
  SET(man3_new)
  FOREACH(man ${man3})
    GET_FILENAME_COMPONENT(man_tmp ${man} NAME)
    IF(NOT man_tmp STREQUAL "pcrecpp.3")
      # Append to the filtered list. Appending to ${man3} would copy the
      # whole unfiltered list, pcrecpp.3 included, on every iteration.
      SET(man3_new ${man3_new} ${man})
    ENDIF(NOT man_tmp STREQUAL "pcrecpp.3")
  ENDFOREACH(man ${man3})
  SET(man3 ${man3_new})
ENDIF(PCRE_BUILD_PCRECPP)
|
||||
|
||||
|
||||
INSTALL(FILES ${man1} DESTINATION man/man1)
|
||||
INSTALL(FILES ${man3} DESTINATION man/man3)
|
||||
INSTALL(FILES ${html} DESTINATION share/doc/pcre/html)
|
||||
|
||||
# help, only for nice output
|
||||
IF(BUILD_SHARED_LIBS)
|
||||
SET(BUILD_STATIC_LIBS OFF)
|
||||
ELSE(BUILD_SHARED_LIBS)
|
||||
SET(BUILD_STATIC_LIBS ON)
|
||||
ENDIF(BUILD_SHARED_LIBS)
|
||||
|
||||
# Print a summary of the chosen configuration, unless PCRE_SHOW_REPORT is off.
IF(PCRE_SHOW_REPORT)
  STRING(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype)
  # Separator between the general flags and the build-type flags; it stays
  # empty when there are no general flags.
  IF (CMAKE_C_FLAGS)
    SET(cfsp " ")
  ENDIF(CMAKE_C_FLAGS)
  IF (CMAKE_CXX_FLAGS)
    SET(cxxfsp " ")
  ENDIF(CMAKE_CXX_FLAGS)
  # UTF-8 support is compiled in when it is requested directly or implied by
  # Unicode property support, so report the effective setting.
  IF(PCRE_SUPPORT_UTF8 OR PCRE_SUPPORT_UNICODE_PROPERTIES)
    SET(pcre_report_utf8 ON)
  ELSE(PCRE_SUPPORT_UTF8 OR PCRE_SUPPORT_UNICODE_PROPERTIES)
    SET(pcre_report_utf8 OFF)
  ENDIF(PCRE_SUPPORT_UTF8 OR PCRE_SUPPORT_UNICODE_PROPERTIES)
  MESSAGE(STATUS "")
  MESSAGE(STATUS "")
  MESSAGE(STATUS "PCRE configuration summary:")
  MESSAGE(STATUS "")
  MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
  MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}")
  MESSAGE(STATUS " C++ compiler .................... : ${CMAKE_CXX_COMPILER}")
  MESSAGE(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}")
  MESSAGE(STATUS " C++ compiler flags .............. : ${CMAKE_CXX_FLAGS}${cxxfsp}${CMAKE_CXX_FLAGS_${buildtype}}")
  MESSAGE(STATUS "")
  MESSAGE(STATUS " Build C++ library ............... : ${PCRE_BUILD_PCRECPP}")
  MESSAGE(STATUS " Enable UTF-8 support ............ : ${pcre_report_utf8}")
  MESSAGE(STATUS " Unicode properties .............. : ${PCRE_SUPPORT_UNICODE_PROPERTIES}")
  MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE_NEWLINE}")
  MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE_SUPPORT_BSR_ANYCRLF}")
  MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE_EBCDIC}")
  MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE_REBUILD_CHARTABLES}")
  MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}")
  MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
  MESSAGE(STATUS " Internal link size .............. : ${PCRE_LINK_SIZE}")
  MESSAGE(STATUS " Match limit ..................... : ${PCRE_MATCH_LIMIT}")
  MESSAGE(STATUS " Match limit recursion ........... : ${PCRE_MATCH_LIMIT_RECURSION}")
  MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
  MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
  MESSAGE(STATUS " Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}")
  MESSAGE(STATUS " Build tests (implies pcretest) .. : ${PCRE_BUILD_TESTS}")
  IF(ZLIB_FOUND)
    MESSAGE(STATUS " Link pcregrep with libz ......... : ${PCRE_SUPPORT_LIBZ}")
  ELSE(ZLIB_FOUND)
    MESSAGE(STATUS " Link pcregrep with libz ......... : None" )
  ENDIF(ZLIB_FOUND)
  IF(BZIP2_FOUND)
    MESSAGE(STATUS " Link pcregrep with libbz2 ....... : ${PCRE_SUPPORT_LIBBZ2}")
  ELSE(BZIP2_FOUND)
    MESSAGE(STATUS " Link pcregrep with libbz2 ....... : None" )
  ENDIF(BZIP2_FOUND)
  IF(NOT PCRE_SUPPORT_LIBREADLINE)
    MESSAGE(STATUS " Link pcretest with libreadline .. : None" )
  ELSE(NOT PCRE_SUPPORT_LIBREADLINE)
    MESSAGE(STATUS " Link pcretest with libreadline .. : ${PCRE_SUPPORT_LIBREADLINE}")
  ENDIF(NOT PCRE_SUPPORT_LIBREADLINE)
  IF(MINGW AND NOT PCRE_STATIC)
    MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
    MESSAGE(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
  ENDIF(MINGW AND NOT PCRE_STATIC)
  MESSAGE(STATUS "")
ENDIF(PCRE_SHOW_REPORT)
|
||||
|
||||
# end CMakeLists.txt
|
|
@ -1,5 +0,0 @@
|
|||
PCRE LICENCE
|
||||
|
||||
Please see the file LICENCE in the PCRE distribution for licensing details.
|
||||
|
||||
End
|
3508
libs/pcre/ChangeLog
3508
libs/pcre/ChangeLog
File diff suppressed because it is too large
Load Diff
|
@ -1,113 +0,0 @@
|
|||
#! /usr/bin/perl -w
|
||||
|
||||
# Script to take the output of nroff -man and remove all the backspacing and
|
||||
# the page footers and the screen commands etc so that it is more usefully
|
||||
# readable online. In fact, in the latest nroff, intermediate footers don't
|
||||
# seem to be generated any more.
|
||||
|
||||
# State carried from one input line to the next.

my $pending_blanks = 0;   # blank lines read but not yet written out
my $after_cut      = 0;   # true when the previous step removed a footer chunk
my $header_wanted  = 1;   # true until the first page header has been printed
my $last_shown;           # the line most recently recorded as printed
my @ahead;                # look-ahead buffer used when testing for a footer

# Lines are read from STDIN and the cleaned text is written to STDOUT.

while (defined(my $line = <STDIN>)) {
    $line =~ s/\x1b\[\d+m//g;    # Drop screen controls "ESC [ number m"
    $line =~ s/.\x8//g;          # Drop "char, backspace" pairs

    # Page headers: only the first one encountered is printed. Every header,
    # printed or not, also swallows the input line that follows it (normally
    # a blank line).

    if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) {
        if ($header_wanted) {
            $header_wanted = 0;
            print $line;
            $last_shown = $line;
            $after_cut  = 0;
        }
        my $discarded = <STDIN>;    # the line after the header is thrown away
        next;
    }

    # Blank lines are only counted here; they are written out later, once it
    # is known whether they belong to a page footer.

    if ($line =~ /^\s*$/) {
        $pending_blanks++;
        $after_cut = 0;
        next;
    }

    # From here on $line is non-blank. Directly after a cut, with no blank
    # line pending, restore one blank line if this line's indentation differs
    # from that of the last recorded line.

    if ($after_cut && $pending_blanks < 1 && defined($last_shown)) {
        my ($old_indent) = $last_shown =~ /^(\s*)/;
        my ($new_indent) = $line       =~ /^(\s*)/;
        $pending_blanks++ if $old_indent ne $new_indent;
    }

    if ($pending_blanks >= 3) {

        # A non-blank line preceded by three or more blanks may be a page
        # footer. Fetch the next three lines, cleaned the same way as above;
        # lines missing at end of input are treated as empty strings.

        foreach my $slot (0 .. 2) {
            my $peeked = <STDIN>;
            $peeked = "" if !defined $peeked;
            $peeked =~ s/\x1b\[\d+m//g;
            $peeked =~ s/.\x8//g;
            $ahead[$slot] = $peeked;
        }

        my $non_blank_ahead = grep { $_ !~ /^\s*$/ } @ahead;

        if ($non_blank_ahead == 0) {

            # <3 blanks><non-blank><3 blanks>: drop all seven lines and
            # remember that a cut has just been made.

            $pending_blanks -= 3;
            $after_cut = 1;
        }
        else {

            # Not a footer: emit the saved blanks, the current line, and the
            # three look-ahead lines.

            print "\n" x $pending_blanks;
            print $line;
            foreach my $peeked (@ahead) {
                # A second backspace pass is applied before output; one pass
                # can leave a new "char, backspace" pair behind when
                # backspaces are adjacent.
                $peeked =~ s/.\x8//g;
                print $peeked;
            }
            $last_shown     = $line;   # the current line is what is recorded
            $after_cut      = 0;
            $pending_blanks = 0;
        }
    }
    else {

        # Fewer than three blanks precede this line. A line starting in
        # column one (other than "Last updated"/"Copyright" lines) is treated
        # as a heading and gets exactly two blank lines before it, unless
        # nothing has been recorded as printed yet.

        if (   $line =~ /^\S/
            && $line !~ /^Last updated/
            && $line !~ /^Copyright/
            && defined($last_shown))
        {
            $pending_blanks = 2;
        }

        print "\n" x $pending_blanks;
        print $line;
        $last_shown     = $line;
        $after_cut      = 0;
        $pending_blanks = 0;
    }
}
|
||||
|
||||
# End
|
|
@ -1,35 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# This is a script for removing trailing whitespace from lines in files that
|
||||
# are listed on the command line.
|
||||
|
||||
# This subroutine does the work for one file.
|
||||
|
||||
# Remove trailing whitespace from every newline-terminated line of one file.
#
# Argument: the path of the file to process.
#
# The file is read completely into memory. It is rewritten in place only when
# at least one line actually changed, so untouched files are never reopened
# for writing. A final line that has no terminating newline is left as it is.
#
# Dies if the file cannot be opened for reading or writing, or if the
# rewritten contents cannot be flushed when the output handle is closed.

sub detrail {
    my ($file) = @_;
    my $changed = 0;

    # Three-argument open with an explicit mode: the file name is taken
    # literally, so leading/trailing blanks or characters such as '>' and '|'
    # in the name are not interpreted by open.
    open(my $in, '<', $file) or die "Can't open $file for input: $!";
    my @lines = <$in>;
    close($in);

    foreach my $line (@lines) {
        # s/// returns true only when it replaced something.
        $changed = 1 if $line =~ s/\s+\n$/\n/;
    }

    return unless $changed;

    open(my $out, '>', $file) or die "Can't open $file for output: $!";
    print {$out} @lines;
    # Buffered write errors only surface when the handle is closed.
    close($out) or die "Can't finish writing $file: $!";
    return;
}
|
||||
|
||||
# This is the main program
|
||||
|
||||
# Nothing is inserted between the items of a printed list, so the lines of a
# file are written back exactly as they are held in memory.
$, = '';

# Process each file named on the command line in turn.
foreach my $path (@ARGV) { detrail($path); }
|
||||
|
||||
# End
|
|
@ -1,418 +0,0 @@
|
|||
Technical Notes about PCRE
|
||||
--------------------------
|
||||
|
||||
These are very rough technical notes that record potentially useful information
|
||||
about PCRE internals.
|
||||
|
||||
Historical note 1
|
||||
-----------------
|
||||
|
||||
Many years ago I implemented some regular expression functions to an algorithm
|
||||
suggested by Martin Richards. These were not Unix-like in form, and were quite
|
||||
restricted in what they could do by comparison with Perl. The interesting part
|
||||
about the algorithm was that the amount of space required to hold the compiled
|
||||
form of an expression was known in advance. The code to apply an expression did
|
||||
not operate by backtracking, as the original Henry Spencer code and current
|
||||
Perl code does, but instead checked all possibilities simultaneously by keeping
|
||||
a list of current states and checking all of them as it advanced through the
|
||||
subject string. In the terminology of Jeffrey Friedl's book, it was a "DFA
|
||||
algorithm", though it was not a traditional Finite State Machine (FSM). When
|
||||
the pattern was all used up, all remaining states were possible matches, and
|
||||
the one matching the longest subset of the subject string was chosen. This did
|
||||
not necessarily maximize the individual wild portions of the pattern, as is
|
||||
expected in Unix and Perl-style regular expressions.
|
||||
|
||||
Historical note 2
|
||||
-----------------
|
||||
|
||||
By contrast, the code originally written by Henry Spencer (which was
|
||||
subsequently heavily modified for Perl) compiles the expression twice: once in
|
||||
a dummy mode in order to find out how much store will be needed, and then for
|
||||
real. (The Perl version probably doesn't do this any more; I'm talking about
|
||||
the original library.) The execution function operates by backtracking and
|
||||
maximizing (or, optionally, minimizing in Perl) the amount of the subject that
|
||||
matches individual wild portions of the pattern. This is an "NFA algorithm" in
|
||||
Friedl's terminology.
|
||||
|
||||
OK, here's the real stuff
|
||||
-------------------------
|
||||
|
||||
For the set of functions that form the "basic" PCRE library (which are
|
||||
unrelated to those mentioned above), I tried at first to invent an algorithm
|
||||
that used an amount of store bounded by a multiple of the number of characters
|
||||
in the pattern, to save on compiling time. However, because of the greater
|
||||
complexity in Perl regular expressions, I couldn't do this. In any case, a
|
||||
first pass through the pattern is helpful for other reasons.
|
||||
|
||||
Computing the memory requirement: how it was
|
||||
--------------------------------------------
|
||||
|
||||
Up to and including release 6.7, PCRE worked by running a very degenerate first
|
||||
pass to calculate a maximum store size, and then a second pass to do the real
|
||||
compile - which might use a bit less than the predicted amount of memory. The
|
||||
idea was that this would turn out faster than the Henry Spencer code because
|
||||
the first pass is degenerate and the second pass can just store stuff straight
|
||||
into the vector, which it knows is big enough.
|
||||
|
||||
Computing the memory requirement: how it is
|
||||
-------------------------------------------
|
||||
|
||||
By the time I was working on a potential 6.8 release, the degenerate first pass
|
||||
had become very complicated and hard to maintain. Indeed one of the early
|
||||
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
|
||||
I had a flash of inspiration as to how I could run the real compile function in
|
||||
a "fake" mode that enables it to compute how much memory it would need, while
|
||||
actually only ever using a few hundred bytes of working memory, and without too
|
||||
many tests of the mode that might slow it down. So I re-factored the compiling
|
||||
functions to work this way. This got rid of about 600 lines of source. It
|
||||
should make future maintenance and development easier. As this was such a major
|
||||
change, I never released 6.8, instead upping the number to 7.0 (other quite
|
||||
major changes are also present in the 7.0 release).
|
||||
|
||||
A side effect of this work is that the previous limit of 200 on the nesting
|
||||
depth of parentheses was removed. However, there is a downside: pcre_compile()
|
||||
runs more slowly than before (30% or more, depending on the pattern) because it
|
||||
is doing a full analysis of the pattern. My hope is that this is not a big
|
||||
issue.
|
||||
|
||||
Traditional matching function
|
||||
-----------------------------
|
||||
|
||||
The "traditional", and original, matching function is called pcre_exec(), and
|
||||
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
|
||||
and the way that Perl works. Not surprising, since it is intended to be as
|
||||
compatible with Perl as possible. This is the function most users of PCRE will
|
||||
use most of the time.
|
||||
|
||||
Supplementary matching function
|
||||
-------------------------------
|
||||
|
||||
From PCRE 6.0, there is also a supplementary matching function called
|
||||
pcre_dfa_exec(). This implements a DFA matching algorithm that searches
|
||||
simultaneously for all possible matches that start at one point in the subject
|
||||
string. (Going back to my roots: see Historical Note 1 above.) This function
|
||||
interprets the same compiled pattern data as pcre_exec(); however, not all the
|
||||
facilities are available, and those that are do not always work in quite the
|
||||
same way. See the user documentation for details.
|
||||
|
||||
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM,
|
||||
because it may have a number of states active at one time. More work would be
|
||||
needed at compile time to produce a traditional FSM where only one state is
|
||||
ever active at once. I believe some other regex matchers work this way.
|
||||
|
||||
|
||||
Format of compiled patterns
|
||||
---------------------------
|
||||
|
||||
The compiled form of a pattern is a vector of bytes, containing items of
|
||||
variable length. The first byte in an item is an opcode, and the length of the
|
||||
item is either implicit in the opcode or contained in the data bytes that
|
||||
follow it.
|
||||
|
||||
In many cases below LINK_SIZE data values are specified for offsets within the
|
||||
compiled pattern. The default value for LINK_SIZE is 2, but PCRE can be
|
||||
compiled to use 3-byte or 4-byte values for these offsets (impairing the
|
||||
performance). This is necessary only when patterns whose compiled length is
|
||||
greater than 64K are going to be processed. In this description, we assume the
|
||||
"normal" compilation options. Data values that are counts (e.g. for
|
||||
quantifiers) are always just two bytes long.
|
||||
|
||||
A list of the opcodes follows:
|
||||
|
||||
Opcodes with no following data
|
||||
------------------------------
|
||||
|
||||
These items are all just one byte long
|
||||
|
||||
OP_END end of pattern
|
||||
OP_ANY match any one character other than newline
|
||||
OP_ALLANY match any one character, including newline
|
||||
OP_ANYBYTE match any single byte, even in UTF-8 mode
|
||||
OP_SOD match start of data: \A
|
||||
OP_SOM start of match (subject + offset): \G
|
||||
OP_SET_SOM set start of match (\K)
|
||||
OP_CIRC ^ (start of data, or after \n in multiline)
|
||||
OP_NOT_WORD_BOUNDARY \B
|
||||
OP_WORD_BOUNDARY \b
|
||||
OP_NOT_DIGIT \D
|
||||
OP_DIGIT \d
|
||||
OP_NOT_HSPACE \H
|
||||
OP_HSPACE \h
|
||||
OP_NOT_WHITESPACE \S
|
||||
OP_WHITESPACE \s
|
||||
OP_NOT_VSPACE \V
|
||||
OP_VSPACE \v
|
||||
OP_NOT_WORDCHAR \W
|
||||
OP_WORDCHAR \w
|
||||
OP_EODN match end of data or \n at end: \Z
|
||||
OP_EOD match end of data: \z
|
||||
OP_DOLL $ (end of data, or before \n in multiline)
|
||||
OP_EXTUNI match an extended Unicode character
|
||||
OP_ANYNL match any Unicode newline sequence
|
||||
|
||||
OP_ACCEPT )
|
||||
OP_COMMIT )
|
||||
OP_FAIL ) These are Perl 5.10's "backtracking
|
||||
OP_PRUNE ) control verbs".
|
||||
OP_SKIP )
|
||||
OP_THEN )
|
||||
|
||||
|
||||
Repeating single characters
|
||||
---------------------------
|
||||
|
||||
The common repeats (*, +, ?) when applied to a single character use the
|
||||
following opcodes:
|
||||
|
||||
OP_STAR
|
||||
OP_MINSTAR
|
||||
OP_POSSTAR
|
||||
OP_PLUS
|
||||
OP_MINPLUS
|
||||
OP_POSPLUS
|
||||
OP_QUERY
|
||||
OP_MINQUERY
|
||||
OP_POSQUERY
|
||||
|
||||
In ASCII mode, these are two-byte items; in UTF-8 mode, the length is variable.
|
||||
Those with "MIN" in their name are the minimizing versions. Those with "POS" in
|
||||
their names are possessive versions. Each is followed by the character that is
|
||||
to be repeated. Other repeats make use of
|
||||
|
||||
OP_UPTO
|
||||
OP_MINUPTO
|
||||
OP_POSUPTO
|
||||
OP_EXACT
|
||||
|
||||
which are followed by a two-byte count (most significant first) and the
|
||||
repeated character. OP_UPTO matches from 0 to the given number. A repeat with a
|
||||
non-zero minimum and a fixed maximum is coded as an OP_EXACT followed by an
|
||||
OP_UPTO (or OP_MINUPTO or OP_POSUPTO).
|
||||
|
||||
|
||||
Repeating character types
|
||||
-------------------------
|
||||
|
||||
Repeats of things like \d are done exactly as for single characters, except
|
||||
that instead of a character, the opcode for the type is stored in the data
|
||||
byte. The opcodes are:
|
||||
|
||||
OP_TYPESTAR
|
||||
OP_TYPEMINSTAR
|
||||
OP_TYPEPOSSTAR
|
||||
OP_TYPEPLUS
|
||||
OP_TYPEMINPLUS
|
||||
OP_TYPEPOSPLUS
|
||||
OP_TYPEQUERY
|
||||
OP_TYPEMINQUERY
|
||||
OP_TYPEPOSQUERY
|
||||
OP_TYPEUPTO
|
||||
OP_TYPEMINUPTO
|
||||
OP_TYPEPOSUPTO
|
||||
OP_TYPEEXACT
|
||||
|
||||
|
||||
Match by Unicode property
|
||||
-------------------------
|
||||
|
||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||
character by testing its Unicode property (the \p and \P escape sequences).
|
||||
Each is followed by two bytes that encode the desired property as a type and a
|
||||
value.
|
||||
|
||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||
three bytes: OP_PROP or OP_NOTPROP and then the desired property type and
|
||||
value.
|
||||
|
||||
|
||||
Matching literal characters
|
||||
---------------------------
|
||||
|
||||
The OP_CHAR opcode is followed by a single character that is to be matched
|
||||
casefully. For caseless matching, OP_CHARNC is used. In UTF-8 mode, the
|
||||
character may be more than one byte long. (Earlier versions of PCRE used
|
||||
multi-character strings, but this was changed to allow some new features to be
|
||||
added.)
|
||||
|
||||
|
||||
Character classes
|
||||
-----------------
|
||||
|
||||
If there is only one character, OP_CHAR or OP_CHARNC is used for a positive
|
||||
class, and OP_NOT for a negative one (that is, for something like [^a]).
|
||||
However, in UTF-8 mode, the use of OP_NOT applies only to characters with
|
||||
values < 128, because OP_NOT is confined to single bytes.
|
||||
|
||||
Another set of repeating opcodes (OP_NOTSTAR etc.) are used for a repeated,
|
||||
negated, single-character class. The normal ones (OP_STAR etc.) are used for a
|
||||
repeated positive single-character class.
|
||||
|
||||
When there's more than one character in a class and all the characters are less
|
||||
than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a negative
|
||||
one. In either case, the opcode is followed by a 32-byte bit map containing a 1
|
||||
bit for every character that is acceptable. The bits are counted from the least
|
||||
significant end of each byte.
|
||||
|
||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 mode,
|
||||
subject characters with values greater than 255 can be handled correctly. For
|
||||
OP_CLASS they don't match, whereas for OP_NCLASS they do.
|
||||
|
||||
For classes containing characters with values > 255, OP_XCLASS is used. It
|
||||
optionally uses a bit map (if any characters lie within it), followed by a list
|
||||
of pairs and single characters. There is a flag character that indicates
|
||||
whether it's a positive or a negative class.
|
||||
|
||||
|
||||
Back references
|
||||
---------------
|
||||
|
||||
OP_REF is followed by two bytes containing the reference number.
|
||||
|
||||
|
||||
Repeating character classes and back references
|
||||
-----------------------------------------------
|
||||
|
||||
Single-character classes are handled specially (see above). This section
|
||||
applies to OP_CLASS and OP_REF. In both cases, the repeat information follows
|
||||
the base item. The matching code looks at the following opcode to see if it is
|
||||
one of
|
||||
|
||||
OP_CRSTAR
|
||||
OP_CRMINSTAR
|
||||
OP_CRPLUS
|
||||
OP_CRMINPLUS
|
||||
OP_CRQUERY
|
||||
OP_CRMINQUERY
|
||||
OP_CRRANGE
|
||||
OP_CRMINRANGE
|
||||
|
||||
All but the last two are just single-byte items. The others are followed by
|
||||
four bytes of data, comprising the minimum and maximum repeat counts. There are
|
||||
no special possessive opcodes for these repeats; a possessive repeat is
|
||||
compiled into an atomic group.
|
||||
|
||||
|
||||
Brackets and alternation
|
||||
------------------------
|
||||
|
||||
A pair of non-capturing (round) brackets is wrapped round each expression at
|
||||
compile time, so alternation always happens in the context of brackets.
|
||||
|
||||
[Note for North Americans: "bracket" to some English speakers, including
|
||||
myself, can be round, square, curly, or pointy. Hence this usage.]
|
||||
|
||||
Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99
|
||||
capturing brackets and it used a different opcode for each one. From release
|
||||
3.5, the limit was removed by putting the bracket number into the data for
|
||||
higher-numbered brackets. From release 7.0 all capturing brackets are handled
|
||||
this way, using the single opcode OP_CBRA.
|
||||
|
||||
A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
|
||||
next alternative OP_ALT or, if there aren't any branches, to the matching
|
||||
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
|
||||
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
||||
number immediately follows the offset, always as a 2-byte item.
|
||||
|
||||
OP_KET is used for subpatterns that do not repeat indefinitely, while
|
||||
OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
|
||||
maximally respectively. All three are followed by LINK_SIZE bytes giving (as a
|
||||
positive number) the offset back to the matching bracket opcode.
|
||||
|
||||
If a subpattern is quantified such that it is permitted to match zero times, it
|
||||
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
||||
single-byte opcodes that tell the matcher that skipping the following
|
||||
subpattern entirely is a valid branch. In the case of the first two, not
|
||||
skipping the pattern is also valid (greedy and non-greedy). The third is used
|
||||
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
|
||||
because it may be called as a subroutine from elsewhere in the regex.
|
||||
|
||||
A subpattern with an indefinite maximum repetition is replicated in the
|
||||
compiled data its minimum number of times (or once with OP_BRAZERO if the
|
||||
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
|
||||
as appropriate.
|
||||
|
||||
A subpattern with a bounded maximum repetition is replicated in a nested
|
||||
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
|
||||
before each replication after the minimum, so that, for example, (abc){2,5} is
|
||||
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group
|
||||
has the same number.
|
||||
|
||||
When a repeated subpattern has an unbounded upper limit, it is checked to see
|
||||
whether it could match an empty string. If this is the case, the opcode in the
|
||||
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
|
||||
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
||||
OP_KETRMAX, and if so, to break the loop.
|
||||
|
||||
|
||||
Assertions
|
||||
----------
|
||||
|
||||
Forward assertions are just like other subpatterns, but starting with one of
|
||||
the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
||||
is OP_REVERSE, followed by a two byte count of the number of characters to move
|
||||
back the pointer in the subject string. When operating in UTF-8 mode, the count
|
||||
is a character count rather than a byte count. A separate count is present in
|
||||
each alternative of a lookbehind assertion, allowing them to have different
|
||||
fixed lengths.
|
||||
|
||||
|
||||
Once-only (atomic) subpatterns
|
||||
------------------------------
|
||||
|
||||
These are also just like other subpatterns, but they start with the opcode
|
||||
OP_ONCE. The check for matching an empty string in an unbounded repeat is
|
||||
handled entirely at runtime, so there is just this one opcode.
|
||||
|
||||
|
||||
Conditional subpatterns
|
||||
-----------------------
|
||||
|
||||
These are like other subpatterns, but they start with the opcode OP_COND, or
|
||||
OP_SCOND for one that might match an empty string in an unbounded repeat. If
|
||||
the condition is a back reference, this is stored at the start of the
|
||||
subpattern using the opcode OP_CREF followed by two bytes containing the
|
||||
reference number. If the condition is "in recursion" (coded as "(?(R)"), or "in
|
||||
recursion of group x" (coded as "(?(Rx)"), the group number is stored at the
|
||||
start of the subpattern using the opcode OP_RREF, and a value of zero for "the
|
||||
whole pattern". For a DEFINE condition, just the single byte OP_DEF is used (it
|
||||
has no associated data). Otherwise, a conditional subpattern always starts with
|
||||
one of the assertions.
|
||||
|
||||
|
||||
Recursion
|
||||
---------
|
||||
|
||||
Recursion either matches the current regex, or some subexpression. The opcode
|
||||
OP_RECURSE is followed by a value which is the offset to the starting bracket
|
||||
from the start of the whole pattern. From release 6.5, OP_RECURSE is
|
||||
automatically wrapped inside OP_ONCE brackets (because otherwise some patterns
|
||||
broke it). OP_RECURSE is also used for "subroutine" calls, even though they
|
||||
are not strictly a recursion.
|
||||
|
||||
|
||||
Callout
|
||||
-------
|
||||
|
||||
OP_CALLOUT is followed by one byte of data that holds a callout number in the
|
||||
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
|
||||
cases there follows a two-byte value giving the offset in the pattern to the
|
||||
start of the following item, and another two-byte item giving the length of the
|
||||
next item.
|
||||
|
||||
|
||||
Changing options
|
||||
----------------
|
||||
|
||||
If any of the /i, /m, or /s options are changed within a pattern, an OP_OPT
|
||||
opcode is compiled, followed by one byte containing the new settings of these
|
||||
flags. If there are several alternatives, there is an occurrence of OP_OPT at
|
||||
the start of all those following the first options change, to set appropriate
|
||||
options for the start of the alternative. Immediately after the end of the
|
||||
group there is another such item to reset the flags to their previous values. A
|
||||
change of flag right at the very start of the pattern can be handled entirely
|
||||
at compile time, and so does not cause anything to be put into the compiled
|
||||
data.
|
||||
|
||||
Philip Hazel
|
||||
April 2008
|
|
@ -1,291 +0,0 @@
|
|||
Installation Instructions
|
||||
*************************
|
||||
|
||||
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
|
||||
2006, 2007, 2008 Free Software Foundation, Inc.
|
||||
|
||||
This file is free documentation; the Free Software Foundation gives
|
||||
unlimited permission to copy, distribute and modify it.
|
||||
|
||||
Basic Installation
|
||||
==================
|
||||
|
||||
Briefly, the shell commands `./configure; make; make install' should
|
||||
configure, build, and install this package. The following
|
||||
more-detailed instructions are generic; see the `README' file for
|
||||
instructions specific to this package.
|
||||
|
||||
The `configure' shell script attempts to guess correct values for
|
||||
various system-dependent variables used during compilation. It uses
|
||||
those values to create a `Makefile' in each directory of the package.
|
||||
It may also create one or more `.h' files containing system-dependent
|
||||
definitions. Finally, it creates a shell script `config.status' that
|
||||
you can run in the future to recreate the current configuration, and a
|
||||
file `config.log' containing compiler output (useful mainly for
|
||||
debugging `configure').
|
||||
|
||||
It can also use an optional file (typically called `config.cache'
|
||||
and enabled with `--cache-file=config.cache' or simply `-C') that saves
|
||||
the results of its tests to speed up reconfiguring. Caching is
|
||||
disabled by default to prevent problems with accidental use of stale
|
||||
cache files.
|
||||
|
||||
If you need to do unusual things to compile the package, please try
|
||||
to figure out how `configure' could check whether to do them, and mail
|
||||
diffs or instructions to the address given in the `README' so they can
|
||||
be considered for the next release. If you are using the cache, and at
|
||||
some point `config.cache' contains results you don't want to keep, you
|
||||
may remove or edit it.
|
||||
|
||||
The file `configure.ac' (or `configure.in') is used to create
|
||||
`configure' by a program called `autoconf'. You need `configure.ac' if
|
||||
you want to change it or regenerate `configure' using a newer version
|
||||
of `autoconf'.
|
||||
|
||||
The simplest way to compile this package is:
|
||||
|
||||
1. `cd' to the directory containing the package's source code and type
|
||||
`./configure' to configure the package for your system.
|
||||
|
||||
Running `configure' might take a while. While running, it prints
|
||||
some messages telling which features it is checking for.
|
||||
|
||||
2. Type `make' to compile the package.
|
||||
|
||||
3. Optionally, type `make check' to run any self-tests that come with
|
||||
the package.
|
||||
|
||||
4. Type `make install' to install the programs and any data files and
|
||||
documentation.
|
||||
|
||||
5. You can remove the program binaries and object files from the
|
||||
source code directory by typing `make clean'. To also remove the
|
||||
files that `configure' created (so you can compile the package for
|
||||
a different kind of computer), type `make distclean'. There is
|
||||
also a `make maintainer-clean' target, but that is intended mainly
|
||||
for the package's developers. If you use it, you may have to get
|
||||
all sorts of other programs in order to regenerate files that came
|
||||
with the distribution.
|
||||
|
||||
6. Often, you can also type `make uninstall' to remove the installed
|
||||
files again.
|
||||
|
||||
Compilers and Options
|
||||
=====================
|
||||
|
||||
Some systems require unusual options for compilation or linking that
|
||||
the `configure' script does not know about. Run `./configure --help'
|
||||
for details on some of the pertinent environment variables.
|
||||
|
||||
You can give `configure' initial values for configuration parameters
|
||||
by setting variables in the command line or in the environment. Here
|
||||
is an example:
|
||||
|
||||
./configure CC=c99 CFLAGS=-g LIBS=-lposix
|
||||
|
||||
*Note Defining Variables::, for more details.
|
||||
|
||||
Compiling For Multiple Architectures
|
||||
====================================
|
||||
|
||||
You can compile the package for more than one kind of computer at the
|
||||
same time, by placing the object files for each architecture in their
|
||||
own directory. To do this, you can use GNU `make'. `cd' to the
|
||||
directory where you want the object files and executables to go and run
|
||||
the `configure' script. `configure' automatically checks for the
|
||||
source code in the directory that `configure' is in and in `..'.
|
||||
|
||||
With a non-GNU `make', it is safer to compile the package for one
|
||||
architecture at a time in the source code directory. After you have
|
||||
installed the package for one architecture, use `make distclean' before
|
||||
reconfiguring for another architecture.
|
||||
|
||||
On MacOS X 10.5 and later systems, you can create libraries and
|
||||
executables that work on multiple system types--known as "fat" or
|
||||
"universal" binaries--by specifying multiple `-arch' options to the
|
||||
compiler but only a single `-arch' option to the preprocessor. Like
|
||||
this:
|
||||
|
||||
./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
|
||||
CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
|
||||
CPP="gcc -E" CXXCPP="g++ -E"
|
||||
|
||||
This is not guaranteed to produce working output in all cases; you
|
||||
may have to build one architecture at a time and combine the results
|
||||
using the `lipo' tool if you have problems.
|
||||
|
||||
Installation Names
|
||||
==================
|
||||
|
||||
By default, `make install' installs the package's commands under
|
||||
`/usr/local/bin', include files under `/usr/local/include', etc. You
|
||||
can specify an installation prefix other than `/usr/local' by giving
|
||||
`configure' the option `--prefix=PREFIX'.
|
||||
|
||||
You can specify separate installation prefixes for
|
||||
architecture-specific files and architecture-independent files. If you
|
||||
pass the option `--exec-prefix=PREFIX' to `configure', the package uses
|
||||
PREFIX as the prefix for installing programs and libraries.
|
||||
Documentation and other data files still use the regular prefix.
|
||||
|
||||
In addition, if you use an unusual directory layout you can give
|
||||
options like `--bindir=DIR' to specify different values for particular
|
||||
kinds of files. Run `configure --help' for a list of the directories
|
||||
you can set and what kinds of files go in them.
|
||||
|
||||
If the package supports it, you can cause programs to be installed
|
||||
with an extra prefix or suffix on their names by giving `configure' the
|
||||
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
|
||||
|
||||
Optional Features
|
||||
=================
|
||||
|
||||
Some packages pay attention to `--enable-FEATURE' options to
|
||||
`configure', where FEATURE indicates an optional part of the package.
|
||||
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
|
||||
is something like `gnu-as' or `x' (for the X Window System). The
|
||||
`README' should mention any `--enable-' and `--with-' options that the
|
||||
package recognizes.
|
||||
|
||||
For packages that use the X Window System, `configure' can usually
|
||||
find the X include and library files automatically, but if it doesn't,
|
||||
you can use the `configure' options `--x-includes=DIR' and
|
||||
`--x-libraries=DIR' to specify their locations.
|
||||
|
||||
Particular systems
|
||||
==================
|
||||
|
||||
On HP-UX, the default C compiler is not ANSI C compatible. If GNU
|
||||
CC is not installed, it is recommended to use the following options in
|
||||
order to use an ANSI C compiler:
|
||||
|
||||
./configure CC="cc -Ae"
|
||||
|
||||
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
|
||||
|
||||
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
|
||||
parse its `<wchar.h>' header file. The option `-nodtk' can be used as
|
||||
a workaround. If GNU CC is not installed, it is therefore recommended
|
||||
to try
|
||||
|
||||
./configure CC="cc"
|
||||
|
||||
and if that doesn't work, try
|
||||
|
||||
./configure CC="cc -nodtk"
|
||||
|
||||
Specifying the System Type
|
||||
==========================
|
||||
|
||||
There may be some features `configure' cannot figure out
|
||||
automatically, but needs to determine by the type of machine the package
|
||||
will run on. Usually, assuming the package is built to be run on the
|
||||
_same_ architectures, `configure' can figure that out, but if it prints
|
||||
a message saying it cannot guess the machine type, give it the
|
||||
`--build=TYPE' option. TYPE can either be a short name for the system
|
||||
type, such as `sun4', or a canonical name which has the form:
|
||||
|
||||
CPU-COMPANY-SYSTEM
|
||||
|
||||
where SYSTEM can have one of these forms:
|
||||
|
||||
OS KERNEL-OS
|
||||
|
||||
See the file `config.sub' for the possible values of each field. If
|
||||
`config.sub' isn't included in this package, then this package doesn't
|
||||
need to know the machine type.
|
||||
|
||||
If you are _building_ compiler tools for cross-compiling, you should
|
||||
use the option `--target=TYPE' to select the type of system they will
|
||||
produce code for.
|
||||
|
||||
If you want to _use_ a cross compiler, that generates code for a
|
||||
platform different from the build platform, you should specify the
|
||||
"host" platform (i.e., that on which the generated programs will
|
||||
eventually be run) with `--host=TYPE'.
|
||||
|
||||
Sharing Defaults
|
||||
================
|
||||
|
||||
If you want to set default values for `configure' scripts to share,
|
||||
you can create a site shell script called `config.site' that gives
|
||||
default values for variables like `CC', `cache_file', and `prefix'.
|
||||
`configure' looks for `PREFIX/share/config.site' if it exists, then
|
||||
`PREFIX/etc/config.site' if it exists. Or, you can set the
|
||||
`CONFIG_SITE' environment variable to the location of the site script.
|
||||
A warning: not all `configure' scripts look for a site script.
|
||||
|
||||
Defining Variables
|
||||
==================
|
||||
|
||||
Variables not defined in a site shell script can be set in the
|
||||
environment passed to `configure'. However, some packages may run
|
||||
configure again during the build, and the customized values of these
|
||||
variables may be lost. In order to avoid this problem, you should set
|
||||
them in the `configure' command line, using `VAR=value'. For example:
|
||||
|
||||
./configure CC=/usr/local2/bin/gcc
|
||||
|
||||
causes the specified `gcc' to be used as the C compiler (unless it is
|
||||
overridden in the site shell script).
|
||||
|
||||
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
|
||||
an Autoconf bug. Until the bug is fixed you can use this workaround:
|
||||
|
||||
CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
|
||||
|
||||
`configure' Invocation
|
||||
======================
|
||||
|
||||
`configure' recognizes the following options to control how it
|
||||
operates.
|
||||
|
||||
`--help'
|
||||
`-h'
|
||||
Print a summary of all of the options to `configure', and exit.
|
||||
|
||||
`--help=short'
|
||||
`--help=recursive'
|
||||
Print a summary of the options unique to this package's
|
||||
`configure', and exit. The `short' variant lists options used
|
||||
only in the top level, while the `recursive' variant lists options
|
||||
also present in any nested packages.
|
||||
|
||||
`--version'
|
||||
`-V'
|
||||
Print the version of Autoconf used to generate the `configure'
|
||||
script, and exit.
|
||||
|
||||
`--cache-file=FILE'
|
||||
Enable the cache: use and save the results of the tests in FILE,
|
||||
traditionally `config.cache'. FILE defaults to `/dev/null' to
|
||||
disable caching.
|
||||
|
||||
`--config-cache'
|
||||
`-C'
|
||||
Alias for `--cache-file=config.cache'.
|
||||
|
||||
`--quiet'
|
||||
`--silent'
|
||||
`-q'
|
||||
Do not print messages saying which checks are being made. To
|
||||
suppress all normal output, redirect it to `/dev/null' (any error
|
||||
messages will still be shown).
|
||||
|
||||
`--srcdir=DIR'
|
||||
Look for the package's source code in directory DIR. Usually
|
||||
`configure' can determine that directory automatically.
|
||||
|
||||
`--prefix=DIR'
|
||||
Use DIR as the installation prefix. *Note Installation Names::
|
||||
for more details, including other options available for fine-tuning
|
||||
the installation locations.
|
||||
|
||||
`--no-create'
|
||||
`-n'
|
||||
Run the configure checks, but stop before creating any output
|
||||
files.
|
||||
|
||||
`configure' also accepts some other, not widely useful, options. Run
|
||||
`configure --help' for more details.
|
||||
|
|
@ -1,68 +0,0 @@
|
|||
PCRE LICENCE
|
||||
------------
|
||||
|
||||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
|
||||
specified below. The documentation for PCRE, supplied in the "doc"
|
||||
directory, is distributed under the same terms as the software itself.
|
||||
|
||||
The basic library functions are written in C and are freestanding. Also
|
||||
included in the distribution is a set of C++ wrapper functions.
|
||||
|
||||
|
||||
THE BASIC LIBRARY FUNCTIONS
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2009 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE C++ WRAPPER FUNCTIONS
|
||||
-------------------------
|
||||
|
||||
Contributed by: Google Inc.
|
||||
|
||||
Copyright (c) 2007-2008, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE "BSD" LICENCE
|
||||
-----------------
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the name of Google
|
||||
Inc. nor the names of their contributors may be used to endorse or
|
||||
promote products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
End
|
|
@ -1,334 +0,0 @@
|
|||
## Process this file with automake to produce Makefile.in.
|
||||
|
||||
pcrecpp_html = doc/html/pcrecpp.html
|
||||
dist_noinst_DATA = $(pcrecpp_html)
|
||||
|
||||
# The Libtool libraries to install. We'll add to this later.
|
||||
lib_LTLIBRARIES =
|
||||
|
||||
# Unit tests you want to run when people type 'make check'.
|
||||
# TESTS is for binary unit tests, check_SCRIPTS for script-based tests
|
||||
TESTS =
|
||||
check_SCRIPTS =
|
||||
dist_noinst_SCRIPTS =
|
||||
|
||||
# Some of the binaries we make are to be installed, and others are
|
||||
# (non-user-visible) helper programs needed to build libpcre.
|
||||
bin_PROGRAMS =
|
||||
noinst_PROGRAMS =
|
||||
|
||||
# Additional files to delete on 'make clean' and 'make maintainer-clean'.
|
||||
CLEANFILES =
|
||||
MAINTAINERCLEANFILES =
|
||||
|
||||
# Additional files to bundle with the distribution, over and above what
|
||||
# the Autotools include by default.
|
||||
EXTRA_DIST =
|
||||
|
||||
# These files contain maintenance information
|
||||
EXTRA_DIST += \
|
||||
doc/perltest.txt \
|
||||
NON-UNIX-USE \
|
||||
HACKING
|
||||
|
||||
# These files are used in the preparation of a release
|
||||
EXTRA_DIST += \
|
||||
PrepareRelease \
|
||||
CleanTxt \
|
||||
Detrail \
|
||||
132html \
|
||||
doc/index.html.src
|
||||
|
||||
# These files are to do with building for Virtual Pascal
|
||||
EXTRA_DIST += \
|
||||
makevp.bat \
|
||||
makevp_c.txt \
|
||||
makevp_l.txt \
|
||||
pcregexp.pas
|
||||
|
||||
# These files are usable versions of pcre.h and config.h that are distributed
|
||||
# for the benefit of people who are building PCRE manually, without the
|
||||
# Autotools support.
|
||||
EXTRA_DIST += \
|
||||
pcre.h.generic \
|
||||
config.h.generic
|
||||
|
||||
pcre.h.generic: configure.ac
|
||||
rm -f $@
|
||||
cp -p pcre.h $@
|
||||
|
||||
MAINTAINERCLEANFILES += pcre.h.generic
|
||||
|
||||
# These are the header files we'll install. We do not distribute pcre.h because
|
||||
# it is generated from pcre.h.in.
|
||||
nodist_include_HEADERS = \
|
||||
pcre.h
|
||||
include_HEADERS = \
|
||||
pcreposix.h
|
||||
|
||||
# These additional headers will be installed if C++ support is enabled. We
|
||||
# do not distribute pcrecpparg.h or pcre_stringpiece.h, as these are generated
|
||||
# from corresponding .h.in files (which we do distribute).
|
||||
if WITH_PCRE_CPP
|
||||
nodist_include_HEADERS += \
|
||||
pcrecpparg.h \
|
||||
pcre_stringpiece.h
|
||||
include_HEADERS += \
|
||||
pcrecpp.h \
|
||||
pcre_scanner.h
|
||||
endif # WITH_PCRE_CPP
|
||||
|
||||
bin_SCRIPTS = pcre-config
|
||||
|
||||
## ---------------------------------------------------------------
|
||||
## The dftables program is used to rebuild character tables before compiling
|
||||
## PCRE, if --enable-rebuild-chartables is specified. It is not a user-visible
|
||||
## program. The default (when --enable-rebuild-chartables is not specified) is
|
||||
## to copy a distributed set of tables that are defined for ASCII code. In this
|
||||
## case, dftables is not needed.
|
||||
|
||||
if WITH_REBUILD_CHARTABLES
|
||||
|
||||
noinst_PROGRAMS += dftables
|
||||
dftables_SOURCES = dftables.c
|
||||
|
||||
pcre_chartables.c: dftables$(EXEEXT)
|
||||
./dftables$(EXEEXT) $@
|
||||
else
|
||||
|
||||
pcre_chartables.c: $(srcdir)/pcre_chartables.c.dist
|
||||
rm -f $@
|
||||
$(LN_S) $(srcdir)/pcre_chartables.c.dist $@
|
||||
|
||||
endif # WITH_REBUILD_CHARTABLES
|
||||
|
||||
|
||||
## The main pcre library
|
||||
lib_LTLIBRARIES += libpcre.la
|
||||
libpcre_la_SOURCES = \
|
||||
pcre_compile.c \
|
||||
pcre_config.c \
|
||||
pcre_dfa_exec.c \
|
||||
pcre_exec.c \
|
||||
pcre_fullinfo.c \
|
||||
pcre_get.c \
|
||||
pcre_globals.c \
|
||||
pcre_info.c \
|
||||
pcre_internal.h \
|
||||
pcre_maketables.c \
|
||||
pcre_newline.c \
|
||||
pcre_ord2utf8.c \
|
||||
pcre_refcount.c \
|
||||
pcre_study.c \
|
||||
pcre_tables.c \
|
||||
pcre_try_flipped.c \
|
||||
pcre_ucd.c \
|
||||
pcre_valid_utf8.c \
|
||||
pcre_version.c \
|
||||
pcre_xclass.c \
|
||||
ucp.h
|
||||
|
||||
## This file is generated as part of the building process, so don't distribute.
|
||||
nodist_libpcre_la_SOURCES = \
|
||||
pcre_chartables.c
|
||||
|
||||
# The pcre_printint.src file is #included by some source files, so it must be
|
||||
# distributed. The pcre_chartables.c.dist file is the default version of
|
||||
# pcre_chartables.c, used unless --enable-rebuild-chartables is specified.
|
||||
EXTRA_DIST += pcre_printint.src pcre_chartables.c.dist
|
||||
|
||||
libpcre_la_LDFLAGS = $(EXTRA_LIBPCRE_LDFLAGS)
|
||||
|
||||
CLEANFILES += pcre_chartables.c
|
||||
|
||||
## A version of the main pcre library that has a posix re API.
|
||||
lib_LTLIBRARIES += libpcreposix.la
|
||||
libpcreposix_la_SOURCES = \
|
||||
pcreposix.c
|
||||
libpcreposix_la_LDFLAGS = $(EXTRA_LIBPCREPOSIX_LDFLAGS)
|
||||
libpcreposix_la_LIBADD = libpcre.la
|
||||
|
||||
## There's a C++ library as well.
|
||||
if WITH_PCRE_CPP
|
||||
|
||||
lib_LTLIBRARIES += libpcrecpp.la
|
||||
libpcrecpp_la_SOURCES = \
|
||||
pcrecpp_internal.h \
|
||||
pcrecpp.cc \
|
||||
pcre_scanner.cc \
|
||||
pcre_stringpiece.cc
|
||||
libpcrecpp_la_LDFLAGS = $(EXTRA_LIBPCRECPP_LDFLAGS)
|
||||
libpcrecpp_la_LIBADD = libpcre.la
|
||||
|
||||
TESTS += pcrecpp_unittest
|
||||
noinst_PROGRAMS += pcrecpp_unittest
|
||||
pcrecpp_unittest_SOURCES = pcrecpp_unittest.cc
|
||||
pcrecpp_unittest_LDADD = libpcrecpp.la
|
||||
|
||||
TESTS += pcre_scanner_unittest
|
||||
noinst_PROGRAMS += pcre_scanner_unittest
|
||||
pcre_scanner_unittest_SOURCES = pcre_scanner_unittest.cc
|
||||
pcre_scanner_unittest_LDADD = libpcrecpp.la
|
||||
|
||||
TESTS += pcre_stringpiece_unittest
|
||||
noinst_PROGRAMS += pcre_stringpiece_unittest
|
||||
pcre_stringpiece_unittest_SOURCES = pcre_stringpiece_unittest.cc
|
||||
pcre_stringpiece_unittest_LDADD = libpcrecpp.la
|
||||
|
||||
endif # WITH_PCRE_CPP
|
||||
|
||||
## The main unit tests
|
||||
|
||||
# Each unit test is a binary plus a script that runs that binary in various
|
||||
# ways. We install these test binaries in case folks find it helpful.
|
||||
|
||||
TESTS += RunTest
|
||||
dist_noinst_SCRIPTS += RunTest
|
||||
EXTRA_DIST += RunTest.bat
|
||||
bin_PROGRAMS += pcretest
|
||||
pcretest_SOURCES = pcretest.c
|
||||
pcretest_LDADD = libpcreposix.la $(LIBREADLINE)
|
||||
|
||||
TESTS += RunGrepTest
|
||||
dist_noinst_SCRIPTS += RunGrepTest
|
||||
bin_PROGRAMS += pcregrep
|
||||
pcregrep_SOURCES = pcregrep.c
|
||||
pcregrep_LDADD = libpcreposix.la $(LIBZ) $(LIBBZ2)
|
||||
|
||||
EXTRA_DIST += \
|
||||
testdata/grepinput \
|
||||
testdata/grepinput8 \
|
||||
testdata/grepinputv \
|
||||
testdata/grepinputx \
|
||||
testdata/greplist \
|
||||
testdata/grepoutput \
|
||||
testdata/grepoutput8 \
|
||||
testdata/grepoutputN \
|
||||
testdata/testinput1 \
|
||||
testdata/testinput2 \
|
||||
testdata/testinput3 \
|
||||
testdata/testinput4 \
|
||||
testdata/testinput5 \
|
||||
testdata/testinput6 \
|
||||
testdata/testinput7 \
|
||||
testdata/testinput8 \
|
||||
testdata/testinput9 \
|
||||
testdata/testinput10 \
|
||||
testdata/testoutput1 \
|
||||
testdata/testoutput2 \
|
||||
testdata/testoutput3 \
|
||||
testdata/testoutput4 \
|
||||
testdata/testoutput5 \
|
||||
testdata/testoutput6 \
|
||||
testdata/testoutput7 \
|
||||
testdata/testoutput8 \
|
||||
testdata/testoutput9 \
|
||||
testdata/testoutput10 \
|
||||
testdata/wintestinput3 \
|
||||
testdata/wintestoutput3 \
|
||||
perltest.pl
|
||||
|
||||
CLEANFILES += \
|
||||
testsavedregex \
|
||||
teststderr \
|
||||
testtry \
|
||||
testNinput
|
||||
|
||||
|
||||
# PCRE demonstration program. No longer built automatically. The point is that
|
||||
# the users should build it themselves. So just distribute the source.
|
||||
# noinst_PROGRAMS += pcredemo
|
||||
# pcredemo_SOURCES = pcredemo.c
|
||||
# pcredemo_LDADD = libpcre.la
|
||||
|
||||
EXTRA_DIST += pcredemo.c
|
||||
|
||||
|
||||
## Utility rules, documentation, etc.
|
||||
|
||||
# A compatibility line, the old build system worked with 'make test'
|
||||
test: check ;
|
||||
|
||||
|
||||
# A PCRE user submitted the following addition, saying that it "will allow
|
||||
# anyone using the 'mingw32' compiler to simply type 'make pcre.dll' and get a
|
||||
# nice DLL for Windows use". (It is used by the pcre.dll target.)
|
||||
DLL_OBJS= pcre_compile.o pcre_config.o \
|
||||
pcre_dfa_exec.o pcre_exec.o pcre_fullinfo.o pcre_get.o \
|
||||
pcre_globals.o pcre_info.o pcre_maketables.o \
|
||||
pcre_newline.o pcre_ord2utf8.o pcre_refcount.o \
|
||||
pcre_study.o pcre_tables.o pcre_try_flipped.o \
|
||||
pcre_ucd.o pcre_valid_utf8.o pcre_version.o \
|
||||
pcre_chartables.o \
|
||||
pcre_xclass.o
|
||||
|
||||
# A PCRE user submitted the following addition, saying that it "will allow
|
||||
# anyone using the 'mingw32' compiler to simply type 'make pcre.dll' and get a
|
||||
# nice DLL for Windows use".
|
||||
pcre.dll: $(DLL_OBJS)
|
||||
$(CC) -shared -o pcre.dll -Wl,"--strip-all" -Wl,"--export-all-symbols" $(DLL_OBJS)
|
||||
|
||||
|
||||
# We have .pc files for pkg-config users.
|
||||
pkgconfigdir = $(libdir)/pkgconfig
|
||||
pkgconfig_DATA = libpcre.pc
|
||||
if WITH_PCRE_CPP
|
||||
pkgconfig_DATA += libpcrecpp.pc
|
||||
endif
|
||||
|
||||
dist_man_MANS = \
|
||||
doc/pcre.3 \
|
||||
doc/pcre-config.1 \
|
||||
doc/pcre_compile.3 \
|
||||
doc/pcre_compile2.3 \
|
||||
doc/pcre_config.3 \
|
||||
doc/pcre_copy_named_substring.3 \
|
||||
doc/pcre_copy_substring.3 \
|
||||
doc/pcre_dfa_exec.3 \
|
||||
doc/pcre_exec.3 \
|
||||
doc/pcre_free_substring.3 \
|
||||
doc/pcre_free_substring_list.3 \
|
||||
doc/pcre_fullinfo.3 \
|
||||
doc/pcre_get_named_substring.3 \
|
||||
doc/pcre_get_stringnumber.3 \
|
||||
doc/pcre_get_stringtable_entries.3 \
|
||||
doc/pcre_get_substring.3 \
|
||||
doc/pcre_get_substring_list.3 \
|
||||
doc/pcre_info.3 \
|
||||
doc/pcre_maketables.3 \
|
||||
doc/pcre_refcount.3 \
|
||||
doc/pcre_study.3 \
|
||||
doc/pcre_version.3 \
|
||||
doc/pcreapi.3 \
|
||||
doc/pcrebuild.3 \
|
||||
doc/pcrecallout.3 \
|
||||
doc/pcrecompat.3 \
|
||||
doc/pcregrep.1 \
|
||||
doc/pcrematching.3 \
|
||||
doc/pcrepartial.3 \
|
||||
doc/pcrepattern.3 \
|
||||
doc/pcreperform.3 \
|
||||
doc/pcreposix.3 \
|
||||
doc/pcreprecompile.3 \
|
||||
doc/pcresample.3 \
|
||||
doc/pcrestack.3 \
|
||||
doc/pcresyntax.3 \
|
||||
doc/pcretest.1
|
||||
|
||||
pcrecpp_man = doc/pcrecpp.3
|
||||
EXTRA_DIST += $(pcrecpp_man)
|
||||
|
||||
if WITH_PCRE_CPP
|
||||
man_MANS = $(pcrecpp_man)
|
||||
endif
|
||||
|
||||
## CMake support
|
||||
|
||||
EXTRA_DIST += \
|
||||
cmake/COPYING-CMAKE-SCRIPTS \
|
||||
cmake/FindPackageHandleStandardArgs.cmake \
|
||||
cmake/FindReadline.cmake \
|
||||
CMakeLists.txt \
|
||||
config-cmake.h.in
|
||||
|
||||
## end Makefile.am
|
428
libs/pcre/NEWS
428
libs/pcre/NEWS
|
@ -1,428 +0,0 @@
|
|||
News about PCRE releases
|
||||
------------------------
|
||||
|
||||
Release 7.9 11-Apr-09
|
||||
---------------------
|
||||
|
||||
Mostly bugfixes and tidies with just a couple of minor functional additions.
|
||||
|
||||
|
||||
Release 7.8 05-Sep-08
|
||||
---------------------
|
||||
|
||||
More bug fixes, plus a performance improvement in Unicode character property
|
||||
lookup.
|
||||
|
||||
|
||||
Release 7.7 07-May-08
|
||||
---------------------
|
||||
|
||||
This is once again mainly a bug-fix release, but there are a couple of new
|
||||
features.
|
||||
|
||||
|
||||
Release 7.6 28-Jan-08
|
||||
---------------------
|
||||
|
||||
The main reason for having this release so soon after 7.5 is because it fixes a
|
||||
potential buffer overflow problem in pcre_compile() when run in UTF-8 mode. In
|
||||
addition, the CMake configuration files have been brought up to date.
|
||||
|
||||
|
||||
Release 7.5 10-Jan-08
|
||||
---------------------
|
||||
|
||||
This is mainly a bug-fix release. However the ability to link pcregrep with
|
||||
libz or libbz2 and the ability to link pcretest with libreadline have been
|
||||
added. Also the --line-offsets and --file-offsets options were added to
|
||||
pcregrep.
|
||||
|
||||
|
||||
Release 7.4 21-Sep-07
|
||||
---------------------
|
||||
|
||||
The only change of specification is the addition of options to control whether
|
||||
\R matches any Unicode line ending (the default) or just CR, LF, and CRLF.
|
||||
Otherwise, the changes are bug fixes and a refactoring to reduce the number of
|
||||
relocations needed in a shared library. There have also been some documentation
|
||||
updates, in particular, some more information about using CMake to build PCRE
|
||||
has been added to the NON-UNIX-USE file.
|
||||
|
||||
|
||||
Release 7.3 28-Aug-07
|
||||
---------------------
|
||||
|
||||
Most changes are bug fixes. Some that are not:
|
||||
|
||||
1. There is some support for Perl 5.10's experimental "backtracking control
|
||||
verbs" such as (*PRUNE).
|
||||
|
||||
2. UTF-8 checking is now as per RFC 3629 instead of RFC 2279; this is more
|
||||
restrictive in the strings it accepts.
|
||||
|
||||
3. Checking for potential integer overflow has been made more dynamic, and as a
|
||||
consequence there is no longer a hard limit on the size of a subpattern that
|
||||
has a limited repeat count.
|
||||
|
||||
4. When CRLF is a valid line-ending sequence, pcre_exec() and pcre_dfa_exec()
|
||||
no longer advance by two characters instead of one when an unanchored match
|
||||
fails at CRLF if there are explicit CR or LF matches within the pattern.
|
||||
This gets rid of some anomalous effects that previously occurred.
|
||||
|
||||
5. Some PCRE-specific settings for varying the newline options at the start of
|
||||
a pattern have been added.
|
||||
|
||||
|
||||
Release 7.2 19-Jun-07
|
||||
---------------------
|
||||
|
||||
WARNING: saved patterns that were compiled by earlier versions of PCRE must be
|
||||
recompiled for use with 7.2 (necessitated by the addition of \K, \h, \H, \v,
|
||||
and \V).
|
||||
|
||||
Correction to the notes for 7.1: the note about shared libraries for Windows is
|
||||
wrong. Previously, three libraries were built, but each could function
|
||||
independently. For example, the pcreposix library also included all the
|
||||
functions from the basic pcre library. The change is that the three libraries
|
||||
are no longer independent. They are like the Unix libraries. To use the
|
||||
pcreposix functions, for example, you need to link with both the pcreposix and
|
||||
the basic pcre library.
|
||||
|
||||
Some more features from Perl 5.10 have been added:
|
||||
|
||||
(?-n) and (?+n) relative references for recursion and subroutines.
|
||||
|
||||
(?(-n) and (?(+n) relative references as conditions.
|
||||
|
||||
\k{name} and \g{name} are synonyms for \k<name>.
|
||||
|
||||
\K to reset the start of the matched string; for example, (foo)\Kbar
|
||||
matches bar preceded by foo, but only sets bar as the matched string.
|
||||
|
||||
(?| introduces a group where the capturing parentheses in each alternative
|
||||
start from the same number; for example, (?|(abc)|(xyz)) sets capturing
|
||||
parentheses number 1 in both cases.
|
||||
|
||||
\h, \H, \v, \V match horizontal and vertical whitespace, respectively.
|
||||
|
||||
|
||||
Release 7.1 24-Apr-07
|
||||
---------------------
|
||||
|
||||
There is only one new feature in this release: a linebreak setting of
|
||||
PCRE_NEWLINE_ANYCRLF. It is a cut-down version of PCRE_NEWLINE_ANY, which
|
||||
recognizes only CRLF, CR, and LF as linebreaks.
|
||||
|
||||
A few bugs are fixed (see ChangeLog for details), but the major change is a
|
||||
complete re-implementation of the build system. This now has full Autotools
|
||||
support and so is now "standard" in some sense. It should help with compiling
|
||||
PCRE in a wide variety of environments.
|
||||
|
||||
NOTE: when building shared libraries for Windows, three dlls are now built,
|
||||
called libpcre, libpcreposix, and libpcrecpp. Previously, everything was
|
||||
included in a single dll.
|
||||
|
||||
Another important change is that the dftables auxiliary program is no longer
|
||||
compiled and run at "make" time by default. Instead, a default set of character
|
||||
tables (assuming ASCII coding) is used. If you want to use dftables to generate
|
||||
the character tables as previously, add --enable-rebuild-chartables to the
|
||||
"configure" command. You must do this if you are compiling PCRE to run on a
|
||||
system that uses EBCDIC code.
|
||||
|
||||
There is a discussion about character tables in the README file. The default is
|
||||
not to use dftables so that there is no problem when cross-compiling.
|
||||
|
||||
|
||||
Release 7.0 19-Dec-06
|
||||
---------------------
|
||||
|
||||
This release has a new major number because there have been some internal
|
||||
upheavals to facilitate the addition of new optimizations and other facilities,
|
||||
and to make subsequent maintenance and extension easier. Compilation is likely
|
||||
to be a bit slower, but there should be no major effect on runtime performance.
|
||||
Previously compiled patterns are NOT upwards compatible with this release. If
|
||||
you have saved compiled patterns from a previous release, you will have to
|
||||
re-compile them. Important changes that are visible to users are:
|
||||
|
||||
1. The Unicode property tables have been updated to Unicode 5.0.0, which adds
|
||||
some more scripts.
|
||||
|
||||
2. The option PCRE_NEWLINE_ANY causes PCRE to recognize any Unicode newline
|
||||
sequence as a newline.
|
||||
|
||||
3. The \R escape matches a single Unicode newline sequence as a single unit.
|
||||
|
||||
4. New features that will appear in Perl 5.10 are now in PCRE. These include
|
||||
alternative Perl syntax for named parentheses, and Perl syntax for
|
||||
recursion.
|
||||
|
||||
5. The C++ wrapper interface has been extended by the addition of a
|
||||
QuoteMeta function and the ability to allow copy construction and
|
||||
assignment.
|
||||
|
||||
For a complete list of changes, see the ChangeLog file.
|
||||
|
||||
|
||||
Release 6.7 04-Jul-06
|
||||
---------------------
|
||||
|
||||
The main additions to this release are the ability to use the same name for
|
||||
multiple sets of parentheses, and support for CRLF line endings in both the
|
||||
library and pcregrep (and in pcretest for testing).
|
||||
|
||||
Thanks to Ian Taylor, the stack usage for many kinds of pattern has been
|
||||
significantly reduced for certain subject strings.
|
||||
|
||||
|
||||
Release 6.5 01-Feb-06
|
||||
---------------------
|
||||
|
||||
Important changes in this release:
|
||||
|
||||
1. A number of new features have been added to pcregrep.
|
||||
|
||||
2. The Unicode property tables have been updated to Unicode 4.1.0, and the
|
||||
supported properties have been extended with script names such as "Arabic",
|
||||
and the derived properties "Any" and "L&". This has necessitated a change to
|
||||
the internal format of compiled patterns. Any saved compiled patterns that
|
||||
use \p or \P must be recompiled.
|
||||
|
||||
3. The specification of recursion in patterns has been changed so that all
|
||||
recursive subpatterns are automatically treated as atomic groups. Thus, for
|
||||
example, (?R) is treated as if it were (?>(?R)). This is necessary because
|
||||
otherwise there are situations where recursion does not work.
|
||||
|
||||
See the ChangeLog for a complete list of changes, which include a number of bug
|
||||
fixes and tidies.
|
||||
|
||||
|
||||
Release 6.0 07-Jun-05
|
||||
---------------------
|
||||
|
||||
The release number has been increased to 6.0 because of the addition of several
|
||||
major new pieces of functionality.
|
||||
|
||||
A new function, pcre_dfa_exec(), which implements pattern matching using a DFA
|
||||
algorithm, has been added. This has a number of advantages for certain cases,
|
||||
though it does run more slowly, and lacks the ability to capture substrings. On
|
||||
the other hand, it does find all matches, not just the first, and it works
|
||||
better for partial matching. The pcrematching man page discusses the
|
||||
differences.
|
||||
|
||||
The pcretest program has been enhanced so that it can make use of the new
|
||||
pcre_dfa_exec() matching function and the extra features it provides.
|
||||
|
||||
The distribution now includes a C++ wrapper library. This is built
|
||||
automatically if a C++ compiler is found. The pcrecpp man page discusses this
|
||||
interface.
|
||||
|
||||
The code itself has been re-organized into many more files, one for each
|
||||
function, so it no longer requires everything to be linked in when static
|
||||
linkage is used. As a consequence, some internal functions have had to have
|
||||
their names exposed. These functions all have names starting with _pcre_. They
|
||||
are undocumented, and are not intended for use by outside callers.
|
||||
|
||||
The pcregrep program has been enhanced with new functionality such as
|
||||
multiline-matching and options for outputting more matching context. See the
|
||||
ChangeLog for a complete list of changes to the library and the utility
|
||||
programs.
|
||||
|
||||
|
||||
Release 5.0 13-Sep-04
|
||||
---------------------
|
||||
|
||||
The licence under which PCRE is released has been changed to the more
|
||||
conventional "BSD" licence.
|
||||
|
||||
In the code, some bugs have been fixed, and there are also some major changes
|
||||
in this release (which is why I've increased the number to 5.0). Some changes
|
||||
are internal rearrangements, and some provide a number of new facilities. The
|
||||
new features are:
|
||||
|
||||
1. There's an "automatic callout" feature that inserts callouts before every
|
||||
item in the regex, and there's a new callout field that gives the position
|
||||
in the pattern - useful for debugging and tracing.
|
||||
|
||||
2. The extra_data structure can now be used to pass in a set of character
|
||||
tables at exec time. This is useful if compiled regex are saved and re-used
|
||||
at a later time when the tables may not be at the same address. If the
|
||||
default internal tables are used, the pointer saved with the compiled
|
||||
pattern is now set to NULL, which means that you don't need to do anything
|
||||
special unless you are using custom tables.
|
||||
|
||||
3. It is possible, with some restrictions on the content of the regex, to
|
||||
request "partial" matching. A special return code is given if all of the
|
||||
subject string matched part of the regex. This could be useful for testing
|
||||
an input field as it is being typed.
|
||||
|
||||
4. There is now some optional support for Unicode character properties, which
|
||||
means that pattern items such as \p{Lu} and \X can now be used. Only
|
||||
the general category properties are supported. If PCRE is compiled with this
|
||||
support, an additional 90K data structure is included, which increases the
|
||||
size of the library dramatically.
|
||||
|
||||
5. There is support for saving compiled patterns and re-using them later.
|
||||
|
||||
6. There is support for running regular expressions that were compiled on a
|
||||
different host with the opposite endianness.
|
||||
|
||||
7. The pcretest program has been extended to accommodate the new features.
|
||||
|
||||
The main internal rearrangement is that sequences of literal characters are no
|
||||
longer handled as strings. Instead, each character is handled on its own. This
|
||||
makes some UTF-8 handling easier, and makes the support of partial matching
|
||||
possible. Compiled patterns containing long literal strings will be larger as a
|
||||
result of this change; I hope that performance will not be much affected.
|
||||
|
||||
|
||||
Release 4.5 01-Dec-03
|
||||
---------------------
|
||||
|
||||
Again mainly a bug-fix and tidying release, with only a couple of new features:
|
||||
|
||||
1. It's possible now to compile PCRE so that it does not use recursive
|
||||
function calls when matching. Instead it gets memory from the heap. This slows
|
||||
things down, but may be necessary on systems with limited stacks.
|
||||
|
||||
2. UTF-8 string checking has been tightened to reject overlong sequences and to
|
||||
check that a starting offset points to the start of a character. Failure of the
|
||||
latter returns a new error code: PCRE_ERROR_BADUTF8_OFFSET.
|
||||
|
||||
3. PCRE can now be compiled for systems that use EBCDIC code.
|
||||
|
||||
|
||||
Release 4.4 21-Aug-03
|
||||
---------------------
|
||||
|
||||
This is mainly a bug-fix and tidying release. The only new feature is that PCRE
|
||||
checks UTF-8 strings for validity by default. There is an option to suppress
|
||||
this, just in case anybody wants that teeny extra bit of performance.
|
||||
|
||||
|
||||
Releases 4.1 - 4.3
|
||||
------------------
|
||||
|
||||
Sorry, I forgot about updating the NEWS file for these releases. Please take a
|
||||
look at ChangeLog.
|
||||
|
||||
|
||||
Release 4.0 17-Feb-03
|
||||
---------------------
|
||||
|
||||
There have been a lot of changes for the 4.0 release, adding additional
|
||||
functionality and mending bugs. Below is a list of the highlights of the new
|
||||
functionality. For full details of these features, please consult the
|
||||
documentation. For a complete list of changes, see the ChangeLog file.
|
||||
|
||||
1. Support for Perl's \Q...\E escapes.
|
||||
|
||||
2. "Possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's Java
|
||||
package. They provide some syntactic sugar for simple cases of "atomic
|
||||
grouping".
|
||||
|
||||
3. Support for the \G assertion. It is true when the current matching position
|
||||
is at the start point of the match.
|
||||
|
||||
4. A new feature that provides some of the functionality that Perl provides
|
||||
with (?{...}). The facility is termed a "callout". The way it is done in PCRE
|
||||
is for the caller to provide an optional function, by setting pcre_callout to
|
||||
its entry point. To get the function called, the regex must include (?C) at
|
||||
appropriate points.
|
||||
|
||||
5. Support for recursive calls to individual subpatterns. This makes it really
|
||||
easy to get totally confused.
|
||||
|
||||
6. Support for named subpatterns. The Python syntax (?P<name>...) is used to
|
||||
name a group.
|
||||
|
||||
7. Several extensions to UTF-8 support; it is now fairly complete. There is an
|
||||
option for pcregrep to make it operate in UTF-8 mode.
|
||||
|
||||
8. The single man page has been split into a number of separate man pages.
|
||||
These also give rise to individual HTML pages which are put in a separate
|
||||
directory. There is an index.html page that lists them all. Some hyperlinking
|
||||
between the pages has been installed.
|
||||
|
||||
|
||||
Release 3.5 15-Aug-01
|
||||
---------------------
|
||||
|
||||
1. The configuring system has been upgraded to use later versions of autoconf
|
||||
and libtool. By default it builds both a shared and a static library if the OS
|
||||
supports it. You can use --disable-shared or --disable-static on the configure
|
||||
command if you want only one of them.
|
||||
|
||||
2. The pcretest utility is now installed along with pcregrep because it is
|
||||
useful for users (to test regexes) and by doing this, it automatically gets
|
||||
relinked by libtool. The documentation has been turned into a man page, so
|
||||
there are now .1, .txt, and .html versions in /doc.
|
||||
|
||||
3. Upgrades to pcregrep:
|
||||
(i) Added long-form option names like gnu grep.
|
||||
(ii) Added --help to list all options with an explanatory phrase.
|
||||
(iii) Added -r, --recursive to recurse into sub-directories.
|
||||
(iv) Added -f, --file to read patterns from a file.
|
||||
|
||||
4. Added --enable-newline-is-cr and --enable-newline-is-lf to the configure
|
||||
script, to force use of CR or LF instead of \n in the source. On non-Unix
|
||||
systems, the value can be set in config.h.
|
||||
|
||||
5. The limit of 200 on non-capturing parentheses is a _nesting_ limit, not an
|
||||
absolute limit. Changed the text of the error message to make this clear, and
|
||||
likewise updated the man page.
|
||||
|
||||
6. The limit of 99 on the number of capturing subpatterns has been removed.
|
||||
The new limit is 65535, which I hope will not be a "real" limit.
|
||||
|
||||
|
||||
Release 3.3 01-Aug-00
|
||||
---------------------
|
||||
|
||||
There is some support for UTF-8 character strings. This is incomplete and
|
||||
experimental. The documentation describes what is and what is not implemented.
|
||||
Otherwise, this is just a bug-fixing release.
|
||||
|
||||
|
||||
Release 3.0 01-Feb-00
|
||||
---------------------
|
||||
|
||||
1. A "configure" script is now used to configure PCRE for Unix systems. It
|
||||
builds a Makefile, a config.h file, and the pcre-config script.
|
||||
|
||||
2. PCRE is built as a shared library by default.
|
||||
|
||||
3. There is support for POSIX classes such as [:alpha:].
|
||||
|
||||
4. There is an experimental recursion feature.
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
IMPORTANT FOR THOSE UPGRADING FROM VERSIONS BEFORE 2.00
|
||||
|
||||
Please note that there has been a change in the API such that a larger
|
||||
ovector is required at matching time, to provide some additional workspace.
|
||||
The new man page has details. This change was necessary in order to support
|
||||
some of the new functionality in Perl 5.005.
|
||||
|
||||
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.00
|
||||
|
||||
Another (I hope this is the last!) change has been made to the API for the
|
||||
pcre_compile() function. An additional argument has been added to make it
|
||||
possible to pass over a pointer to character tables built in the current
|
||||
locale by pcre_maketables(). To use the default tables, this new argument
|
||||
should be passed as NULL.
|
||||
|
||||
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05
|
||||
|
||||
Yet another (and again I hope this really is the last) change has been made
|
||||
to the API for the pcre_exec() function. An additional argument has been
|
||||
added to make it possible to start the match other than at the start of the
|
||||
subject string. This is important if there are lookbehinds. The new man
|
||||
page has the details, but if you just want to convert existing programs, all
|
||||
you need to do is to stick in a new fifth argument to pcre_exec(), with a
|
||||
value of zero. For example, change
|
||||
|
||||
pcre_exec(pattern, extra, subject, length, options, ovec, ovecsize)
|
||||
to
|
||||
pcre_exec(pattern, extra, subject, length, 0, options, ovec, ovecsize)
|
||||
|
||||
****
|
|
@ -1,448 +0,0 @@
|
|||
Compiling PCRE on non-Unix systems
|
||||
----------------------------------
|
||||
|
||||
This document contains the following sections:
|
||||
|
||||
General
|
||||
Generic instructions for the PCRE C library
|
||||
The C++ wrapper functions
|
||||
Building for virtual Pascal
|
||||
Stack size in Windows environments
|
||||
Linking programs in Windows environments
|
||||
Comments about Win32 builds
|
||||
Building PCRE on Windows with CMake
|
||||
Use of relative paths with CMake on Windows
|
||||
Testing with runtest.bat
|
||||
Building under Windows with BCC5.5
|
||||
Building PCRE on OpenVMS
|
||||
|
||||
|
||||
GENERAL
|
||||
|
||||
I (Philip Hazel) have no experience of Windows or VMS systems and how their
|
||||
libraries work. The items in the PCRE distribution and Makefile that relate to
|
||||
anything other than Unix-like systems are untested by me.
|
||||
|
||||
There are some other comments and files (including some documentation in CHM
|
||||
format) in the Contrib directory on the FTP site:
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
|
||||
|
||||
If you want to compile PCRE for a non-Unix system (especially for a system that
|
||||
does not support "configure" and "make" files), note that the basic PCRE
|
||||
library consists entirely of code written in Standard C, and so should compile
|
||||
successfully on any system that has a Standard C compiler and library. The C++
|
||||
wrapper functions are a separate issue (see below).
|
||||
|
||||
The PCRE distribution includes a "configure" file for use by the Configure/Make
|
||||
build system, as found in many Unix-like environments. There is also support
|
||||
for CMake, which some users prefer, in particular in Windows
|
||||
environments. There are some instructions for CMake under Windows in the
|
||||
section entitled "Building PCRE on Windows with CMake" below. CMake can also be used to
|
||||
build PCRE in Unix-like systems.
|
||||
|
||||
|
||||
GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
|
||||
|
||||
The following are generic comments about building the PCRE C library "by hand".
|
||||
|
||||
(1) Copy or rename the file config.h.generic as config.h, and edit the macro
|
||||
settings that it contains to whatever is appropriate for your environment.
|
||||
In particular, if you want to force a specific value for newline, you can
|
||||
define the NEWLINE macro. When you compile any of the PCRE modules, you
|
||||
must specify -DHAVE_CONFIG_H to your compiler so that config.h is included
|
||||
in the sources.
|
||||
|
||||
An alternative approach is not to edit config.h, but to use -D on the
|
||||
compiler command line to make any changes that you need to the
|
||||
configuration options. In this case -DHAVE_CONFIG_H must not be set.
|
||||
|
||||
NOTE: There have been occasions when the way in which certain parameters
|
||||
in config.h are used has changed between releases. (In the configure/make
|
||||
world, this is handled automatically.) When upgrading to a new release,
|
||||
you are strongly advised to review config.h.generic before re-using what
|
||||
you had previously.
|
||||
|
||||
(2) Copy or rename the file pcre.h.generic as pcre.h.
|
||||
|
||||
(3) EITHER:
|
||||
Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
|
||||
|
||||
OR:
|
||||
Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if
|
||||
you have set up config.h), and then run it with the single argument
|
||||
"pcre_chartables.c". This generates a set of standard character tables
|
||||
and writes them to that file. The tables are generated using the default
|
||||
C locale for your system. If you want to use a locale that is specified
|
||||
by LC_xxx environment variables, add the -L option to the dftables
|
||||
command. You must use this method if you are building on a system that
|
||||
uses EBCDIC code.
|
||||
|
||||
The tables in pcre_chartables.c are defaults. The caller of PCRE can
|
||||
specify alternative tables at run time.
|
||||
|
||||
(4) Ensure that you have the following header files:
|
||||
|
||||
pcre_internal.h
|
||||
ucp.h
|
||||
|
||||
(5) Also ensure that you have the following file, which is #included as source
|
||||
when building a debugging version of PCRE, and is also used by pcretest.
|
||||
|
||||
pcre_printint.src
|
||||
|
||||
(6) Compile the following source files, setting -DHAVE_CONFIG_H as a compiler
|
||||
option if you have set up config.h with your configuration, or else use
|
||||
other -D settings to change the configuration as required.
|
||||
|
||||
pcre_chartables.c
|
||||
pcre_compile.c
|
||||
pcre_config.c
|
||||
pcre_dfa_exec.c
|
||||
pcre_exec.c
|
||||
pcre_fullinfo.c
|
||||
pcre_get.c
|
||||
pcre_globals.c
|
||||
pcre_info.c
|
||||
pcre_maketables.c
|
||||
pcre_newline.c
|
||||
pcre_ord2utf8.c
|
||||
pcre_refcount.c
|
||||
pcre_study.c
|
||||
pcre_tables.c
|
||||
pcre_try_flipped.c
|
||||
pcre_ucd.c
|
||||
pcre_valid_utf8.c
|
||||
pcre_version.c
|
||||
pcre_xclass.c
|
||||
|
||||
Make sure that you include -I. in the compiler command (or equivalent for
|
||||
an unusual compiler) so that all included PCRE header files are first
|
||||
sought in the current directory. Otherwise you run the risk of picking up
|
||||
a previously-installed file from somewhere else.
|
||||
|
||||
(7) Now link all the compiled code into an object library in whichever form
|
||||
your system keeps such libraries. This is the basic PCRE C library. If
|
||||
your system has static and shared libraries, you may have to do this once
|
||||
for each type.
|
||||
|
||||
(8) Similarly, compile pcreposix.c (remembering -DHAVE_CONFIG_H if necessary)
|
||||
and link the result (on its own) as the pcreposix library.
|
||||
|
||||
(9) Compile the test program pcretest.c (again, don't forget -DHAVE_CONFIG_H).
|
||||
This needs the functions in the pcre and pcreposix libraries when linking.
|
||||
It also needs the pcre_printint.src source file, which it #includes.
|
||||
|
||||
(10) Run pcretest on the testinput files in the testdata directory, and check
|
||||
that the output matches the corresponding testoutput files. Note that the
|
||||
supplied files are in Unix format, with just LF characters as line
|
||||
terminators. You may need to edit them to change this if your system uses
|
||||
a different convention. If you are using Windows, you probably should use
|
||||
the wintestinput3 file instead of testinput3 (and the corresponding output
|
||||
file). This is a locale test; wintestinput3 sets the locale to "french"
|
||||
rather than "fr_FR", and there are some minor output differences.
|
||||
|
||||
(11) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
||||
uses only the basic PCRE library (it does not need the pcreposix library).
|
||||
|
||||
|
||||
THE C++ WRAPPER FUNCTIONS
|
||||
|
||||
The PCRE distribution also contains some C++ wrapper functions and tests,
|
||||
contributed by Google Inc. On a system that can use "configure" and "make",
|
||||
the functions are automatically built into a library called pcrecpp. It should
|
||||
be straightforward to compile the .cc files manually on other systems. The
|
||||
files called xxx_unittest.cc are test programs for each of the corresponding
|
||||
xxx.cc files.
|
||||
|
||||
|
||||
BUILDING FOR VIRTUAL PASCAL
|
||||
|
||||
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
|
||||
was contributed by Alexander Tokarev. Stefan Weber updated the script and added
|
||||
additional files. The following files in the distribution are for building PCRE
|
||||
for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
|
||||
|
||||
|
||||
STACK SIZE IN WINDOWS ENVIRONMENTS
|
||||
|
||||
The default processor stack size of 1Mb in some Windows environments is too
|
||||
small for matching patterns that need much recursion. In particular, test 2 may
|
||||
fail because of this. Normally, running out of stack causes a crash, but there
|
||||
have been cases where the test program has just died silently. See your linker
|
||||
documentation for how to increase stack size if you experience problems. The
|
||||
Linux default of 8Mb is a reasonable choice for the stack, though even that can
|
||||
be too small for some pattern/subject combinations.
|
||||
|
||||
PCRE has a compile configuration option to disable the use of stack for
|
||||
recursion so that heap is used instead. However, pattern matching is
|
||||
significantly slower when this is done. There is more about stack usage in the
|
||||
"pcrestack" documentation.
|
||||
|
||||
|
||||
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
|
||||
|
||||
If you want to statically link a program against a PCRE library in the form of
|
||||
a non-dll .a file, you must define PCRE_STATIC before including pcre.h,
|
||||
otherwise the pcre_malloc() and pcre_free() exported functions will be declared
|
||||
__declspec(dllimport), with unwanted results.
|
||||
|
||||
|
||||
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
|
||||
|
||||
It is possible to compile programs to use different calling conventions using
|
||||
MSVC. Search the web for "calling conventions" for more information. To make it
|
||||
easier to change the calling convention for the exported functions in the
|
||||
PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external
|
||||
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
|
||||
not set, it defaults to empty; the default calling convention is then used
|
||||
(which is what is wanted most of the time).
|
||||
|
||||
|
||||
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE ON WINDOWS WITH CMAKE" below)
|
||||
|
||||
There are two ways of building PCRE using the "configure, make, make install"
|
||||
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
|
||||
the same thing; they are completely different from each other. There is also
|
||||
support for building using CMake, which some users find a more straightforward
|
||||
way of building PCRE under Windows. However, the tests are not run
|
||||
automatically when CMake is used.
|
||||
|
||||
The MinGW home page (http://www.mingw.org/) says this:
|
||||
|
||||
MinGW: A collection of freely available and freely distributable Windows
|
||||
specific header files and import libraries combined with GNU toolsets that
|
||||
allow one to produce native Windows programs that do not rely on any
|
||||
3rd-party C runtime DLLs.
|
||||
|
||||
The Cygwin home page (http://www.cygwin.com/) says this:
|
||||
|
||||
Cygwin is a Linux-like environment for Windows. It consists of two parts:
|
||||
|
||||
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
|
||||
substantial Linux API functionality
|
||||
|
||||
. A collection of tools which provide Linux look and feel.
|
||||
|
||||
The Cygwin DLL currently works with all recent, commercially released x86 32
|
||||
bit and 64 bit versions of Windows, with the exception of Windows CE.
|
||||
|
||||
On both MinGW and Cygwin, PCRE should build correctly using:
|
||||
|
||||
./configure && make && make install
|
||||
|
||||
This should create two libraries called libpcre and libpcreposix, and, if you
|
||||
have enabled building the C++ wrapper, a third one called libpcrecpp. These are
|
||||
independent libraries: when you link with libpcreposix or libpcrecpp you must
|
||||
also link with libpcre, which contains the basic functions. (Some earlier
|
||||
releases of PCRE included the basic libpcre functions in libpcreposix. This no
|
||||
longer happens.)
|
||||
|
||||
A user submitted a special-purpose patch that makes it easy to create
|
||||
"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll"
|
||||
as a special target. If you use this target, no other files are built, and in
|
||||
particular, the pcretest and pcregrep programs are not built. An example of how
|
||||
this might be used is:
|
||||
|
||||
./configure --enable-utf --disable-cpp CFLAGS="-O3 -s"; make pcre.dll
|
||||
|
||||
Using Cygwin's compiler generates libraries and executables that depend on
|
||||
cygwin1.dll. If a library that is generated this way is distributed,
|
||||
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
|
||||
licence, this forces not only PCRE to be under the GPL, but also the entire
|
||||
application. A distributor who wants to keep their own code proprietary must
|
||||
purchase an appropriate Cygwin licence.
|
||||
|
||||
MinGW has no such restrictions. The MinGW compiler generates a library or
|
||||
executable that can run standalone on Windows without any third party dll or
|
||||
licensing issues.
|
||||
|
||||
But there is more complication:
|
||||
|
||||
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
|
||||
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
|
||||
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
|
||||
gcc and MinGW's gcc). So, a user can:
|
||||
|
||||
. Build native binaries by using MinGW or by getting Cygwin and using
|
||||
-mno-cygwin.
|
||||
|
||||
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
|
||||
compiler flags.
|
||||
|
||||
The test files that are supplied with PCRE are in Unix format, with LF
|
||||
characters as line terminators. It may be necessary to change the line
|
||||
terminators in order to get some of the tests to work. We hope to improve
|
||||
things in this area in future.
|
||||
|
||||
|
||||
BUILDING PCRE ON WINDOWS WITH CMAKE
|
||||
|
||||
CMake is an alternative build facility that can be used instead of the
|
||||
traditional Unix "configure". CMake version 2.4.7 supports Borland makefiles,
|
||||
MinGW makefiles, MSYS makefiles, NMake makefiles, UNIX makefiles, Visual Studio
|
||||
6, Visual Studio 7, Visual Studio 8, and Watcom W8. The following instructions
|
||||
were contributed by a PCRE user.
|
||||
|
||||
1. Download CMake 2.4.7 or above from http://www.cmake.org/, install and ensure
|
||||
that cmake\bin is on your path.
|
||||
|
||||
2. Unzip (retaining folder structure) the PCRE source tree into a source
|
||||
directory such as C:\pcre.
|
||||
|
||||
3. Create a new, empty build directory: C:\pcre\build\
|
||||
|
||||
4. Run CMakeSetup from the Shell environment of your build tool, e.g., Msys
|
||||
for Msys/MinGW or Visual Studio Command Prompt for VC/VC++
|
||||
|
||||
5. Enter C:\pcre\pcre-xx and C:\pcre\build for the source and build
|
||||
directories, respectively
|
||||
|
||||
6. Hit the "Configure" button.
|
||||
|
||||
7. Select the particular IDE / build tool that you are using (Visual Studio,
|
||||
MSYS makefiles, MinGW makefiles, etc.)
|
||||
|
||||
8. The GUI will then list several configuration options. This is where you can
|
||||
enable UTF-8 support, etc.
|
||||
|
||||
9. Hit "Configure" again. The adjacent "OK" button should now be active.
|
||||
|
||||
10. Hit "OK".
|
||||
|
||||
11. The build directory should now contain a usable build system, be it a
|
||||
solution file for Visual Studio, makefiles for MinGW, etc.
|
||||
|
||||
|
||||
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
|
||||
|
||||
A PCRE user comments as follows:
|
||||
|
||||
I thought that others may want to know the current state of
|
||||
CMAKE_USE_RELATIVE_PATHS support on Windows.
|
||||
|
||||
Here it is:
|
||||
-- AdditionalIncludeDirectories is only partially modified (only the
|
||||
first path - see below)
|
||||
-- Only some of the contained file paths are modified - shown below for
|
||||
pcre.vcproj
|
||||
-- It properly modifies
|
||||
|
||||
I am sure CMake people can fix that if they want to. Until then one will
|
||||
need to replace existing absolute paths in project files with relative
|
||||
paths manually (e.g. from VS) - relative to project file location. I did
|
||||
just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big
|
||||
deal.
|
||||
|
||||
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
|
||||
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
|
||||
|
||||
RelativePath="pcre.h">
|
||||
RelativePath="pcre_chartables.c">
|
||||
RelativePath="pcre_chartables.c.rule">
|
||||
|
||||
|
||||
TESTING WITH RUNTEST.BAT
|
||||
|
||||
1. Copy RunTest.bat into the directory where pcretest.exe has been created.
|
||||
|
||||
2. Edit RunTest.bat and insert a line that identifies the relative location of
|
||||
the pcre source, e.g.:
|
||||
|
||||
set srcdir=..\pcre-7.4-RC3
|
||||
|
||||
3. Run RunTest.bat from a command shell environment. Test outputs will
|
||||
automatically be compared to expected results, and discrepancies will be
|
||||
identified in the console output.
|
||||
|
||||
4. To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
|
||||
pcre_scanner_unittest.exe.
|
||||
|
||||
|
||||
BUILDING UNDER WINDOWS WITH BCC5.5
|
||||
|
||||
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
|
||||
|
||||
Some of the core BCC libraries have a version of PCRE from 1998 built in,
|
||||
which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a
|
||||
version mismatch. I'm including an easy workaround below, if you'd like to
|
||||
include it in the non-unix instructions:
|
||||
|
||||
When linking a project with BCC5.5, pcre.lib must be included before any of
|
||||
the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command
|
||||
line.
|
||||
|
||||
|
||||
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
|
||||
|
||||
Vincent Richomme sent a zip archive of files to help with this process. They
|
||||
can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
|
||||
site.
|
||||
|
||||
|
||||
BUILDING PCRE ON OPENVMS
|
||||
|
||||
Dan Mooney sent the following comments about building PCRE on OpenVMS. They
|
||||
relate to an older version of PCRE that used fewer source files, so the exact
|
||||
commands will need changing. See the current list of source files above.
|
||||
|
||||
"It was quite easy to compile and link the library. I don't have a formal
|
||||
make file but the attached file [reproduced below] contains the OpenVMS DCL
|
||||
commands I used to build the library. I had to add #define
|
||||
POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere.
|
||||
|
||||
The library was built on:
|
||||
O/S: HP OpenVMS v7.3-1
|
||||
Compiler: Compaq C v6.5-001-48BCD
|
||||
Linker: vA13-01
|
||||
|
||||
The test results did not match 100% due to the issues you mention in your
|
||||
documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I
|
||||
modified some of the character tables temporarily and was able to get the
|
||||
results to match. Tests using the fr locale did not match since I don't have
|
||||
that locale loaded. The study size was always reported to be 3 less than the
|
||||
value in the standard test output files."
|
||||
|
||||
=========================
|
||||
$! This DCL procedure builds PCRE on OpenVMS
|
||||
$!
|
||||
$! I followed the instructions in the non-unix-use file in the distribution.
|
||||
$!
|
||||
$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES
|
||||
$ COMPILE DFTABLES.C
|
||||
$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ
|
||||
$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C
|
||||
$ COMPILE MAKETABLES.C
|
||||
$ COMPILE GET.C
|
||||
$ COMPILE STUDY.C
|
||||
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
|
||||
$! did not seem to be defined anywhere.
|
||||
$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support.
|
||||
$ COMPILE PCRE.C
|
||||
$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ
|
||||
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
|
||||
$! did not seem to be defined anywhere.
|
||||
$ COMPILE PCREPOSIX.C
|
||||
$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ
|
||||
$ COMPILE PCRETEST.C
|
||||
$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB
|
||||
$! C programs that want access to command line arguments must be
|
||||
$! defined as a symbol
|
||||
$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE"
|
||||
$! Arguments must be enclosed in quotes.
|
||||
$ PCRETEST "-C"
|
||||
$! Test results:
|
||||
$!
|
||||
$! The test results did not match 100%. The functions isprint(), iscntrl(),
|
||||
$! isgraph() and ispunct() on OpenVMS must not produce the same results
|
||||
$! as the system that built the test output files provided with the
|
||||
$! distribution.
|
||||
$!
|
||||
$! The study size did not match and was always 3 less on OpenVMS.
|
||||
$!
|
||||
$! Locale could not be set to fr
|
||||
$!
|
||||
=========================
|
||||
|
||||
Last Updated: 17 March 2009
|
||||
****
|
|
@ -1,214 +0,0 @@
|
|||
#/bin/sh
|
||||
|
||||
# Script to prepare the files for building a PCRE release. It does some
|
||||
# processing of the documentation, detrails files, and creates pcre.h.generic
|
||||
# and config.h.generic (for use by builders who can't run ./configure).
|
||||
|
||||
# You must run this script before running "make dist". It makes use of the
|
||||
# following files:
|
||||
|
||||
# 132html A Perl script that converts a .1 or .3 man page into HTML. It
|
||||
# is called from MakeRelease. It "knows" the relevant troff
|
||||
# constructs that are used in the PCRE man pages.
|
||||
|
||||
# CleanTxt A Perl script that cleans up the output of "nroff -man" by
|
||||
# removing backspaces and other redundant text so as to produce
|
||||
# a readable .txt file.
|
||||
|
||||
# Detrail A Perl script that removes trailing spaces from files.
|
||||
|
||||
# doc/index.html.src
|
||||
# A file that is copied as index.html into the doc/html directory
|
||||
# when the HTML documentation is built. It works like this so that
|
||||
# doc/html can be deleted and re-created from scratch.
|
||||
|
||||
|
||||
# First, sort out the documentation
|
||||
|
||||
cd doc
|
||||
echo Processing documentation
|
||||
|
||||
# Make Text form of the documentation. It needs some mangling to make it
|
||||
# tidy for online reading. Concatenate all the .3 stuff, but omit the
|
||||
# individual function pages.
|
||||
|
||||
cat <<End >pcre.txt
|
||||
-----------------------------------------------------------------------------
|
||||
This file contains a concatenation of the PCRE man pages, converted to plain
|
||||
text format for ease of searching with a text editor, or for use on systems
|
||||
that do not have a man page processor. The small individual files that give
|
||||
synopses of each function in the library have not been included. There are
|
||||
separate text files for the pcregrep and pcretest commands.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
End
|
||||
|
||||
echo "Making pcre.txt"
|
||||
for file in pcre pcrebuild pcrematching pcreapi pcrecallout pcrecompat \
|
||||
pcrepattern pcresyntax pcrepartial pcreprecompile \
|
||||
pcreperform pcreposix pcrecpp pcresample pcrestack ; do
|
||||
echo " Processing $file.3"
|
||||
nroff -c -man $file.3 >$file.rawtxt
|
||||
../CleanTxt <$file.rawtxt >>pcre.txt
|
||||
/bin/rm $file.rawtxt
|
||||
echo "------------------------------------------------------------------------------" >>pcre.txt
|
||||
if [ "$file" != "pcresample" ] ; then
|
||||
echo " " >>pcre.txt
|
||||
echo " " >>pcre.txt
|
||||
fi
|
||||
done
|
||||
|
||||
# The three commands
|
||||
for file in pcretest pcregrep pcre-config ; do
|
||||
echo Making $file.txt
|
||||
nroff -c -man $file.1 >$file.rawtxt
|
||||
../CleanTxt <$file.rawtxt >$file.txt
|
||||
/bin/rm $file.rawtxt
|
||||
done
|
||||
|
||||
|
||||
# Make HTML form of the documentation.
|
||||
|
||||
echo "Making HTML documentation"
|
||||
/bin/rm html/*
|
||||
cp index.html.src html/index.html
|
||||
|
||||
for file in *.1 ; do
|
||||
base=`basename $file .1`
|
||||
echo " Making $base.html"
|
||||
../132html -toc $base <$file >html/$base.html
|
||||
done
|
||||
|
||||
# Exclude table of contents for function summaries. It seems that expr
|
||||
# forces an anchored regex. Also exclude them for small pages that have
|
||||
# only one section.
|
||||
for file in *.3 ; do
|
||||
base=`basename $file .3`
|
||||
toc=-toc
|
||||
if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi
|
||||
if [ "$base" = "pcresample" ] || \
|
||||
[ "$base" = "pcrestack" ] || \
|
||||
[ "$base" = "pcrecompat" ] || \
|
||||
[ "$base" = "pcreperform" ] ; then
|
||||
toc=""
|
||||
fi
|
||||
echo " Making $base.html"
|
||||
../132html $toc $base <$file >html/$base.html
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
done
|
||||
|
||||
# End of documentation processing
|
||||
|
||||
cd ..
|
||||
echo Documentation done
|
||||
|
||||
# These files are detrailed; do not detrail the test data because there may be
|
||||
# significant trailing spaces. The configure files are also omitted from the
|
||||
# detrailing.
|
||||
|
||||
files="\
|
||||
Makefile.am \
|
||||
Makefile.in \
|
||||
configure.ac \
|
||||
README \
|
||||
LICENCE \
|
||||
COPYING \
|
||||
AUTHORS \
|
||||
NEWS \
|
||||
NON-UNIX-USE \
|
||||
INSTALL \
|
||||
132html \
|
||||
CleanTxt \
|
||||
Detrail \
|
||||
ChangeLog \
|
||||
CMakeLists.txt \
|
||||
RunGrepTest \
|
||||
RunTest \
|
||||
RunTest.bat \
|
||||
pcre-config.in \
|
||||
libpcre.pc.in \
|
||||
libpcrecpp.pc.in \
|
||||
config.h.in \
|
||||
pcre_printint.src \
|
||||
pcre_chartables.c.dist \
|
||||
pcredemo.c \
|
||||
pcregrep.c \
|
||||
pcretest.c \
|
||||
dftables.c \
|
||||
pcreposix.c \
|
||||
pcreposix.h \
|
||||
pcre.h.in \
|
||||
pcre_internal.h
|
||||
pcre_compile.c \
|
||||
pcre_config.c \
|
||||
pcre_dfa_exec.c \
|
||||
pcre_exec.c \
|
||||
pcre_fullinfo.c \
|
||||
pcre_get.c \
|
||||
pcre_globals.c \
|
||||
pcre_info.c \
|
||||
pcre_maketables.c \
|
||||
pcre_newline.c \
|
||||
pcre_ord2utf8.c \
|
||||
pcre_refcount.c \
|
||||
pcre_study.c \
|
||||
pcre_tables.c \
|
||||
pcre_try_flipped.c \
|
||||
pcre_ucp_searchfuncs.c \
|
||||
pcre_valid_utf8.c \
|
||||
pcre_version.c \
|
||||
pcre_xclass.c \
|
||||
pcre_scanner.cc \
|
||||
pcre_scanner.h \
|
||||
pcre_scanner_unittest.cc \
|
||||
pcrecpp.cc \
|
||||
pcrecpp.h \
|
||||
pcrecpparg.h.in \
|
||||
pcrecpp_unittest.cc \
|
||||
pcre_stringpiece.cc \
|
||||
pcre_stringpiece.h.in \
|
||||
pcre_stringpiece_unittest.cc \
|
||||
perltest.pl \
|
||||
ucp.h \
|
||||
ucpinternal.h \
|
||||
ucptable.h \
|
||||
makevp.bat \
|
||||
pcre.def \
|
||||
libpcre.def \
|
||||
libpcreposix.def"
|
||||
|
||||
echo Detrailing
|
||||
./Detrail $files doc/p* doc/html/*
|
||||
|
||||
echo Doing basic configure to get default pcre.h and config.h
|
||||
# This is in case the caller has set aliases (as I do - PH)
|
||||
unset cp ls mv rm
|
||||
./configure >/dev/null
|
||||
|
||||
echo Converting pcre.h and config.h to generic forms
|
||||
cp -f pcre.h pcre.h.generic
|
||||
|
||||
perl <<'END'
|
||||
open(IN, "<config.h") || die "Can't open config.h: $!\n";
|
||||
open(OUT, ">config.h.generic") || die "Can't open config.h.generic: $!\n";
|
||||
while (<IN>)
|
||||
{
|
||||
if (/^#define\s(?!PACKAGE)(\w+)/)
|
||||
{
|
||||
print OUT "#ifndef $1\n";
|
||||
print OUT;
|
||||
print OUT "#endif\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
print OUT;
|
||||
}
|
||||
}
|
||||
close IN;
|
||||
close OUT;
|
||||
END
|
||||
|
||||
echo Done
|
||||
|
||||
#End
|
767
libs/pcre/README
767
libs/pcre/README
|
@ -1,767 +0,0 @@
|
|||
README file for PCRE (Perl-compatible regular expression library)
|
||||
-----------------------------------------------------------------
|
||||
|
||||
The latest release of PCRE is always available in three alternative formats
|
||||
from:
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.bz2
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE at
|
||||
|
||||
pcre-dev@exim.org
|
||||
|
||||
Please read the NEWS file if you are upgrading from a previous release.
|
||||
The contents of this README file are:
|
||||
|
||||
The PCRE APIs
|
||||
Documentation for PCRE
|
||||
Contributions by users of PCRE
|
||||
Building PCRE on non-Unix systems
|
||||
Building PCRE on Unix-like systems
|
||||
Retrieving configuration information on Unix-like systems
|
||||
Shared libraries on Unix-like systems
|
||||
Cross-compiling on Unix-like systems
|
||||
Using HP's ANSI C++ compiler (aCC)
|
||||
Making new tarballs
|
||||
Testing PCRE
|
||||
Character tables
|
||||
File manifest
|
||||
|
||||
|
||||
The PCRE APIs
|
||||
-------------
|
||||
|
||||
PCRE is written in C, and it has its own API. The distribution also includes a
|
||||
set of C++ wrapper functions (see the pcrecpp man page for details), courtesy
|
||||
of Google Inc.
|
||||
|
||||
In addition, there is a set of C wrapper functions that are based on the POSIX
|
||||
regular expression API (see the pcreposix man page). These end up in the
|
||||
library called libpcreposix. Note that this just provides a POSIX calling
|
||||
interface to PCRE; the regular expressions themselves still follow Perl syntax
|
||||
and semantics. The POSIX API is restricted, and does not give full access to
|
||||
all of PCRE's facilities.
|
||||
|
||||
The header file for the POSIX-style functions is called pcreposix.h. The
|
||||
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||
with existing files of that name by distributing it that way. To use PCRE with
|
||||
an existing program that uses the POSIX API, pcreposix.h will have to be
|
||||
renamed or pointed at by a link.
|
||||
|
||||
If you are using the POSIX interface to PCRE and there is already a POSIX regex
|
||||
library installed on your system, as well as worrying about the regex.h header
|
||||
file (as mentioned above), you must also take care when linking programs to
|
||||
ensure that they link with PCRE's libpcreposix library. Otherwise they may pick
|
||||
up the POSIX functions of the same name from the other library.
|
||||
|
||||
One way of avoiding this confusion is to compile PCRE with the addition of
|
||||
-Dregcomp=PCREregcomp (and similarly for the other POSIX functions) to the
|
||||
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
|
||||
effect of renaming the functions so that the names no longer clash. Of course,
|
||||
you have to do the same thing for your applications, or write them using the
|
||||
new names.
|
||||
|
||||
|
||||
Documentation for PCRE
|
||||
----------------------
|
||||
|
||||
If you install PCRE in the normal way on a Unix-like system, you will end up
|
||||
with a set of man pages whose names all start with "pcre". The one that is just
|
||||
called "pcre" lists all the others. In addition to these man pages, the PCRE
|
||||
documentation is supplied in two other forms:
|
||||
|
||||
1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
|
||||
doc/pcretest.txt in the source distribution. The first of these is a
|
||||
concatenation of the text forms of all the section 3 man pages except
|
||||
those that summarize individual functions. The other two are the text
|
||||
forms of the section 1 man pages for the pcregrep and pcretest commands.
|
||||
These text forms are provided for ease of scanning with text editors or
|
||||
similar tools. They are installed in <prefix>/share/doc/pcre, where
|
||||
<prefix> is the installation prefix (defaulting to /usr/local).
|
||||
|
||||
2. A set of files containing all the documentation in HTML form, hyperlinked
|
||||
in various ways, and rooted in a file called index.html, is distributed in
|
||||
doc/html and installed in <prefix>/share/doc/pcre/html.
|
||||
|
||||
Users of PCRE have contributed files containing the documentation for various
|
||||
releases in CHM format. These can be found in the Contrib directory of the FTP
|
||||
site (see next section).
|
||||
|
||||
|
||||
Contributions by users of PCRE
|
||||
------------------------------
|
||||
|
||||
You can find contributions from PCRE users in the directory
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
|
||||
|
||||
There is a README file giving brief descriptions of what they are. Some are
|
||||
complete in themselves; others are pointers to URLs containing relevant files.
|
||||
Some of this material is likely to be well out-of-date. Several of the earlier
|
||||
contributions provided support for compiling PCRE on various flavours of
|
||||
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
||||
in the standard distribution, so these contributions have been archived.
|
||||
|
||||
|
||||
Building PCRE on non-Unix systems
|
||||
---------------------------------
|
||||
|
||||
For a non-Unix system, please read the comments in the file NON-UNIX-USE,
|
||||
though if your system supports the use of "configure" and "make" you may be
|
||||
able to build PCRE in the same way as for Unix-like systems. PCRE can also be
|
||||
configured in many platform environments using the GUI facility of CMake's
|
||||
CMakeSetup. It creates Makefiles, solution files, etc.
|
||||
|
||||
PCRE has been compiled on many different operating systems. It should be
|
||||
straightforward to build PCRE on any system that has a Standard C compiler and
|
||||
library, because it uses only Standard C functions.
|
||||
|
||||
|
||||
Building PCRE on Unix-like systems
|
||||
----------------------------------
|
||||
|
||||
If you are using HP's ANSI C++ compiler (aCC), please see the special note
|
||||
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.
|
||||
|
||||
The following instructions assume the use of the widely used "configure, make,
|
||||
make install" process. There is also support for CMake in the PCRE
|
||||
distribution; there are some comments about using CMake in the NON-UNIX-USE
|
||||
file, though it can also be used in Unix-like systems.
|
||||
|
||||
To build PCRE on a Unix-like system, first run the "configure" command from the
|
||||
PCRE distribution directory, with your current directory set to the directory
|
||||
where you want the files to be created. This command is a standard GNU
|
||||
"autoconf" configuration script, for which generic instructions are supplied in
|
||||
the file INSTALL.
|
||||
|
||||
Most commonly, people build PCRE within its own distribution directory, and in
|
||||
this case, on many systems, just running "./configure" is sufficient. However,
|
||||
the usual methods of changing standard defaults are available. For example:
|
||||
|
||||
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
|
||||
|
||||
specifies that the C compiler should be run with the flags '-O2 -Wall' instead
|
||||
of the default, and that "make install" should install PCRE under /opt/local
|
||||
instead of the default /usr/local.
|
||||
|
||||
If you want to build in a different directory, just run "configure" with that
|
||||
directory as current. For example, suppose you have unpacked the PCRE source
|
||||
into /source/pcre/pcre-xxx, but you want to build it in /build/pcre/pcre-xxx:
|
||||
|
||||
cd /build/pcre/pcre-xxx
|
||||
/source/pcre/pcre-xxx/configure
|
||||
|
||||
PCRE is written in C and is normally compiled as a C library. However, it is
|
||||
possible to build it as a C++ library, though the provided building apparatus
|
||||
does not have any features to support this.
|
||||
|
||||
There are some optional features that can be included or omitted from the PCRE
|
||||
library. You can read more about them in the pcrebuild man page.
|
||||
|
||||
. If you want to suppress the building of the C++ wrapper library, you can add
|
||||
--disable-cpp to the "configure" command. Otherwise, when "configure" is run,
|
||||
it will try to find a C++ compiler and C++ header files, and if it succeeds,
|
||||
it will try to build the C++ wrapper.
|
||||
|
||||
. If you want to make use of the support for UTF-8 Unicode character strings in
|
||||
PCRE, you must add --enable-utf8 to the "configure" command. Without it, the
|
||||
code for handling UTF-8 is not included in the library. Even when included,
|
||||
it still has to be enabled by an option at run time. When PCRE is compiled
|
||||
with this option, its input can only either be ASCII or UTF-8, even when
|
||||
running on EBCDIC platforms. It is not possible to use both --enable-utf8 and
|
||||
--enable-ebcdic at the same time.
|
||||
|
||||
. If, in addition to support for UTF-8 character strings, you want to include
|
||||
support for the \P, \p, and \X sequences that recognize Unicode character
|
||||
properties, you must add --enable-unicode-properties to the "configure"
|
||||
command. This adds about 30K to the size of the library (in the form of a
|
||||
property table); only the basic two-letter properties such as Lu are
|
||||
supported.
|
||||
|
||||
. You can build PCRE to recognize either CR or LF or the sequence CRLF or any
|
||||
of the preceding, or any of the Unicode newline sequences as indicating the
|
||||
end of a line. Whatever you specify at build time is the default; the caller
|
||||
of PCRE can change the selection at run time. The default newline indicator
|
||||
is a single LF character (the Unix standard). You can specify the default
|
||||
newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf
|
||||
or --enable-newline-is-crlf or --enable-newline-is-anycrlf or
|
||||
--enable-newline-is-any to the "configure" command, respectively.
|
||||
|
||||
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
|
||||
the standard tests will fail, because the lines in the test files end with
|
||||
LF. Even if the files are edited to change the line endings, there are likely
|
||||
to be some failures. With --enable-newline-is-anycrlf or
|
||||
--enable-newline-is-any, many tests should succeed, but there may be some
|
||||
failures.
|
||||
|
||||
. By default, the sequence \R in a pattern matches any Unicode line ending
|
||||
sequence. This is independent of the option specifying what PCRE considers to
|
||||
be the end of a line (see above). However, the caller of PCRE can restrict \R
|
||||
to match only CR, LF, or CRLF. You can make this the default by adding
|
||||
--enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
|
||||
|
||||
. When called via the POSIX interface, PCRE uses malloc() to get additional
|
||||
storage for processing capturing parentheses if there are more than 10 of
|
||||
them in a pattern. You can increase this threshold by setting, for example,
|
||||
|
||||
--with-posix-malloc-threshold=20
|
||||
|
||||
on the "configure" command.
|
||||
|
||||
. PCRE has a counter that can be set to limit the amount of resources it uses.
|
||||
If the limit is exceeded during a match, the match fails. The default is ten
|
||||
million. You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
on the "configure" command. This is just the default; individual calls to
|
||||
pcre_exec() can supply their own value. There is more discussion on the
|
||||
pcreapi man page.
|
||||
|
||||
. There is a separate counter that limits the depth of recursive function calls
|
||||
during a matching process. This also has a default of ten million, which is
|
||||
essentially "unlimited". You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit-recursion=500000
|
||||
|
||||
Recursive function calls use up the runtime stack; running out of stack can
|
||||
cause programs to crash in strange ways. There is a discussion about stack
|
||||
sizes in the pcrestack man page.
|
||||
|
||||
. The default maximum compiled pattern size is around 64K. You can increase
|
||||
this by adding --with-link-size=3 to the "configure" command. You can
|
||||
increase it even more by setting --with-link-size=4, but this is unlikely
|
||||
ever to be necessary. Increasing the internal link size will reduce
|
||||
performance.
|
||||
|
||||
. You can build PCRE so that its internal match() function that is called from
|
||||
pcre_exec() does not call itself recursively. Instead, it uses memory blocks
|
||||
obtained from the heap via the special functions pcre_stack_malloc() and
|
||||
pcre_stack_free() to save data that would otherwise be saved on the stack. To
|
||||
build PCRE like this, use
|
||||
|
||||
--disable-stack-for-recursion
|
||||
|
||||
on the "configure" command. PCRE runs more slowly in this mode, but it may be
|
||||
necessary in environments with limited stack sizes. This applies only to the
|
||||
pcre_exec() function; it does not apply to pcre_dfa_exec(), which does not
|
||||
use deeply nested recursion. There is a discussion about stack sizes in the
|
||||
pcrestack man page.
|
||||
|
||||
. For speed, PCRE uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. By default, it uses a set of
|
||||
tables for ASCII encoding that is part of the distribution. If you specify
|
||||
|
||||
--enable-rebuild-chartables
|
||||
|
||||
a program called dftables is compiled and run in the default C locale when
|
||||
you obey "make". It builds a source file called pcre_chartables.c. If you do
|
||||
not specify this option, pcre_chartables.c is created as a copy of
|
||||
pcre_chartables.c.dist. See "Character tables" below for further information.
|
||||
|
||||
. It is possible to compile PCRE for use on systems that use EBCDIC as their
|
||||
character code (as opposed to ASCII) by specifying
|
||||
|
||||
--enable-ebcdic
|
||||
|
||||
This automatically implies --enable-rebuild-chartables (see above). However,
|
||||
when PCRE is built this way, it always operates in EBCDIC. It cannot support
|
||||
both EBCDIC and UTF-8.
|
||||
|
||||
. It is possible to compile pcregrep to use libz and/or libbz2, in order to
|
||||
read .gz and .bz2 files (respectively), by specifying one or both of
|
||||
|
||||
--enable-pcregrep-libz
|
||||
--enable-pcregrep-libbz2
|
||||
|
||||
Of course, the relevant libraries must be installed on your system.
|
||||
|
||||
. It is possible to compile pcretest so that it links with the libreadline
|
||||
library, by specifying
|
||||
|
||||
--enable-pcretest-libreadline
|
||||
|
||||
If this is done, when pcretest's input is from a terminal, it reads it using
|
||||
the readline() function. This provides line-editing and history facilities.
|
||||
Note that libreadline is GPL-licenced, so if you distribute a binary of
|
||||
pcretest linked in this way, there may be licensing issues.
|
||||
|
||||
Setting this option causes the -lreadline option to be added to the pcretest
|
||||
build. In many operating environments with a system-installed readline
|
||||
library this is sufficient. However, in some environments (e.g. if an
|
||||
unmodified distribution version of readline is in use), it may be necessary
|
||||
to specify something like LIBS="-lncurses" as well. This is because, to quote
|
||||
the readline INSTALL, "Readline uses the termcap functions, but does not link
|
||||
with the termcap or curses library itself, allowing applications which link
|
||||
with readline the to choose an appropriate library." If you get error
|
||||
messages about missing functions tgetstr, tgetent, tputs, tgetflag, or tgoto,
|
||||
this is the problem, and linking with the ncurses library should fix it.
|
||||
|
||||
The "configure" script builds the following files for the basic C library:
|
||||
|
||||
. Makefile is the makefile that builds the library
|
||||
. config.h contains build-time configuration options for the library
|
||||
. pcre.h is the public PCRE header file
|
||||
. pcre-config is a script that shows the settings of "configure" options
|
||||
. libpcre.pc is data for the pkg-config command
|
||||
. libtool is a script that builds shared and/or static libraries
|
||||
. RunTest is a script for running tests on the basic C library
|
||||
. RunGrepTest is a script for running tests on the pcregrep command
|
||||
|
||||
Versions of config.h and pcre.h are distributed in the PCRE tarballs under
|
||||
the names config.h.generic and pcre.h.generic. These are provided for the
|
||||
benefit of those who have to build PCRE without the benefit of "configure". If
|
||||
you use "configure", the .generic versions are not used.
|
||||
|
||||
If a C++ compiler is found, the following files are also built:
|
||||
|
||||
. libpcrecpp.pc is data for the pkg-config command
|
||||
. pcrecpparg.h is a header file for programs that call PCRE via the C++ wrapper
|
||||
. pcre_stringpiece.h is the header for the C++ "stringpiece" functions
|
||||
|
||||
The "configure" script also creates config.status, which is an executable
|
||||
script that can be run to recreate the configuration, and config.log, which
|
||||
contains compiler output from tests that "configure" runs.
|
||||
|
||||
Once "configure" has run, you can run "make". It builds two libraries, called
|
||||
libpcre and libpcreposix, a test program called pcretest, and the pcregrep
|
||||
command. If a C++ compiler was found on your system, "make" also builds the C++
|
||||
wrapper library, which is called libpcrecpp, and some test programs called
|
||||
pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest.
|
||||
Building the C++ wrapper can be disabled by adding --disable-cpp to the
|
||||
"configure" command.
|
||||
|
||||
The command "make check" runs all the appropriate tests. Details of the PCRE
|
||||
tests are given below in a separate section of this document.
|
||||
|
||||
You can use "make install" to install PCRE into live directories on your
|
||||
system. The following are installed (file names are all relative to the
|
||||
<prefix> that is set when "configure" is run):
|
||||
|
||||
Commands (bin):
|
||||
pcretest
|
||||
pcregrep
|
||||
pcre-config
|
||||
|
||||
Libraries (lib):
|
||||
libpcre
|
||||
libpcreposix
|
||||
libpcrecpp (if C++ support is enabled)
|
||||
|
||||
Configuration information (lib/pkgconfig):
|
||||
libpcre.pc
|
||||
libpcrecpp.pc (if C++ support is enabled)
|
||||
|
||||
Header files (include):
|
||||
pcre.h
|
||||
pcreposix.h
|
||||
pcre_scanner.h )
|
||||
pcre_stringpiece.h ) if C++ support is enabled
|
||||
pcrecpp.h )
|
||||
pcrecpparg.h )
|
||||
|
||||
Man pages (share/man/man{1,3}):
|
||||
pcregrep.1
|
||||
pcretest.1
|
||||
pcre.3
|
||||
pcre*.3 (lots more pages, all starting "pcre")
|
||||
|
||||
HTML documentation (share/doc/pcre/html):
|
||||
index.html
|
||||
*.html (lots more pages, hyperlinked from index.html)
|
||||
|
||||
Text file documentation (share/doc/pcre):
|
||||
AUTHORS
|
||||
COPYING
|
||||
ChangeLog
|
||||
LICENCE
|
||||
NEWS
|
||||
README
|
||||
pcre.txt (a concatenation of the man(3) pages)
|
||||
pcretest.txt the pcretest man page
|
||||
pcregrep.txt the pcregrep man page
|
||||
|
||||
If you want to remove PCRE from your system, you can run "make uninstall".
|
||||
This removes all the files that "make install" installed. However, it does not
|
||||
remove any directories, because these are often shared with other programs.
|
||||
|
||||
|
||||
Retrieving configuration information on Unix-like systems
|
||||
---------------------------------------------------------
|
||||
|
||||
Running "make install" installs the command pcre-config, which can be used to
|
||||
recall information about the PCRE configuration and installation. For example:
|
||||
|
||||
pcre-config --version
|
||||
|
||||
prints the version number, and
|
||||
|
||||
pcre-config --libs
|
||||
|
||||
outputs information about where the library is installed. This command can be
|
||||
included in makefiles for programs that use PCRE, saving the programmer from
|
||||
having to remember too many details.
|
||||
|
||||
The pkg-config command is another system for saving and retrieving information
|
||||
about installed libraries. Instead of separate commands for each library, a
|
||||
single command is used. For example:
|
||||
|
||||
pkg-config --cflags pcre
|
||||
|
||||
The data is held in *.pc files that are installed in a directory called
|
||||
<prefix>/lib/pkgconfig.
|
||||
|
||||
|
||||
Shared libraries on Unix-like systems
|
||||
-------------------------------------
|
||||
|
||||
The default distribution builds PCRE as shared libraries and static libraries,
|
||||
as long as the operating system supports shared libraries. Shared library
|
||||
support relies on the "libtool" script which is built as part of the
|
||||
"configure" process.
|
||||
|
||||
The libtool script is used to compile and link both shared and static
|
||||
libraries. They are placed in a subdirectory called .libs when they are newly
|
||||
built. The programs pcretest and pcregrep are built to use these uninstalled
|
||||
libraries (by means of wrapper scripts in the case of shared libraries). When
|
||||
you use "make install" to install shared libraries, pcregrep and pcretest are
|
||||
automatically re-built to use the newly installed shared libraries before being
|
||||
installed themselves. However, the versions left in the build directory still
|
||||
use the uninstalled libraries.
|
||||
|
||||
To build PCRE using static libraries only you must use --disable-shared when
|
||||
configuring it. For example:
|
||||
|
||||
./configure --prefix=/usr/gnu --disable-shared
|
||||
|
||||
Then run "make" in the usual way. Similarly, you can use --disable-static to
|
||||
build only shared libraries.
|
||||
|
||||
|
||||
Cross-compiling on Unix-like systems
|
||||
------------------------------------
|
||||
|
||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||
order to cross-compile PCRE for some other host. However, you should NOT
|
||||
specify --enable-rebuild-chartables, because if you do, the dftables.c source
|
||||
file is compiled and run on the local host, in order to generate the inbuilt
|
||||
character tables (the pcre_chartables.c file). This will probably not work,
|
||||
because dftables.c needs to be compiled with the local compiler, not the cross
|
||||
compiler.
|
||||
|
||||
When --enable-rebuild-chartables is not specified, pcre_chartables.c is created
|
||||
by making a copy of pcre_chartables.c.dist, which is a default set of tables
|
||||
that assumes ASCII code. Cross-compiling with the default tables should not be
|
||||
a problem.
|
||||
|
||||
If you need to modify the character tables when cross-compiling, you should
|
||||
move pcre_chartables.c.dist out of the way, then compile dftables.c by hand and
|
||||
run it on the local host to make a new version of pcre_chartables.c.dist.
|
||||
Then when you cross-compile PCRE this new version of the tables will be used.
|
||||
|
||||
|
||||
Using HP's ANSI C++ compiler (aCC)
|
||||
----------------------------------
|
||||
|
||||
Unless C++ support is disabled by specifying the "--disable-cpp" option of the
|
||||
"configure" script, you must include the "-AA" option in the CXXFLAGS
|
||||
environment variable in order for the C++ components to compile correctly.
|
||||
|
||||
Also, note that the aCC compiler on PA-RISC platforms may have a defect whereby
|
||||
needed libraries fail to get included when specifying the "-AA" compiler
|
||||
option. If you experience unresolved symbols when linking the C++ programs,
|
||||
use the workaround of specifying the following environment variable prior to
|
||||
running the "configure" script:
|
||||
|
||||
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
|
||||
|
||||
|
||||
Making new tarballs
|
||||
-------------------
|
||||
|
||||
The command "make dist" creates three PCRE tarballs, in tar.gz, tar.bz2, and
|
||||
zip formats. The command "make distcheck" does the same, but then does a trial
|
||||
build of the new distribution to ensure that it works.
|
||||
|
||||
If you have modified any of the man page sources in the doc directory, you
|
||||
should first run the PrepareRelease script before making a distribution. This
|
||||
script creates the .txt and HTML forms of the documentation from the man pages.
|
||||
|
||||
|
||||
Testing PCRE
|
||||
------------
|
||||
|
||||
To test the basic PCRE library on a Unix system, run the RunTest script that is
|
||||
created by the configuring process. There is also a script called RunGrepTest
|
||||
that tests the options of the pcregrep command. If the C++ wrapper library is
|
||||
built, three test programs called pcrecpp_unittest, pcre_scanner_unittest, and
|
||||
pcre_stringpiece_unittest are also built.
|
||||
|
||||
Both the scripts and all the program tests are run if you obey "make check" or
|
||||
"make test". For other systems, see the instructions in NON-UNIX-USE.
|
||||
|
||||
The RunTest script runs the pcretest test program (which is documented in its
|
||||
own man page) on each of the testinput files in the testdata directory in
|
||||
turn, and compares the output with the contents of the corresponding testoutput
|
||||
files. A file called testtry is used to hold the main output from pcretest
|
||||
(testsavedregex is also used as a working file). To run pcretest on just one of
|
||||
the test files, give its number as an argument to RunTest, for example:
|
||||
|
||||
RunTest 2
|
||||
|
||||
The first test file can also be fed directly into the perltest.pl script to
|
||||
check that Perl gives the same results. The only difference you should see is
|
||||
in the first few lines, where the Perl version is given instead of the PCRE
|
||||
version.
|
||||
|
||||
The second set of tests checks pcre_fullinfo(), pcre_info(), pcre_study(),
|
||||
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
|
||||
detection, and run-time flags that are specific to PCRE, as well as the POSIX
|
||||
wrapper API. It also uses the debugging flags to check some of the internals of
|
||||
pcre_compile().
|
||||
|
||||
If you build PCRE with a locale setting that is not the standard C locale, the
|
||||
character tables may be different (see next paragraph). In some cases, this may
|
||||
cause failures in the second set of tests. For example, in a locale where the
|
||||
isprint() function yields TRUE for characters in the range 128-255, the use of
|
||||
[:isascii:] inside a character class defines a different set of characters, and
|
||||
this shows up in this test as a difference in the compiled code, which is being
|
||||
listed for checking. Where the comparison test output contains [\x00-\x7f] the
|
||||
test will contain [\x00-\xff], and similarly in some other cases. This is not a
|
||||
bug in PCRE.
|
||||
|
||||
The third set of tests checks pcre_maketables(), the facility for building a
|
||||
set of character tables for a specific locale and using them instead of the
|
||||
default tables. The tests make use of the "fr_FR" (French) locale. Before
|
||||
running the test, the script checks for the presence of this locale by running
|
||||
the "locale" command. If that command fails, or if it doesn't include "fr_FR"
|
||||
in the list of available locales, the third test cannot be run, and a comment
|
||||
is output to say why. If running this test produces instances of the error
|
||||
|
||||
** Failed to set locale "fr_FR"
|
||||
|
||||
in the comparison output, it means that locale is not available on your system,
|
||||
despite being listed by "locale". This does not mean that PCRE is broken.
|
||||
|
||||
[If you are trying to run this test on Windows, you may be able to get it to
|
||||
work by changing "fr_FR" to "french" everywhere it occurs. Alternatively, use
|
||||
RunTest.bat. The version of RunTest.bat included with PCRE 7.4 and above uses
|
||||
Windows versions of test 2. More info on using RunTest.bat is included in the
|
||||
document entitled NON-UNIX-USE.]
|
||||
|
||||
The fourth test checks the UTF-8 support. It is not run automatically unless
|
||||
PCRE is built with UTF-8 support. To do this you must set --enable-utf8 when
|
||||
running "configure". This file can also be fed directly to the perltest script,
|
||||
provided you are running Perl 5.8 or higher. (For Perl 5.6, a small patch,
|
||||
commented in the script, can be used.)
|
||||
|
||||
The fifth test checks error handling with UTF-8 encoding, and internal UTF-8
|
||||
features of PCRE that are not relevant to Perl.
|
||||
|
||||
The sixth test checks the support for Unicode character properties. It is not
|
||||
run automatically unless PCRE is built with Unicode property support. To do
|
||||
this you must set --enable-unicode-properties when running "configure".
|
||||
|
||||
The seventh, eighth, and ninth tests check the pcre_dfa_exec() alternative
|
||||
matching function, in non-UTF-8 mode, UTF-8 mode, and UTF-8 mode with Unicode
|
||||
property support, respectively. The eighth and ninth tests are not run
|
||||
automatically unless PCRE is built with the relevant support.
|
||||
|
||||
|
||||
Character tables
|
||||
----------------
|
||||
|
||||
For speed, PCRE uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. The final argument of the
|
||||
pcre_compile() function is a pointer to a block of memory containing the
|
||||
concatenated tables. A call to pcre_maketables() can be used to generate a set
|
||||
of tables in the current locale. If the final argument for pcre_compile() is
|
||||
passed as NULL, a set of default tables that is built into the binary is used.
|
||||
|
||||
The source file called pcre_chartables.c contains the default set of tables. By
|
||||
default, this is created as a copy of pcre_chartables.c.dist, which contains
|
||||
tables for ASCII coding. However, if --enable-rebuild-chartables is specified
|
||||
for ./configure, a different version of pcre_chartables.c is built by the
|
||||
program dftables (compiled from dftables.c), which uses the ANSI C character
|
||||
handling functions such as isalnum(), isalpha(), isupper(), islower(), etc. to
|
||||
build the table sources. This means that the default C locale which is set for
|
||||
your system will control the contents of these default tables. You can change
|
||||
the default tables by editing pcre_chartables.c and then re-building PCRE. If
|
||||
you do this, you should take care to ensure that the file does not get
|
||||
automatically re-generated. The best way to do this is to move
|
||||
pcre_chartables.c.dist out of the way and replace it with your customized
|
||||
tables.
|
||||
|
||||
When the dftables program is run as a result of --enable-rebuild-chartables,
|
||||
it uses the default C locale that is set on your system. It does not pay
|
||||
attention to the LC_xxx environment variables. In other words, it uses the
|
||||
system's default locale rather than whatever the compiling user happens to have
|
||||
set. If you really do want to build a source set of character tables in a
|
||||
locale that is specified by the LC_xxx variables, you can run the dftables
|
||||
program by hand with the -L option. For example:
|
||||
|
||||
./dftables -L pcre_chartables.c.special
|
||||
|
||||
The first two 256-byte tables provide lower casing and case flipping functions,
|
||||
respectively. The next table consists of three 32-byte bit maps which identify
|
||||
digits, "word" characters, and white space, respectively. These are used when
|
||||
building 32-byte bit maps that represent character classes for code points less
|
||||
than 256.
|
||||
|
||||
The final 256-byte table has bits indicating various character types, as
|
||||
follows:
|
||||
|
||||
1 white space character
|
||||
2 letter
|
||||
4 decimal digit
|
||||
8 hexadecimal digit
|
||||
16 alphanumeric or '_'
|
||||
128 regular expression metacharacter or binary zero
|
||||
|
||||
You should not alter the set of characters that contain the 128 bit, as that
|
||||
will cause PCRE to malfunction.
|
||||
|
||||
|
||||
File manifest
|
||||
-------------
|
||||
|
||||
The distribution should contain the following files:
|
||||
|
||||
(A) Source files of the PCRE library functions and their headers:
|
||||
|
||||
dftables.c auxiliary program for building pcre_chartables.c
|
||||
when --enable-rebuild-chartables is specified
|
||||
|
||||
pcre_chartables.c.dist a default set of character tables that assume ASCII
|
||||
coding; used, unless --enable-rebuild-chartables is
|
||||
specified, by copying to pcre_chartables.c
|
||||
|
||||
pcreposix.c )
|
||||
pcre_compile.c )
|
||||
pcre_config.c )
|
||||
pcre_dfa_exec.c )
|
||||
pcre_exec.c )
|
||||
pcre_fullinfo.c )
|
||||
pcre_get.c ) sources for the functions in the library,
|
||||
pcre_globals.c ) and some internal functions that they use
|
||||
pcre_info.c )
|
||||
pcre_maketables.c )
|
||||
pcre_newline.c )
|
||||
pcre_ord2utf8.c )
|
||||
pcre_refcount.c )
|
||||
pcre_study.c )
|
||||
pcre_tables.c )
|
||||
pcre_try_flipped.c )
|
||||
pcre_ucd.c )
|
||||
pcre_valid_utf8.c )
|
||||
pcre_version.c )
|
||||
pcre_xclass.c )
|
||||
pcre_printint.src ) debugging function that is #included in pcretest,
|
||||
) and can also be #included in pcre_compile()
|
||||
pcre.h.in template for pcre.h when built by "configure"
|
||||
pcreposix.h header for the external POSIX wrapper API
|
||||
pcre_internal.h header for internal use
|
||||
ucp.h header for Unicode property handling
|
||||
|
||||
config.h.in template for config.h, which is built by "configure"
|
||||
|
||||
pcrecpp.h public header file for the C++ wrapper
|
||||
pcrecpparg.h.in template for another C++ header file
|
||||
pcre_scanner.h public header file for C++ scanner functions
|
||||
pcrecpp.cc )
|
||||
pcre_scanner.cc ) source for the C++ wrapper library
|
||||
|
||||
pcre_stringpiece.h.in template for pcre_stringpiece.h, the header for the
|
||||
C++ stringpiece functions
|
||||
pcre_stringpiece.cc source for the C++ stringpiece functions
|
||||
|
||||
(B) Source files for programs that use PCRE:
|
||||
|
||||
pcredemo.c simple demonstration of coding calls to PCRE
|
||||
pcregrep.c source of a grep utility that uses PCRE
|
||||
pcretest.c comprehensive test program
|
||||
|
||||
(C) Auxiliary files:
|
||||
|
||||
132html script to turn "man" pages into HTML
|
||||
AUTHORS information about the author of PCRE
|
||||
ChangeLog log of changes to the code
|
||||
CleanTxt script to clean nroff output for txt man pages
|
||||
Detrail script to remove trailing spaces
|
||||
HACKING some notes about the internals of PCRE
|
||||
INSTALL generic installation instructions
|
||||
LICENCE conditions for the use of PCRE
|
||||
COPYING the same, using GNU's standard name
|
||||
Makefile.in ) template for Unix Makefile, which is built by
|
||||
) "configure"
|
||||
Makefile.am ) the automake input that was used to create
|
||||
) Makefile.in
|
||||
NEWS important changes in this release
|
||||
NON-UNIX-USE notes on building PCRE on non-Unix systems
|
||||
PrepareRelease script to make preparations for "make dist"
|
||||
README this file
|
||||
RunTest a Unix shell script for running tests
|
||||
RunGrepTest a Unix shell script for pcregrep tests
|
||||
aclocal.m4 m4 macros (generated by "aclocal")
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.ac ) the autoconf input that was used to build
|
||||
) "configure" and config.h
|
||||
depcomp ) script to find program dependencies, generated by
|
||||
) automake
|
||||
doc/*.3 man page sources for the PCRE functions
|
||||
doc/*.1 man page sources for pcregrep and pcretest
|
||||
doc/index.html.src the base HTML page
|
||||
doc/html/* HTML documentation
|
||||
doc/pcre.txt plain text version of the man pages
|
||||
doc/pcretest.txt plain text documentation of test program
|
||||
doc/perltest.txt plain text documentation of Perl test program
|
||||
install-sh a shell script for installing files
|
||||
libpcre.pc.in template for libpcre.pc for pkg-config
|
||||
libpcrecpp.pc.in template for libpcrecpp.pc for pkg-config
|
||||
ltmain.sh file used to build a libtool script
|
||||
missing ) common stub for a few missing GNU programs while
|
||||
) installing, generated by automake
|
||||
mkinstalldirs script for making install directories
|
||||
perltest.pl Perl test program
|
||||
pcre-config.in source of script which retains PCRE information
|
||||
pcrecpp_unittest.cc )
|
||||
pcre_scanner_unittest.cc ) test programs for the C++ wrapper
|
||||
pcre_stringpiece_unittest.cc )
|
||||
testdata/testinput* test data for main library tests
|
||||
testdata/testoutput* expected test results
|
||||
testdata/grep* input and output for pcregrep tests
|
||||
|
||||
(D) Auxiliary files for cmake support
|
||||
|
||||
cmake/COPYING-CMAKE-SCRIPTS
|
||||
cmake/FindPackageHandleStandardArgs.cmake
|
||||
cmake/FindReadline.cmake
|
||||
CMakeLists.txt
|
||||
config-cmake.h.in
|
||||
|
||||
(E) Auxiliary files for VPASCAL
|
||||
|
||||
makevp.bat
|
||||
makevp_c.txt
|
||||
makevp_l.txt
|
||||
pcregexp.pas
|
||||
|
||||
(F) Auxiliary files for building PCRE "by hand"
|
||||
|
||||
pcre.h.generic ) a version of the public PCRE header file
|
||||
) for use in non-"configure" environments
|
||||
config.h.generic ) a version of config.h for use in non-"configure"
|
||||
) environments
|
||||
|
||||
(G) Miscellaneous
|
||||
|
||||
RunTest.bat a script for running tests under Windows
|
||||
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 21 March 2009
|
|
@ -1,280 +0,0 @@
|
|||
#! /bin/sh
|
||||
|
||||
# Run pcregrep tests. The assumption is that the PCRE tests check the library
|
||||
# itself. What we are checking here is the file handling and options that are
|
||||
# supported by pcregrep.
|
||||
|
||||
# Set the C locale, so that sort(1) behaves predictably.
|
||||
LC_ALL=C
|
||||
export LC_ALL
|
||||
|
||||
pcregrep=`pwd`/pcregrep
|
||||
|
||||
echo " "
|
||||
echo "Testing pcregrep"
|
||||
$pcregrep -V
|
||||
|
||||
cf="diff -ub"
|
||||
valgrind=
|
||||
|
||||
# Process the command-line arguments. The only word accepted is "valgrind",
# which sets $valgrind so that the pcregrep commands below are prefixed with
# "valgrind -q --leak-check=no"; any other argument prints a message and
# exits with status 1.
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
valgrind) valgrind="valgrind -q --leak-check=no";;
|
||||
*) echo "Unknown argument $1"; exit 1;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
# If PCRE has been built in a directory other than the source directory, and
|
||||
# this test is being run from "make check" as usual, then $(srcdir) will be
|
||||
# set. If not, set it to the current directory. We then arrange to run the
|
||||
# pcregrep command in the source directory so that the file names that appear
|
||||
# in the output are always the same.
|
||||
|
||||
if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then
|
||||
srcdir=.
|
||||
fi
|
||||
|
||||
# Check for the availability of UTF-8 support
|
||||
|
||||
# Search the output of "pcretest -C" for the "No UTF-8 support" message and
# save the pipeline's exit status in $utf8. Later in this script a non-zero
# $utf8 runs the UTF-8 tests and zero skips them, so the sense is inverted:
# zero means the message was found (assuming pcregrep follows grep's
# exit-status convention).
./pcretest -C | ./pcregrep "No UTF-8 support" >/dev/null
|
||||
utf8=$?
|
||||
|
||||
echo "---------------------------- Test 1 ------------------------------" >testtry
|
||||
(cd $srcdir; $valgrind $pcregrep PATTERN ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 2 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep '^PATTERN' ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 3 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 4 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -ic PATTERN ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 5 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 6 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 7 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 8 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 9 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 10 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 11 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -vn pattern ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 12 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -ix pattern ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 13 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 14 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 15 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep 'abc^*' ./testdata/grepinput) 2>>testtry >>testtry
|
||||
|
||||
echo "---------------------------- Test 16 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtry >>testtry
|
||||
|
||||
echo "---------------------------- Test 17 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -M 'the\noutput' ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 18 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 19 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -Mix 'Pattern' ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 20 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 21 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -nA3 'four' ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 22 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -nB3 'four' ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 23 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -C3 'four' ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 24 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -A9 'four' ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 25 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -nB9 'four' ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 26 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -A9 -B9 'four' ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 27 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -A10 'four' ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 28 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -nB10 'four' ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 29 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -C12 -B10 'four' ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 30 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 31 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 32 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 33 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep 'fox' ./testdata/grepnonexist) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 34 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -s 'fox' ./testdata/grepnonexist) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 35 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --exclude_dir='^\.' 'fox' ./testdata) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 36 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude 'grepinput$' --exclude_dir='^\.' 'fox' ./testdata | sort) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 37 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep '^(a+)*\d' ./testdata/grepinput) >>testtry 2>teststderr
|
||||
echo "RC=$?" >>testtry
|
||||
echo "======== STDERR ========" >>testtry
|
||||
cat teststderr >>testtry
|
||||
|
||||
echo "---------------------------- Test 38 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep '>\x00<' ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 39 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -A1 'before the binary zero' ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 40 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -B1 'after the binary zero' ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 41 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtry
|
||||
|
||||
# NOTE(review): the label "Test 41" is repeated here (the previous test uses
# the same number). The label is written to testtry, which is later compared
# against testdata/grepoutput, so it presumably has to stay as it is unless
# the expected-output file is changed as well.
echo "---------------------------- Test 41 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 42 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -on 'before|zero|after' ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 43 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -on -e before -e zero -e after ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 44 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 45 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -e abc -e '(unclosed' ./testdata/grepinput) 2>>testtry >>testtry
|
||||
|
||||
echo "---------------------------- Test 46 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -Fx "AB.VE
|
||||
elephant" ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 47 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -F "AB.VE
|
||||
elephant" ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 48 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -F -e DATA -e "AB.VE
|
||||
elephant" ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 49 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtry
|
||||
|
||||
echo "---------------------------- Test 50 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -Mv "brown\sfox" ./testdata/grepinputv) >>testtry
|
||||
|
||||
echo "---------------------------- Test 51 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --colour=always jumps ./testdata/grepinputv) >>testtry
|
||||
|
||||
echo "---------------------------- Test 52 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 53 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
|
||||
|
||||
echo "---------------------------- Test 54 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtry
|
||||
|
||||
# Now compare the results.
|
||||
|
||||
# Compare the expected output in testdata/grepoutput with what the tests
# above wrote to testtry, using the command held in $cf. If that command
# returns a non-zero status, stop here with exit status 1.
$cf $srcdir/testdata/grepoutput testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
|
||||
# These tests require UTF-8 support
|
||||
|
||||
if [ $utf8 -ne 0 ] ; then
|
||||
echo "Testing pcregrep UTF-8 features"
|
||||
|
||||
echo "---------------------------- Test U1 ------------------------------" >testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtry
|
||||
|
||||
echo "---------------------------- Test U2 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtry
|
||||
|
||||
$cf $srcdir/testdata/grepoutput8 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
else
|
||||
echo "Skipping pcregrep UTF-8 tests: no UTF-8 support in PCRE library"
|
||||
fi
|
||||
|
||||
|
||||
# We go to some contortions to try to ensure that the tests for the various
|
||||
# newline settings will work in environments where the normal newline sequence
|
||||
# is not \n. Do not use exported files, whose line endings might be changed.
|
||||
# Instead, create an input file using printf so that its contents are exactly
|
||||
# what we want. Note the messy fudge to get printf to write a string that
|
||||
# starts with a hyphen.
|
||||
|
||||
echo "Testing pcregrep newline settings"
|
||||
printf "abc\rdef\r\nghi\njkl" >testNinput
|
||||
|
||||
printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtry
|
||||
$valgrind $pcregrep -n -N CR "^(abc|def|ghi|jkl)" testNinput >>testtry
|
||||
|
||||
printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtry
|
||||
$valgrind $pcregrep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinput >>testtry
|
||||
|
||||
printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtry
|
||||
pattern=`printf 'def\rjkl'`
|
||||
$valgrind $pcregrep -n --newline=cr -F "$pattern" testNinput >>testtry
|
||||
|
||||
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtry
|
||||
pattern=`printf 'xxx\r\njkl'`
|
||||
$valgrind $pcregrep -n --newline=crlf -F "$pattern" testNinput >>testtry
|
||||
|
||||
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtry
|
||||
$valgrind $pcregrep -n --newline=any "^(abc|def|ghi|jkl)" testNinput >>testtry
|
||||
|
||||
printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtry
|
||||
$valgrind $pcregrep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinput >>testtry
|
||||
|
||||
$cf $srcdir/testdata/grepoutputN testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
exit 0
|
||||
|
||||
# End
|
|
@ -1,208 +0,0 @@
|
|||
#! /bin/sh
|
||||
|
||||
# This file is generated by configure from RunGrepTest.in. Make any changes
|
||||
# to that file.
|
||||
|
||||
echo "Testing pcregrep"
|
||||
./pcregrep -V
|
||||
|
||||
# Run pcregrep tests. The assumption is that the PCRE tests check the library
|
||||
# itself. What we are checking here is the file handling and options that are
|
||||
# supported by pcregrep.
|
||||
|
||||
cf=diff
|
||||
valgrind=
|
||||
if [ ! -d testdata ] ; then
|
||||
ln -s @top_srcdir@/testdata testdata
|
||||
fi
|
||||
testdata=./testdata
|
||||
|
||||
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
valgrind) valgrind="valgrind -q --leak-check=no";;
|
||||
*) echo "Unknown argument $1"; exit 1;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
echo "---------------------------- Test 1 ------------------------------" >testtry
|
||||
$valgrind ./pcregrep PATTERN $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 2 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep '^PATTERN' $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 3 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -in PATTERN $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 4 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -ic PATTERN $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 5 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -in PATTERN $testdata/grepinput $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 6 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -inh PATTERN $testdata/grepinput $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 7 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -il PATTERN $testdata/grepinput $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 8 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -l PATTERN $testdata/grepinput $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 9 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -q PATTERN $testdata/grepinput $testdata/grepinputx >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 10 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -q NEVER-PATTERN $testdata/grepinput $testdata/grepinputx >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 11 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -vn pattern $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 12 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -ix pattern $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 13 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -f$testdata/greplist $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 14 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -w pat $testdata/grepinput $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 15 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep 'abc^*' $testdata/grepinput 2>>testtry >>testtry
|
||||
|
||||
echo "---------------------------- Test 16 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep abc $testdata/grepinput $testdata/nonexistfile 2>>testtry >>testtry
|
||||
|
||||
echo "---------------------------- Test 17 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -M 'the\noutput' $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 18 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -Mn '(the\noutput|dog\.\n--)' $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 19 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -Mix 'Pattern' $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 20 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -Mixn 'complete pair\nof lines' $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 21 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -nA3 'four' $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 22 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -nB3 'four' $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 23 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -C3 'four' $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 24 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -A9 'four' $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 25 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -nB9 'four' $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 26 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -A9 -B9 'four' $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 27 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -A10 'four' $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 28 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -nB10 'four' $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 29 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -C12 -B10 'four' $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 30 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -inB3 'pattern' $testdata/grepinput $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 31 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -inA3 'pattern' $testdata/grepinput $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 32 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -L 'fox' $testdata/grepinput $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 33 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep 'fox' $testdata/grepnonexist >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 34 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -s 'fox' $testdata/grepnonexist >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 35 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -L -r --include=grepinputx 'fox' $testdata >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 36 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep -L -r --include=grepinput --exclude 'grepinput$' 'fox' $testdata >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 37 -----------------------------" >>testtry
|
||||
$valgrind ./pcregrep '^(a+)*\d' $testdata/grepinput >>testtry 2>teststderr
|
||||
echo "RC=$?" >>testtry
|
||||
echo "======== STDERR ========" >>testtry
|
||||
cat teststderr >>testtry
|
||||
|
||||
echo "---------------------------- Test 38 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep '>\x00<' $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 39 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -A1 'before the binary zero' $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 40 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -B1 'after the binary zero' $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 41 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -B1 -o '\w+ the binary zero' $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 41 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -B1 -onH '\w+ the binary zero' $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 42 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -on 'before|zero|after' $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 43 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -on -e before -e zero -e after $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 44 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -on -f $testdata/greplist -e binary $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 45 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -e abc -e '(unclosed' $testdata/grepinput 2>>testtry >>testtry
|
||||
|
||||
echo "---------------------------- Test 46 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -Fx "AB.VE
|
||||
elephant" $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 47 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -F "AB.VE
|
||||
elephant" $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 48 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -F -e DATA -e "AB.VE
|
||||
elephant" $testdata/grepinput >>testtry
|
||||
|
||||
echo "---------------------------- Test 49 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep "^(abc|def|ghi|jkl)" $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 50 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep -N CR "^(abc|def|ghi|jkl)" $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 51 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep --newline=crlf "^(abc|def|ghi|jkl)" $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 52 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep --newline=cr -F "def
jkl" $testdata/grepinputx >>testtry
|
||||
|
||||
echo "---------------------------- Test 53 ------------------------------" >>testtry
|
||||
$valgrind ./pcregrep --newline=crlf -F "xxx
|
||||
jkl" $testdata/grepinputx >>testtry
|
||||
|
||||
# Now compare the results.
|
||||
|
||||
$cf testtry $testdata/grepoutput
|
||||
if [ $? != 0 ] ; then exit 1; else exit 0; fi
|
||||
|
||||
# End
|
|
@ -1,292 +0,0 @@
|
|||
#! /bin/sh
|
||||
|
||||
# Run PCRE tests.
|
||||
|
||||
valgrind=
|
||||
|
||||
# Set up a suitable "diff" command for comparison. Some systems
|
||||
# have a diff that lacks a -u option. Try to deal with this.
|
||||
|
||||
if diff -u /dev/null /dev/null; then cf="diff -u"; else cf="diff"; fi
|
||||
|
||||
# Find the test data
|
||||
|
||||
testdata=testdata
|
||||
if [ -n "$srcdir" -a -d "$srcdir" ] ; then
|
||||
testdata="$srcdir/testdata"
|
||||
fi
|
||||
|
||||
# Find which optional facilities are available
|
||||
|
||||
case `./pcretest -C | ./pcregrep 'Internal link size'` in
|
||||
*2) link_size=2;;
|
||||
*3) link_size=3;;
|
||||
*4) link_size=4;;
|
||||
*) echo "Failed to find internal link size"; exit 1;;
|
||||
esac
|
||||
|
||||
./pcretest -C | ./pcregrep 'No UTF-8 support' >/dev/null
|
||||
utf8=$?
|
||||
|
||||
./pcretest -C | ./pcregrep 'No Unicode properties support' >/dev/null
|
||||
ucp=$?
|
||||
|
||||
# Select which tests to run; for those that are explicitly requested, check
|
||||
# that the necessary optional facilities are available.
|
||||
|
||||
do1=no
|
||||
do2=no
|
||||
do3=no
|
||||
do4=no
|
||||
do5=no
|
||||
do6=no
|
||||
do7=no
|
||||
do8=no
|
||||
do9=no
|
||||
do10=no
|
||||
|
||||
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
1) do1=yes;;
|
||||
2) do2=yes;;
|
||||
3) do3=yes;;
|
||||
4) do4=yes;;
|
||||
5) do5=yes;;
|
||||
6) do6=yes;;
|
||||
7) do7=yes;;
|
||||
8) do8=yes;;
|
||||
9) do9=yes;;
|
||||
10) do10=yes;;
|
||||
valgrind) valgrind="valgrind -q";;
|
||||
*) echo "Unknown test number $1"; exit 1;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
if [ $utf8 -eq 0 ] ; then
|
||||
if [ $do4 = yes ] ; then
|
||||
echo "Can't run test 4 because UTF-8 support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do5 = yes ] ; then
|
||||
echo "Can't run test 5 because UTF-8 support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do8 = yes ] ; then
|
||||
echo "Can't run test 8 because UTF-8 support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $ucp -eq 0 ] ; then
|
||||
if [ $do6 = yes ] ; then
|
||||
echo "Can't run test 6 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do9 = yes ] ; then
|
||||
echo "Can't run test 9 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do10 = yes ] ; then
|
||||
echo "Can't run test 10 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $link_size -ne 2 ] ; then
|
||||
if [ $do10 = yes ] ; then
|
||||
echo "Can't run test 10 because the link size ($link_size) is not 2"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# If no specific tests were requested, select all that are relevant.
|
||||
|
||||
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
||||
$do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \
|
||||
$do9 = no -a $do10 = no ] ; then
|
||||
do1=yes
|
||||
do2=yes
|
||||
do3=yes
|
||||
if [ $utf8 -ne 0 ] ; then do4=yes; fi
|
||||
if [ $utf8 -ne 0 ] ; then do5=yes; fi
|
||||
if [ $utf8 -ne 0 -a $ucp -ne 0 ] ; then do6=yes; fi
|
||||
do7=yes
|
||||
if [ $utf8 -ne 0 ] ; then do8=yes; fi
|
||||
if [ $utf8 -ne 0 -a $ucp -ne 0 ] ; then do9=yes; fi
|
||||
if [ $link_size -eq 2 -a $ucp -ne 0 ] ; then do10=yes; fi
|
||||
fi
|
||||
|
||||
# Show which release
|
||||
|
||||
echo ""
|
||||
echo PCRE C library tests
|
||||
./pcretest /dev/null
|
||||
|
||||
# Primary test, Perl-compatible
|
||||
|
||||
if [ $do1 = yes ] ; then
|
||||
echo "Test 1: main functionality (Perl compatible)"
|
||||
$valgrind ./pcretest -q $testdata/testinput1 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput1 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
fi
|
||||
|
||||
# PCRE tests that are not Perl-compatible - API & error tests, mostly
|
||||
|
||||
if [ $do2 = yes ] ; then
|
||||
echo "Test 2: API and error handling (not Perl compatible)"
|
||||
$valgrind ./pcretest -q $testdata/testinput2 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput2 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else
|
||||
echo " "
|
||||
echo "** Test 2 requires a lot of stack. If it has crashed with a"
|
||||
echo "** segmentation fault, it may be that you do not have enough"
|
||||
echo "** stack available by default. Please see the 'pcrestack' man"
|
||||
echo "** page for a discussion of PCRE's stack usage."
|
||||
echo " "
|
||||
exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
fi
|
||||
|
||||
# Locale-specific tests, provided that either the "fr_FR" or the "french"
|
||||
# locale is available. The former is the Unix-like standard; the latter is
|
||||
# for Windows.
|
||||
|
||||
if [ $do3 = yes ] ; then
|
||||
locale -a | grep '^fr_FR$' >/dev/null
|
||||
if [ $? -eq 0 ] ; then
|
||||
locale=fr_FR
|
||||
infile=$testdata/testinput3
|
||||
outfile=$testdata/testoutput3
|
||||
else
|
||||
locale -a | grep '^french$' >/dev/null
|
||||
if [ $? -eq 0 ] ; then
|
||||
locale=french
|
||||
sed 's/fr_FR/french/' $testdata/testinput3 >test3input
|
||||
sed 's/fr_FR/french/' $testdata/testoutput3 >test3output
|
||||
infile=test3input
|
||||
outfile=test3output
|
||||
else
|
||||
locale=
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$locale" != "" ] ; then
|
||||
echo "Test 3: locale-specific features (using '$locale' locale)"
|
||||
$valgrind ./pcretest -q $infile testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $outfile testtry
|
||||
if [ $? != 0 ] ; then
|
||||
echo " "
|
||||
echo "Locale test did not run entirely successfully."
|
||||
echo "This usually means that there is a problem with the locale"
|
||||
echo "settings rather than a bug in PCRE."
|
||||
else
|
||||
echo "OK"
|
||||
fi
|
||||
else exit 1
|
||||
fi
|
||||
else
|
||||
echo "Cannot test locale-specific features - neither the 'fr_FR' nor the"
|
||||
echo "'french' locale exists, or the \"locale\" command is not available"
|
||||
echo "to check for them."
|
||||
echo " "
|
||||
fi
|
||||
fi
|
||||
|
||||
# Additional tests for UTF8 support
|
||||
|
||||
if [ $do4 = yes ] ; then
|
||||
echo "Test 4: UTF-8 support (Perl compatible)"
|
||||
$valgrind ./pcretest -q $testdata/testinput4 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput4 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
fi
|
||||
|
||||
if [ $do5 = yes ] ; then
|
||||
echo "Test 5: API and internals for UTF-8 support (not Perl compatible)"
|
||||
$valgrind ./pcretest -q $testdata/testinput5 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput5 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
fi
|
||||
|
||||
if [ $do6 = yes ] ; then
|
||||
echo "Test 6: Unicode property support"
|
||||
$valgrind ./pcretest -q $testdata/testinput6 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput6 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
fi
|
||||
|
||||
# Tests for DFA matching support
|
||||
|
||||
if [ $do7 = yes ] ; then
|
||||
echo "Test 7: DFA matching"
|
||||
$valgrind ./pcretest -q -dfa $testdata/testinput7 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput7 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
fi
|
||||
|
||||
if [ $do8 = yes ] ; then
|
||||
echo "Test 8: DFA matching with UTF-8"
|
||||
$valgrind ./pcretest -q -dfa $testdata/testinput8 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput8 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
fi
|
||||
|
||||
if [ $do9 = yes ] ; then
|
||||
echo "Test 9: DFA matching with Unicode properties"
|
||||
$valgrind ./pcretest -q -dfa $testdata/testinput9 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput9 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
fi
|
||||
|
||||
# Test of internal offsets and code sizes. This test is run only when there
|
||||
# is Unicode property support and the link size is 2. The actual tests are
|
||||
# mostly the same as in some of the above, but in this test we inspect some
|
||||
# offsets and sizes that require a known link size. This is a double-check for
|
||||
# the maintainer, just in case something changes unexpectedly.
|
||||
|
||||
if [ $do10 = yes ] ; then
|
||||
echo "Test 10: Internal offsets and code size tests"
|
||||
$valgrind ./pcretest -q $testdata/testinput10 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput10 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
fi
|
||||
|
||||
# End
|
|
@ -1,39 +0,0 @@
|
|||
@rem This file was contributed by Ralf Junker, and touched up by
|
||||
@rem Daniel Richard G. Test 10 added by Philip H.
|
||||
@rem Philip H also changed test 3 to use "wintest" files.
|
||||
@rem
|
||||
@rem MS Windows batch file to run pcretest on testfiles with the correct
|
||||
@rem options.
|
||||
@rem
|
||||
@rem Output is written to a newly created subfolder named "testout".
|
||||
|
||||
setlocal
|
||||
|
||||
if [%srcdir%]==[] set srcdir=.
|
||||
if [%pcretest%]==[] set pcretest=pcretest
|
||||
|
||||
if not exist testout md testout
|
||||
|
||||
%pcretest% -q %srcdir%\testdata\testinput1 > testout\testoutput1
|
||||
%pcretest% -q %srcdir%\testdata\testinput2 > testout\testoutput2
|
||||
@rem %pcretest% -q %srcdir%\testdata\testinput3 > testout\testoutput3
|
||||
%pcretest% -q %srcdir%\testdata\wintestinput3 > testout\wintestoutput3
|
||||
%pcretest% -q %srcdir%\testdata\testinput4 > testout\testoutput4
|
||||
%pcretest% -q %srcdir%\testdata\testinput5 > testout\testoutput5
|
||||
%pcretest% -q %srcdir%\testdata\testinput6 > testout\testoutput6
|
||||
%pcretest% -q -dfa %srcdir%\testdata\testinput7 > testout\testoutput7
|
||||
%pcretest% -q -dfa %srcdir%\testdata\testinput8 > testout\testoutput8
|
||||
%pcretest% -q -dfa %srcdir%\testdata\testinput9 > testout\testoutput9
|
||||
%pcretest% -q %srcdir%\testdata\testinput10 > testout\testoutput10
|
||||
|
||||
fc /n %srcdir%\testdata\testoutput1 testout\testoutput1
|
||||
fc /n %srcdir%\testdata\testoutput2 testout\testoutput2
|
||||
rem fc /n %srcdir%\testdata\testoutput3 testout\testoutput3
|
||||
fc /n %srcdir%\testdata\wintestoutput3 testout\wintestoutput3
|
||||
fc /n %srcdir%\testdata\testoutput4 testout\testoutput4
|
||||
fc /n %srcdir%\testdata\testoutput5 testout\testoutput5
|
||||
fc /n %srcdir%\testdata\testoutput6 testout\testoutput6
|
||||
fc /n %srcdir%\testdata\testoutput7 testout\testoutput7
|
||||
fc /n %srcdir%\testdata\testoutput8 testout\testoutput8
|
||||
fc /n %srcdir%\testdata\testoutput9 testout\testoutput9
|
||||
fc /n %srcdir%\testdata\testoutput10 testout\testoutput10
|
|
@ -1,258 +0,0 @@
|
|||
#! /bin/sh
|
||||
|
||||
# This file is generated by configure from RunTest.in. Make any changes
|
||||
# to that file.
|
||||
|
||||
# Run PCRE tests
|
||||
|
||||
cf=diff
|
||||
valgrind=
|
||||
if [ ! -d testdata ] ; then
|
||||
ln -s @top_srcdir@/testdata testdata
|
||||
fi
|
||||
testdata=./testdata
|
||||
|
||||
|
||||
# Select which tests to run; if no selection, run all
|
||||
|
||||
do1=no
|
||||
do2=no
|
||||
do3=no
|
||||
do4=no
|
||||
do5=no
|
||||
do6=no
|
||||
do7=no
|
||||
do8=no
|
||||
do9=no
|
||||
|
||||
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
1) do1=yes;;
|
||||
2) do2=yes;;
|
||||
3) do3=yes;;
|
||||
4) do4=yes;;
|
||||
5) do5=yes;;
|
||||
6) do6=yes;;
|
||||
7) do7=yes;;
|
||||
8) do8=yes;;
|
||||
9) do9=yes;;
|
||||
valgrind) valgrind="valgrind -q";;
|
||||
*) echo "Unknown test number $1"; exit 1;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
if [ "@LINK_SIZE@" != "" -a "@LINK_SIZE@" != "-DLINK_SIZE=2" ] ; then
|
||||
if [ $do2 = yes ] ; then
|
||||
echo "Can't run test 2 with an internal link size other than 2"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do5 = yes ] ; then
|
||||
echo "Can't run test 5 with an internal link size other than 2"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do6 = yes ] ; then
|
||||
echo "Can't run test 6 with an internal link size other than 2"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "@UTF8@" = "" ] ; then
|
||||
if [ $do4 = yes ] ; then
|
||||
echo "Can't run test 4 because UTF-8 support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do5 = yes ] ; then
|
||||
echo "Can't run test 5 because UTF-8 support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do6 = yes ] ; then
|
||||
echo "Can't run test 6 because UTF-8 support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do8 = yes ] ; then
|
||||
echo "Can't run test 8 because UTF-8 support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do9 = yes ] ; then
|
||||
echo "Can't run test 9 because UTF-8 support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "@UCP@" = "" ] ; then
|
||||
if [ $do6 = yes ] ; then
|
||||
echo "Can't run test 6 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do9 = yes ] ; then
|
||||
echo "Can't run test 9 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
||||
$do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \
|
||||
$do9 = no ] ; then
|
||||
do1=yes
|
||||
do2=yes
|
||||
do3=yes
|
||||
if [ "@UTF8@" != "" ] ; then do4=yes; fi
|
||||
if [ "@UTF8@" != "" ] ; then do5=yes; fi
|
||||
if [ "@UTF8@" != "" -a "@UCP@" != "" ] ; then do6=yes; fi
|
||||
do7=yes
|
||||
if [ "@UTF8@" != "" ] ; then do8=yes; fi
|
||||
if [ "@UTF8@" != "" -a "@UCP@" != "" ] ; then do9=yes; fi
|
||||
fi
|
||||
|
||||
# Show which release
|
||||
|
||||
./pcretest /dev/null
|
||||
|
||||
# Primary test, Perl-compatible
|
||||
|
||||
if [ $do1 = yes ] ; then
|
||||
echo "Test 1: main functionality (Perl compatible)"
|
||||
$valgrind ./pcretest -q $testdata/testinput1 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput1
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
echo " "
|
||||
fi
|
||||
|
||||
# PCRE tests that are not Perl-compatible - API & error tests, mostly
|
||||
|
||||
if [ $do2 = yes ] ; then
|
||||
if [ "@LINK_SIZE@" = "" -o "@LINK_SIZE@" = "-DLINK_SIZE=2" ] ; then
|
||||
echo "Test 2: API and error handling (not Perl compatible)"
|
||||
$valgrind ./pcretest -q -i $testdata/testinput2 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput2
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
echo " "
|
||||
else
|
||||
echo Test 2 skipped for link size other than 2 \(@LINK_SIZE@\)
|
||||
echo " "
|
||||
fi
|
||||
fi
|
||||
|
||||
# Locale-specific tests, provided the "fr_FR" locale is available
|
||||
|
||||
if [ $do3 = yes ] ; then
|
||||
locale -a | grep '^fr_FR$' >/dev/null
|
||||
if [ $? -eq 0 ] ; then
|
||||
echo "Test 3: locale-specific features (using 'fr_FR' locale)"
|
||||
$valgrind ./pcretest -q $testdata/testinput3 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput3
|
||||
if [ $? != 0 ] ; then
|
||||
echo " "
|
||||
echo "Locale test did not run entirely successfully."
|
||||
echo "This usually means that there is a problem with the locale"
|
||||
echo "settings rather than a bug in PCRE."
|
||||
else
|
||||
echo "OK"
|
||||
fi
|
||||
echo " "
|
||||
else exit 1
|
||||
fi
|
||||
else
|
||||
echo "Cannot test locale-specific features - 'fr_FR' locale not found,"
|
||||
echo "or the \"locale\" command is not available to check for it."
|
||||
echo " "
|
||||
fi
|
||||
fi
|
||||
|
||||
# Additional tests for UTF8 support
|
||||
|
||||
if [ $do4 = yes ] ; then
|
||||
echo "Test 4: UTF-8 support (Perl compatible)"
|
||||
$valgrind ./pcretest -q $testdata/testinput4 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput4
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
echo " "
|
||||
fi
|
||||
|
||||
if [ $do5 = yes ] ; then
|
||||
if [ "@LINK_SIZE@" = "" -o "@LINK_SIZE@" = "-DLINK_SIZE=2" ] ; then
|
||||
echo "Test 5: API and internals for UTF-8 support (not Perl compatible)"
|
||||
$valgrind ./pcretest -q $testdata/testinput5 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput5
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
echo " "
|
||||
else
|
||||
echo Test 5 skipped for link size other than 2 \(@LINK_SIZE@\)
|
||||
echo " "
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $do6 = yes ] ; then
|
||||
if [ "@LINK_SIZE@" = "" -o "@LINK_SIZE@" = "-DLINK_SIZE=2" ] ; then
|
||||
echo "Test 6: Unicode property support"
|
||||
$valgrind ./pcretest -q $testdata/testinput6 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput6
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
echo " "
|
||||
else
|
||||
echo Test 6 skipped for link size other than 2 \(@LINK_SIZE@\)
|
||||
echo " "
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for DFA matching support
|
||||
|
||||
if [ $do7 = yes ] ; then
|
||||
echo "Test 7: DFA matching"
|
||||
$valgrind ./pcretest -q -dfa $testdata/testinput7 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput7
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
echo " "
|
||||
fi
|
||||
|
||||
if [ $do8 = yes ] ; then
|
||||
echo "Test 8: DFA matching with UTF-8"
|
||||
$valgrind ./pcretest -q -dfa $testdata/testinput8 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput8
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
echo " "
|
||||
fi
|
||||
|
||||
if [ $do9 = yes ] ; then
|
||||
echo "Test 9: DFA matching with Unicode properties"
|
||||
$valgrind ./pcretest -q -dfa $testdata/testinput9 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput9
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "OK"
|
||||
echo " "
|
||||
fi
|
||||
|
||||
# End
|
|
@ -1,22 +0,0 @@
|
|||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -1,58 +0,0 @@
|
|||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... )
|
||||
# This macro is intended to be used in FindXXX.cmake modules files.
|
||||
# It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and
|
||||
# it also sets the <UPPERCASED_NAME>_FOUND variable.
|
||||
# The package is found if all variables listed are TRUE.
|
||||
# Example:
|
||||
#
|
||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR)
|
||||
#
|
||||
# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and
|
||||
# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE.
|
||||
# If it is not found and REQUIRED was used, it fails with FATAL_ERROR,
|
||||
# independent of whether QUIET was used or not.
|
||||
# If it is found, the location is reported using the VAR1 argument, so
|
||||
# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out.
|
||||
# If the second argument is DEFAULT_MSG, the message in the failure case will
|
||||
# be "Could NOT find LibXml2", if you don't like this message you can specify
|
||||
# your own custom failure message there.
|
||||
|
||||
MACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 )
|
||||
|
||||
IF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
|
||||
IF (${_NAME}_FIND_REQUIRED)
|
||||
SET(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}")
|
||||
ELSE (${_NAME}_FIND_REQUIRED)
|
||||
SET(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}")
|
||||
ENDIF (${_NAME}_FIND_REQUIRED)
|
||||
ELSE("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
|
||||
SET(_FAIL_MESSAGE "${_FAIL_MSG}")
|
||||
ENDIF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
|
||||
|
||||
STRING(TOUPPER ${_NAME} _NAME_UPPER)
|
||||
|
||||
SET(${_NAME_UPPER}_FOUND TRUE)
|
||||
IF(NOT ${_VAR1})
|
||||
SET(${_NAME_UPPER}_FOUND FALSE)
|
||||
ENDIF(NOT ${_VAR1})
|
||||
|
||||
FOREACH(_CURRENT_VAR ${ARGN})
|
||||
IF(NOT ${_CURRENT_VAR})
|
||||
SET(${_NAME_UPPER}_FOUND FALSE)
|
||||
ENDIF(NOT ${_CURRENT_VAR})
|
||||
ENDFOREACH(_CURRENT_VAR)
|
||||
|
||||
IF (${_NAME_UPPER}_FOUND)
|
||||
IF (NOT ${_NAME}_FIND_QUIETLY)
|
||||
MESSAGE(STATUS "Found ${_NAME}: ${${_VAR1}}")
|
||||
ENDIF (NOT ${_NAME}_FIND_QUIETLY)
|
||||
ELSE (${_NAME_UPPER}_FOUND)
|
||||
IF (${_NAME}_FIND_REQUIRED)
|
||||
MESSAGE(FATAL_ERROR "${_FAIL_MESSAGE}")
|
||||
ELSE (${_NAME}_FIND_REQUIRED)
|
||||
IF (NOT ${_NAME}_FIND_QUIETLY)
|
||||
MESSAGE(STATUS "${_FAIL_MESSAGE}")
|
||||
ENDIF (NOT ${_NAME}_FIND_QUIETLY)
|
||||
ENDIF (${_NAME}_FIND_REQUIRED)
|
||||
ENDIF (${_NAME_UPPER}_FOUND)
|
||||
ENDMACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS)
|
|
@ -1,29 +0,0 @@
|
|||
# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake
|
||||
# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS
|
||||
# --> BSD licensed
|
||||
#
|
||||
# GNU Readline library finder
|
||||
if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
|
||||
set(READLINE_FOUND TRUE)
|
||||
else(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
|
||||
FIND_PATH(READLINE_INCLUDE_DIR readline/readline.h
|
||||
/usr/include/readline
|
||||
)
|
||||
|
||||
# 2008-04-22 The next clause used to read like this:
|
||||
#
|
||||
# FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
|
||||
# FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
|
||||
# include(FindPackageHandleStandardArgs)
|
||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
|
||||
#
|
||||
# I was advised to modify it such that it will find an ncurses library if
|
||||
# required, but not if one was explicitly given, that is, it allows the
|
||||
# default to be overridden. PH
|
||||
|
||||
FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
|
||||
include(FindPackageHandleStandardArgs)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY )
|
||||
|
||||
MARK_AS_ADVANCED(READLINE_INCLUDE_DIR READLINE_LIBRARY)
|
||||
endif(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
|
|
@ -1,44 +0,0 @@
|
|||
/* config.h for CMake builds */
|
||||
|
||||
#cmakedefine HAVE_DIRENT_H 1
|
||||
#cmakedefine HAVE_SYS_STAT_H 1
|
||||
#cmakedefine HAVE_SYS_TYPES_H 1
|
||||
#cmakedefine HAVE_UNISTD_H 1
|
||||
#cmakedefine HAVE_WINDOWS_H 1
|
||||
|
||||
#cmakedefine HAVE_TYPE_TRAITS_H 1
|
||||
#cmakedefine HAVE_BITS_TYPE_TRAITS_H 1
|
||||
|
||||
#cmakedefine HAVE_BCOPY 1
|
||||
#cmakedefine HAVE_MEMMOVE 1
|
||||
#cmakedefine HAVE_STRERROR 1
|
||||
#cmakedefine HAVE_STRTOLL 1
|
||||
#cmakedefine HAVE_STRTOQ 1
|
||||
#cmakedefine HAVE__STRTOI64 1
|
||||
|
||||
#cmakedefine PCRE_STATIC 1
|
||||
|
||||
#cmakedefine SUPPORT_UTF8 1
|
||||
#cmakedefine SUPPORT_UCP 1
|
||||
#cmakedefine EBCDIC 1
|
||||
#cmakedefine BSR_ANYCRLF 1
|
||||
#cmakedefine NO_RECURSE 1
|
||||
|
||||
#cmakedefine HAVE_LONG_LONG 1
|
||||
#cmakedefine HAVE_UNSIGNED_LONG_LONG 1
|
||||
|
||||
#cmakedefine SUPPORT_LIBBZ2 1
|
||||
#cmakedefine SUPPORT_LIBZ 1
|
||||
#cmakedefine SUPPORT_LIBREADLINE 1
|
||||
|
||||
#define NEWLINE @NEWLINE@
|
||||
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
|
||||
#define LINK_SIZE @PCRE_LINK_SIZE@
|
||||
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@
|
||||
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
|
||||
|
||||
|
||||
#define MAX_NAME_SIZE 32
|
||||
#define MAX_NAME_COUNT 10000
|
||||
|
||||
/* end config.h for CMake builds */
|
|
@ -1,313 +0,0 @@
|
|||
/* config.h. Generated from config.h.in by configure. */
|
||||
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
|
||||
/* On Unix-like systems config.h.in is converted by "configure" into config.h.
|
||||
Some other environments also support the use of "configure". PCRE is written in
|
||||
Standard C, but there are a few non-standard things it can cope with, allowing
|
||||
it to run on SunOS4 and other "close to standard" systems.
|
||||
|
||||
If you are going to build PCRE "by hand" on a system without "configure" you
|
||||
should copy the distributed config.h.generic to config.h, and then set up the
|
||||
macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to
|
||||
all of your compile commands, so that config.h is included at the start of
|
||||
every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
||||
them both to 0; an emulation function will be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined, this is
|
||||
changed so that backslash-R matches only CR, LF, or CRLF. The build-time
|
||||
default can be overridden by the user of PCRE at runtime. On systems that
|
||||
support it, "configure" can be used to override the default. */
|
||||
/* #undef BSR_ANYCRLF */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro as 1. On systems that can use
|
||||
"configure", this can be done via --enable-ebcdic. PCRE will then assume
|
||||
that all input strings are in EBCDIC. If you do not define this macro, PCRE
|
||||
will assume input strings are ASCII or UTF-8 Unicode. It is not possible to
|
||||
build a version of PCRE that supports both EBCDIC and UTF-8. */
|
||||
/* #undef EBCDIC */
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
#ifndef HAVE_BCOPY
|
||||
#define HAVE_BCOPY 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <bits/type_traits.h> header file. */
|
||||
/* #undef HAVE_BITS_TYPE_TRAITS_H */
|
||||
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
#ifndef HAVE_BZLIB_H
|
||||
#define HAVE_BZLIB_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
#ifndef HAVE_DIRENT_H
|
||||
#define HAVE_DIRENT_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#ifndef HAVE_DLFCN_H
|
||||
#define HAVE_DLFCN_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#ifndef HAVE_INTTYPES_H
|
||||
#define HAVE_INTTYPES_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#ifndef HAVE_LIMITS_H
|
||||
#define HAVE_LIMITS_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if the system has the type `long long'. */
|
||||
#ifndef HAVE_LONG_LONG
|
||||
#define HAVE_LONG_LONG 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
#ifndef HAVE_MEMMOVE
|
||||
#define HAVE_MEMMOVE 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#ifndef HAVE_MEMORY_H
|
||||
#define HAVE_MEMORY_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
#ifndef HAVE_READLINE_HISTORY_H
|
||||
#define HAVE_READLINE_HISTORY_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
#ifndef HAVE_READLINE_READLINE_H
|
||||
#define HAVE_READLINE_READLINE_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#ifndef HAVE_STDINT_H
|
||||
#define HAVE_STDINT_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#ifndef HAVE_STDLIB_H
|
||||
#define HAVE_STDLIB_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#ifndef HAVE_STRERROR
|
||||
#define HAVE_STRERROR 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <string> header file. */
|
||||
#ifndef HAVE_STRING
|
||||
#define HAVE_STRING 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#ifndef HAVE_STRINGS_H
|
||||
#define HAVE_STRINGS_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#ifndef HAVE_STRING_H
|
||||
#define HAVE_STRING_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `strtoll' function. */
|
||||
/* #undef HAVE_STRTOLL */
|
||||
|
||||
/* Define to 1 if you have the `strtoq' function. */
|
||||
#ifndef HAVE_STRTOQ
|
||||
#define HAVE_STRTOQ 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#ifndef HAVE_SYS_STAT_H
|
||||
#define HAVE_SYS_STAT_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#ifndef HAVE_SYS_TYPES_H
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <type_traits.h> header file. */
|
||||
/* #undef HAVE_TYPE_TRAITS_H */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#ifndef HAVE_UNISTD_H
|
||||
#define HAVE_UNISTD_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if the system has the type `unsigned long long'. */
|
||||
#ifndef HAVE_UNSIGNED_LONG_LONG
|
||||
#define HAVE_UNSIGNED_LONG_LONG 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
/* #undef HAVE_WINDOWS_H */
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
#ifndef HAVE_ZLIB_H
|
||||
#define HAVE_ZLIB_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `_strtoi64' function. */
|
||||
/* #undef HAVE__STRTOI64 */
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
||||
for longer patterns in extreme cases. On systems that support it,
|
||||
"configure" can be used to override this default. */
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre_exec(). There is a runtime interface for setting a different limit.
|
||||
The limit exists in order to catch runaway regular expressions that take
|
||||
for ever to determine that they do not match. The default is set very large
|
||||
so that it does not accidentally catch legitimate cases. On systems that
|
||||
support it, "configure" can be used to override this default. */
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* The above limit applies to all calls of match(), whether or not they
|
||||
increase the recursion depth. In some environments it is desirable to limit
|
||||
the depth of recursive calls of match() more strictly, in order to restrict
|
||||
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
|
||||
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
|
||||
match(). To have any useful effect, it must be less than the value of
|
||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
|
||||
a runtime method for setting a different limit. On systems that support it,
|
||||
"configure" can be used to override the default. */
|
||||
#ifndef MATCH_LIMIT_RECURSION
|
||||
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_COUNT
|
||||
#define MAX_NAME_COUNT 10000
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_SIZE
|
||||
#define MAX_NAME_SIZE 32
|
||||
#endif
|
||||
|
||||
/* The value of NEWLINE determines the newline character sequence. On systems
|
||||
that support it, "configure" can be used to override the default, which is
|
||||
10. The possible values are 10 (LF), 13 (CR), 3338 (CRLF), -1 (ANY), or -2
|
||||
(ANYCRLF). */
|
||||
#ifndef NEWLINE
|
||||
#define NEWLINE 10
|
||||
#endif
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define NO_RECURSE to get a version that doesn't use recursion in the
|
||||
match() function; instead it creates its own stack by steam using
|
||||
pcre_recurse_malloc() to obtain memory from the heap. For more detail, see
|
||||
the comments and other stuff just above the match() function. On systems
|
||||
that support it, "configure" can be used to set this in the Makefile (use
|
||||
--disable-stack-for-recursion). */
|
||||
/* #undef NO_RECURSE */
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "pcre"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "PCRE"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE 7.9"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "7.9"
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, it
|
||||
defaults to "extern" for a C compiler and "extern C" for a C++
|
||||
compiler on non-Win32 systems. This macro appears at the start of
|
||||
every exported function that is part of the external API. It does
|
||||
not appear on functions that are "external" in the C sense, but
|
||||
which are internal to the library. */
|
||||
/* #undef PCRE_EXP_DEFN */
|
||||
|
||||
/* Define if linking statically (TODO: make nice with Libtool) */
|
||||
/* #undef PCRE_STATIC */
|
||||
|
||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||
required for holding the pointers to capturing substrings because PCRE
|
||||
requires three integers per substring, whereas the POSIX interface provides
|
||||
only two. If the number of expected substrings is small, the wrapper
|
||||
function uses space on the stack, because this is faster than using
|
||||
malloc() for each call. The threshold above which the stack is no longer
|
||||
used is defined by POSIX_MALLOC_THRESHOLD. On systems that support it,
|
||||
"configure" can be used to override this default. */
|
||||
#ifndef POSIX_MALLOC_THRESHOLD
|
||||
#define POSIX_MALLOC_THRESHOLD 10
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#ifndef STDC_HEADERS
|
||||
#define STDC_HEADERS 1
|
||||
#endif
|
||||
|
||||
/* Define to allow pcregrep to be linked with libbz2, so that it is able to
|
||||
handle .bz2 files. */
|
||||
/* #undef SUPPORT_LIBBZ2 */
|
||||
|
||||
/* Define to allow pcretest to be linked with libreadline. */
|
||||
/* #undef SUPPORT_LIBREADLINE */
|
||||
|
||||
/* Define to allow pcregrep to be linked with libz, so that it is able to
|
||||
handle .gz files. */
|
||||
/* #undef SUPPORT_LIBZ */
|
||||
|
||||
/* Define to enable support for Unicode properties */
|
||||
/* #undef SUPPORT_UCP */
|
||||
|
||||
/* Define to enable support for the UTF-8 Unicode encoding. This will work
|
||||
even in an EBCDIC environment, but it is incompatible with the EBCDIC
|
||||
macro. That is, PCRE can support *either* EBCDIC code *or* ASCII/UTF-8, but
|
||||
not both at once. */
|
||||
/* #undef SUPPORT_UTF8 */
|
||||
|
||||
/* Version number of package */
|
||||
#ifndef VERSION
|
||||
#define VERSION "7.9"
|
||||
#endif
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
/* #undef size_t */
|
|
@ -1,691 +0,0 @@
|
|||
dnl Process this file with autoconf to produce a configure script.
|
||||
|
||||
dnl NOTE FOR MAINTAINERS: Do not use major or minor version numbers with
|
||||
dnl leading zeros, because they may be treated as octal constants. The
|
||||
dnl PCRE_PRERELEASE feature is for identifying release candidates. It might
|
||||
dnl be defined as -RC2, for example. For real releases, it should be defined
|
||||
dnl empty.
|
||||
|
||||
m4_define(pcre_major, [7])
|
||||
m4_define(pcre_minor, [9])
|
||||
m4_define(pcre_prerelease, [])
|
||||
m4_define(pcre_date, [2009-04-11])
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre_version, [0:1:0])
|
||||
m4_define(libpcreposix_version, [0:0:0])
|
||||
m4_define(libpcrecpp_version, [0:0:0])
|
||||
|
||||
AC_PREREQ(2.57)
|
||||
AC_INIT(PCRE, pcre_major.pcre_minor[]pcre_prerelease, , pcre)
|
||||
AC_CONFIG_SRCDIR([pcre.h.in])
|
||||
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
|
||||
AC_CONFIG_HEADERS(config.h)
|
||||
|
||||
CFLAGS="$CFLAGS $CONFIGURE_CFLAGS"
|
||||
CXXFLAGS="$CXXFLAGS $CONFIGURE_CXXFLAGS"
|
||||
LDFLAGS="$LDFLAGS $CONFIGURE_LDFLAGS"
|
||||
|
||||
# The default CFLAGS and CXXFLAGS in Autoconf are "-g -O2" for gcc and just
|
||||
# "-g" for any other compiler. There doesn't seem to be a standard way of
|
||||
# getting rid of the -g (which I don't think is needed for a production
|
||||
# library). This fudge seems to achieve the necessary. First, we remember the
|
||||
# externally set values of CFLAGS and CXXFLAGS. Then call the AC_PROG_CC and
|
||||
# AC_PROG_CXX macros to find the compilers - if CFLAGS and CXXFLAGS are not
|
||||
# set, they will be set to Autoconf's defaults. Afterwards, if the original
|
||||
# values were not set, remove the -g from the Autoconf defaults.
|
||||
# (PH 02-May-07)
|
||||
|
||||
remember_set_CFLAGS="$CFLAGS"
|
||||
remember_set_CXXFLAGS="$CXXFLAGS"
|
||||
|
||||
AC_PROG_CC
|
||||
AC_PROG_CXX
|
||||
|
||||
if test "x$remember_set_CFLAGS" = "x"
|
||||
then
|
||||
if test "$CFLAGS" = "-g -O2"
|
||||
then
|
||||
CFLAGS="-O2"
|
||||
elif test "$CFLAGS" = "-g"
|
||||
then
|
||||
CFLAGS=""
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "x$remember_set_CXXFLAGS" = "x"
|
||||
then
|
||||
if test "$CXXFLAGS" = "-g -O2"
|
||||
then
|
||||
CXXFLAGS="-O2"
|
||||
elif test "$CXXFLAGS" = "-g"
|
||||
then
|
||||
CXXFLAGS=""
|
||||
fi
|
||||
fi
|
||||
|
||||
# AC_PROG_CXX will return "g++" even if no c++ compiler is installed.
|
||||
# Check for that case, and just disable c++ code if g++ doesn't run.
|
||||
AC_LANG_PUSH(C++)
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]],[[]])],, CXX=""; CXXCP=""; CXXFLAGS="")
|
||||
AC_LANG_POP
|
||||
|
||||
AC_PROG_INSTALL
|
||||
AC_LIBTOOL_WIN32_DLL
|
||||
AC_PROG_LIBTOOL
|
||||
AC_PROG_LN_S
|
||||
|
||||
PCRE_MAJOR="pcre_major"
|
||||
PCRE_MINOR="pcre_minor"
|
||||
PCRE_PRERELEASE="pcre_prerelease"
|
||||
PCRE_DATE="pcre_date"
|
||||
|
||||
AC_SUBST(PCRE_MAJOR)
|
||||
AC_SUBST(PCRE_MINOR)
|
||||
AC_SUBST(PCRE_PRERELEASE)
|
||||
AC_SUBST(PCRE_DATE)
|
||||
|
||||
# Set a more sensible default value for $(htmldir).
|
||||
if test "x$htmldir" = 'x${docdir}'
|
||||
then
|
||||
htmldir='${docdir}/html'
|
||||
fi
|
||||
|
||||
# Handle --disable-cpp
|
||||
AC_ARG_ENABLE(cpp,
|
||||
AS_HELP_STRING([--disable-cpp],
|
||||
[disable C++ support]),
|
||||
, enable_cpp=yes)
|
||||
|
||||
# Handle --enable-rebuild-chartables
|
||||
AC_ARG_ENABLE(rebuild-chartables,
|
||||
AS_HELP_STRING([--enable-rebuild-chartables],
|
||||
[rebuild character tables in current locale]),
|
||||
, enable_rebuild_chartables=no)
|
||||
|
||||
# Handle --enable-utf8 (disabled by default)
|
||||
AC_ARG_ENABLE(utf8,
|
||||
AS_HELP_STRING([--enable-utf8],
|
||||
[enable UTF-8 support (incompatible with --enable-ebcdic)]),
|
||||
, enable_utf8=unset)
|
||||
|
||||
# Handle --enable-unicode-properties
|
||||
AC_ARG_ENABLE(unicode-properties,
|
||||
AS_HELP_STRING([--enable-unicode-properties],
|
||||
[enable Unicode properties support (implies --enable-utf8)]),
|
||||
, enable_unicode_properties=no)
|
||||
|
||||
# Handle --enable-newline=NL
|
||||
dnl AC_ARG_ENABLE(newline,
|
||||
dnl AS_HELP_STRING([--enable-newline=NL],
|
||||
dnl [use NL as newline (lf, cr, crlf, anycrlf, any; default=lf)]),
|
||||
dnl , enable_newline=lf)
|
||||
|
||||
# Separate newline options
|
||||
ac_pcre_newline=lf
|
||||
AC_ARG_ENABLE(newline-is-cr,
|
||||
AS_HELP_STRING([--enable-newline-is-cr],
|
||||
[use CR as newline character]),
|
||||
ac_pcre_newline=cr)
|
||||
AC_ARG_ENABLE(newline-is-lf,
|
||||
AS_HELP_STRING([--enable-newline-is-lf],
|
||||
[use LF as newline character (default)]),
|
||||
ac_pcre_newline=lf)
|
||||
AC_ARG_ENABLE(newline-is-crlf,
|
||||
AS_HELP_STRING([--enable-newline-is-crlf],
|
||||
[use CRLF as newline sequence]),
|
||||
ac_pcre_newline=crlf)
|
||||
AC_ARG_ENABLE(newline-is-anycrlf,
|
||||
AS_HELP_STRING([--enable-newline-is-anycrlf],
|
||||
[use CR, LF, or CRLF as newline sequence]),
|
||||
ac_pcre_newline=anycrlf)
|
||||
AC_ARG_ENABLE(newline-is-any,
|
||||
AS_HELP_STRING([--enable-newline-is-any],
|
||||
[use any valid Unicode newline sequence]),
|
||||
ac_pcre_newline=any)
|
||||
enable_newline="$ac_pcre_newline"
|
||||
|
||||
# Handle --enable-bsr-anycrlf
|
||||
AC_ARG_ENABLE(bsr-anycrlf,
|
||||
AS_HELP_STRING([--enable-bsr-anycrlf],
|
||||
[\R matches only CR, LF, CRLF by default]),
|
||||
, enable_bsr_anycrlf=no)
|
||||
|
||||
# Handle --enable-ebcdic
|
||||
AC_ARG_ENABLE(ebcdic,
|
||||
AS_HELP_STRING([--enable-ebcdic],
|
||||
[assume EBCDIC coding rather than ASCII; incompatible with --enable-utf8; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
|
||||
, enable_ebcdic=no)
|
||||
|
||||
# Handle --disable-stack-for-recursion
|
||||
AC_ARG_ENABLE(stack-for-recursion,
|
||||
AS_HELP_STRING([--disable-stack-for-recursion],
|
||||
[don't use stack recursion when matching]),
|
||||
, enable_stack_for_recursion=yes)
|
||||
|
||||
# Handle --enable-pcregrep-libz
|
||||
AC_ARG_ENABLE(pcregrep-libz,
|
||||
AS_HELP_STRING([--enable-pcregrep-libz],
|
||||
[link pcregrep with libz to handle .gz files]),
|
||||
, enable_pcregrep_libz=no)
|
||||
|
||||
# Handle --enable-pcregrep-libbz2
|
||||
AC_ARG_ENABLE(pcregrep-libbz2,
|
||||
AS_HELP_STRING([--enable-pcregrep-libbz2],
|
||||
[link pcregrep with libbz2 to handle .bz2 files]),
|
||||
, enable_pcregrep_libbz2=no)
|
||||
|
||||
# Handle --enable-pcretest-libreadline
|
||||
AC_ARG_ENABLE(pcretest-libreadline,
|
||||
AS_HELP_STRING([--enable-pcretest-libreadline],
|
||||
[link pcretest with libreadline]),
|
||||
, enable_pcretest_libreadline=no)
|
||||
|
||||
# Handle --with-posix-malloc-threshold=NBYTES
|
||||
AC_ARG_WITH(posix-malloc-threshold,
|
||||
AS_HELP_STRING([--with-posix-malloc-threshold=NBYTES],
|
||||
[threshold for POSIX malloc usage (default=10)]),
|
||||
, with_posix_malloc_threshold=10)
|
||||
|
||||
# Handle --with-link-size=N
|
||||
AC_ARG_WITH(link-size,
|
||||
AS_HELP_STRING([--with-link-size=N],
|
||||
[internal link size (2, 3, or 4 allowed; default=2)]),
|
||||
, with_link_size=2)
|
||||
|
||||
# Handle --with-match-limit=N
|
||||
AC_ARG_WITH(match-limit,
|
||||
AS_HELP_STRING([--with-match-limit=N],
|
||||
[default limit on internal looping (default=10000000)]),
|
||||
, with_match_limit=10000000)
|
||||
|
||||
# Handle --with-match-limit-recursion=N
|
||||
#
|
||||
# Note: In config.h, the default is to define MATCH_LIMIT_RECURSION
|
||||
# symbolically as MATCH_LIMIT, which in turn is defined to be some numeric
|
||||
# value (e.g. 10000000). MATCH_LIMIT_RECURSION can otherwise be set to some
|
||||
# different numeric value (or even the same numeric value as MATCH_LIMIT,
|
||||
# though no longer defined in terms of the latter).
|
||||
#
|
||||
AC_ARG_WITH(match-limit-recursion,
|
||||
AS_HELP_STRING([--with-match-limit-recursion=N],
|
||||
[default limit on internal recursion (default=MATCH_LIMIT)]),
|
||||
, with_match_limit_recursion=MATCH_LIMIT)
|
||||
|
||||
# Make sure that if enable_unicode_properties was set, that UTF-8 support
|
||||
# is enabled.
|
||||
#
|
||||
if test "x$enable_unicode_properties" = "xyes"
|
||||
then
|
||||
if test "x$enable_utf8" = "xno"
|
||||
then
|
||||
AC_MSG_ERROR([support for Unicode properties requires UTF-8 support])
|
||||
fi
|
||||
enable_utf8=yes
|
||||
fi
|
||||
|
||||
if test "x$enable_utf8" = "xunset"
|
||||
then
|
||||
enable_utf8=no
|
||||
fi
|
||||
|
||||
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
|
||||
# Also check that UTF-8 support is not requested, because PCRE cannot handle
|
||||
# EBCDIC and UTF-8 in the same build. To do so it would need to use different
|
||||
# character constants depending on the mode.
|
||||
#
|
||||
if test "x$enable_ebcdic" = "xyes"
|
||||
then
|
||||
enable_rebuild_chartables=yes
|
||||
if test "x$enable_utf8" = "xyes"
|
||||
then
|
||||
AC_MSG_ERROR([support for EBCDIC and UTF-8 cannot be enabled at the same time])
|
||||
fi
|
||||
fi
|
||||
|
||||
# Convert the newline identifier into the appropriate integer value.
|
||||
case "$enable_newline" in
|
||||
lf) ac_pcre_newline_value=10 ;;
|
||||
cr) ac_pcre_newline_value=13 ;;
|
||||
crlf) ac_pcre_newline_value=3338 ;;
|
||||
anycrlf) ac_pcre_newline_value=-2 ;;
|
||||
any) ac_pcre_newline_value=-1 ;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid argument \"$enable_newline\" to --enable-newline option])
|
||||
;;
|
||||
esac
|
||||
|
||||
# Check argument to --with-link-size
|
||||
case "$with_link_size" in
|
||||
2|3|4) ;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid argument \"$with_link_size\" to --with-link-size option])
|
||||
;;
|
||||
esac
|
||||
|
||||
AH_TOP([
|
||||
/* On Unix-like systems config.h.in is converted by "configure" into config.h.
|
||||
Some other environments also support the use of "configure". PCRE is written in
|
||||
Standard C, but there are a few non-standard things it can cope with, allowing
|
||||
it to run on SunOS4 and other "close to standard" systems.
|
||||
|
||||
If you are going to build PCRE "by hand" on a system without "configure" you
|
||||
should copy the distributed config.h.generic to config.h, and then set up the
|
||||
macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to
|
||||
all of your compile commands, so that config.h is included at the start of
|
||||
every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
||||
them both to 0; an emulation function will be used. */])
|
||||
|
||||
AC_DEFUN([AX_COMPILER_VENDOR],
|
||||
[
|
||||
AC_CACHE_CHECK([for _AC_LANG compiler vendor], ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor,
|
||||
[ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor=unknown
|
||||
# note: don't check for gcc first since some other compilers define __GNUC__
|
||||
for ventest in intel:__ICC,__ECC,__INTEL_COMPILER ibm:__xlc__,__xlC__,__IBMC__,__IBMCPP__ gnu:__GNUC__ sun:__SUNPRO_C,__SUNPRO_CC hp:__HP_cc,__HP_aCC dec:__DECC,__DECCXX,__DECC_VER,__DECCXX_VER borland:__BORLANDC__,__TURBOC__ comeau:__COMO__ cray:_CRAYC kai:__KCC lcc:__LCC__ metrowerks:__MWERKS__ sgi:__sgi,sgi microsoft:_MSC_VER watcom:__WATCOMC__ portland:__PGI; do
|
||||
vencpp="defined("`echo $ventest | cut -d: -f2 | sed 's/,/) || defined(/g'`")"
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[
|
||||
#if !($vencpp)
|
||||
thisisanerror;
|
||||
#endif
|
||||
])], [ax_cv_]_AC_LANG_ABBREV[_compiler_vendor=`echo $ventest | cut -d: -f1`; break])
|
||||
done
|
||||
])
|
||||
])
|
||||
|
||||
AX_COMPILER_VENDOR
|
||||
|
||||
# Enable 64 bit build
|
||||
AC_ARG_ENABLE(64,
|
||||
[AC_HELP_STRING([--enable-64],[build with 64 bit support])],[enable_64="$enable_64"],[enable_64="no"])
|
||||
|
||||
if test "x${ax_cv_c_compiler_vendor}" = "xsun" ; then
|
||||
if test "${enable_64}" = "yes"; then
|
||||
CFLAGS="$CFLAGS -m64"
|
||||
CXXFLAGS="$CXXFLAGS -m64"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Checks for header files.
|
||||
AC_HEADER_STDC
|
||||
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h windows.h)
|
||||
|
||||
# The files below are C++ header files.
|
||||
pcre_have_type_traits="0"
|
||||
pcre_have_bits_type_traits="0"
|
||||
if test "x$enable_cpp" = "xyes" -a -n "$CXX"
|
||||
then
|
||||
AC_LANG_PUSH(C++)
|
||||
|
||||
# Older versions of pcre defined pcrecpp::no_arg, but in new versions
|
||||
# it's called pcrecpp::RE::no_arg. For backwards ABI compatibility,
|
||||
# we want to make one an alias for the other. Different systems do
|
||||
# this in different ways. Some systems, for instance, can do it via
|
||||
# a linker flag: -alias (for os x 10.5) or -i (for os x <=10.4).
|
||||
OLD_LDFLAGS="$LDFLAGS"
|
||||
for flag in "-alias,__ZN7pcrecpp2RE6no_argE,__ZN7pcrecpp6no_argE" \
|
||||
"-i__ZN7pcrecpp6no_argE:__ZN7pcrecpp2RE6no_argE"; do
|
||||
AC_MSG_CHECKING([for alias support in the linker])
|
||||
LDFLAGS="$OLD_LDFLAGS -Wl,$flag"
|
||||
# We try to run the linker with this new ld flag. If the link fails,
|
||||
# we give up and remove the new flag from LDFLAGS.
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[namespace pcrecpp {
|
||||
class RE { static int no_arg; };
|
||||
int RE::no_arg;
|
||||
}]],
|
||||
[[]])],
|
||||
[AC_MSG_RESULT([yes]);
|
||||
EXTRA_LIBPCRECPP_LDFLAGS="$EXTRA_LIBPCRECPP_LDFLAGS -Wl,$flag";
|
||||
break;],
|
||||
AC_MSG_RESULT([no]))
|
||||
done
|
||||
LDFLAGS="$OLD_LDFLAGS"
|
||||
|
||||
# We could be more clever here, given we're doing AC_SUBST with this
|
||||
# (eg set a var to be the name of the include file we want). But we're not
|
||||
# so it's easy to change back to 'regular' autoconf vars if we needed to.
|
||||
AC_CHECK_HEADERS(string, [pcre_have_cpp_headers="1"],
|
||||
[pcre_have_cpp_headers="0"])
|
||||
AC_CHECK_HEADERS(bits/type_traits.h, [pcre_have_bits_type_traits="1"],
|
||||
[pcre_have_bits_type_traits="0"])
|
||||
AC_CHECK_HEADERS(type_traits.h, [pcre_have_type_traits="1"],
|
||||
[pcre_have_type_traits="0"])
|
||||
|
||||
AC_LANG_POP
|
||||
fi
|
||||
# Using AC_SUBST eliminates the need to include config.h in a public .h file
|
||||
AC_SUBST(pcre_have_type_traits)
|
||||
AC_SUBST(pcre_have_bits_type_traits)
|
||||
|
||||
# Conditional compilation
|
||||
AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes")
|
||||
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
||||
|
||||
# Checks for typedefs, structures, and compiler characteristics.
|
||||
|
||||
AC_C_CONST
|
||||
AC_TYPE_SIZE_T
|
||||
|
||||
pcre_have_strotolonglong=0
|
||||
AC_CHECK_FUNCS(strtoq strtoll _strtoi64, [pcre_have_strotolonglong="1"; break])
|
||||
# If we can't convert a string to a long long, pretend we don't even
|
||||
# have a long long.
|
||||
if test $pcre_have_strotolonglong = "0"; then
|
||||
pcre_have_long_long="0"
|
||||
pcre_have_ulong_long="0"
|
||||
else
|
||||
AC_CHECK_TYPES([long long],
|
||||
[pcre_have_long_long="1"],
|
||||
[pcre_have_long_long="0"])
|
||||
AC_CHECK_TYPES([unsigned long long],
|
||||
[pcre_have_ulong_long="1"],
|
||||
[pcre_have_ulong_long="0"])
|
||||
fi
|
||||
AC_SUBST(pcre_have_long_long)
|
||||
AC_SUBST(pcre_have_ulong_long)
|
||||
|
||||
# Checks for library functions.
|
||||
|
||||
AC_CHECK_FUNCS(bcopy memmove strerror)
|
||||
|
||||
# Check for the availability of libz (aka zlib)
|
||||
|
||||
AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1])
|
||||
AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1])
|
||||
|
||||
# Check for the availability of libbz2
|
||||
|
||||
AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1])
|
||||
AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1])
|
||||
|
||||
# Check for the availability of libreadline
|
||||
|
||||
AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1])
|
||||
AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1])
|
||||
AC_CHECK_LIB([readline], [readline], [HAVE_LIB_READLINE=1])
|
||||
|
||||
# This facilitates -ansi builds under Linux
|
||||
dnl AC_DEFINE([_GNU_SOURCE], [], [Enable GNU extensions in glibc])
|
||||
|
||||
if test "x$enable_shared" = "xno" ; then
|
||||
AC_DEFINE([PCRE_STATIC], [1], [
|
||||
Define if linking statically (TODO: make nice with Libtool)])
|
||||
fi
|
||||
|
||||
# Here is where pcre specific defines are handled
|
||||
|
||||
if test "$enable_utf8" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_UTF8], [], [
|
||||
Define to enable support for the UTF-8 Unicode encoding. This will
|
||||
work even in an EBCDIC environment, but it is incompatible with
|
||||
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code
|
||||
*or* ASCII/UTF-8, but not both at once.])
|
||||
fi
|
||||
|
||||
if test "$enable_unicode_properties" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_UCP], [], [
|
||||
Define to enable support for Unicode properties])
|
||||
fi
|
||||
|
||||
if test "$enable_stack_for_recursion" = "no"; then
|
||||
AC_DEFINE([NO_RECURSE], [], [
|
||||
PCRE uses recursive function calls to handle backtracking while
|
||||
matching. This can sometimes be a problem on systems that have
|
||||
stacks of limited size. Define NO_RECURSE to get a version that
|
||||
doesn't use recursion in the match() function; instead it creates
|
||||
its own stack by steam using pcre_recurse_malloc() to obtain memory
|
||||
from the heap. For more detail, see the comments and other stuff
|
||||
just above the match() function. On systems that support it,
|
||||
"configure" can be used to set this in the Makefile
|
||||
(use --disable-stack-for-recursion).])
|
||||
fi
|
||||
|
||||
if test "$enable_pcregrep_libz" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBZ], [], [
|
||||
Define to allow pcregrep to be linked with libz, so that it is
|
||||
able to handle .gz files.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcregrep_libbz2" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBBZ2], [], [
|
||||
Define to allow pcregrep to be linked with libbz2, so that it is
|
||||
able to handle .bz2 files.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcretest_libreadline" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBREADLINE], [], [
|
||||
Define to allow pcretest to be linked with libreadline.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([NEWLINE], [$ac_pcre_newline_value], [
|
||||
The value of NEWLINE determines the newline character sequence. On
|
||||
systems that support it, "configure" can be used to override the
|
||||
default, which is 10. The possible values are 10 (LF), 13 (CR),
|
||||
3338 (CRLF), -1 (ANY), or -2 (ANYCRLF).])
|
||||
|
||||
if test "$enable_bsr_anycrlf" = "yes"; then
|
||||
AC_DEFINE([BSR_ANYCRLF], [], [
|
||||
By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined, this is
|
||||
changed so that backslash-R matches only CR, LF, or CRLF. The build-
|
||||
time default can be overridden by the user of PCRE at runtime. On
|
||||
systems that support it, "configure" can be used to override the
|
||||
default.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
||||
The value of LINK_SIZE determines the number of bytes used to store
|
||||
links as offsets within the compiled regex. The default is 2, which
|
||||
allows for compiled patterns up to 64K long. This covers the vast
|
||||
majority of cases. However, PCRE can also be compiled to use 3 or 4
|
||||
bytes instead. This allows for longer patterns in extreme cases. On
|
||||
systems that support it, "configure" can be used to override this default.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [
|
||||
When calling PCRE via the POSIX interface, additional working storage
|
||||
is required for holding the pointers to capturing substrings because
|
||||
PCRE requires three integers per substring, whereas the POSIX
|
||||
interface provides only two. If the number of expected substrings is
|
||||
small, the wrapper function uses space on the stack, because this is
|
||||
faster than using malloc() for each call. The threshold above which
|
||||
the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD. On
|
||||
systems that support it, "configure" can be used to override this
|
||||
default.])
|
||||
|
||||
# Define MATCH_LIMIT in config.h to the current value of the shell
# variable $with_match_limit. The bracketed text below becomes the
# explanatory comment that accompanies the define in config.h.in.
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
  The value of MATCH_LIMIT determines the default number of times the
  internal match() function can be called during a single execution of
  pcre_exec(). There is a runtime interface for setting a different
  limit. The limit exists in order to catch runaway regular
  expressions that take for ever to determine that they do not match.
  The default is set very large so that it does not accidentally catch
  legitimate cases. On systems that support it, "configure" can be
  used to override this default.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [
|
||||
The above limit applies to all calls of match(), whether or not they
|
||||
increase the recursion depth. In some environments it is desirable
|
||||
to limit the depth of recursive calls of match() more strictly, in
|
||||
order to restrict the maximum amount of stack (or heap, if
|
||||
NO_RECURSE is defined) that is used. The value of
|
||||
MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
|
||||
have any useful effect, it must be less than the value of
|
||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT.
|
||||
There is a runtime method for setting a different limit. On systems
|
||||
that support it, "configure" can be used to override the default.])
|
||||
|
||||
AC_DEFINE([MAX_NAME_SIZE], [32], [
|
||||
This limit is parameterized just in case anybody ever wants to
|
||||
change it. Care must be taken if it is increased, because it guards
|
||||
against integer overflow caused by enormously large patterns.])
|
||||
|
||||
AC_DEFINE([MAX_NAME_COUNT], [10000], [
|
||||
This limit is parameterized just in case anybody ever wants to
|
||||
change it. Care must be taken if it is increased, because it guards
|
||||
against integer overflow caused by enormously large patterns.])
|
||||
|
||||
# Copy the PCRE_EXP_DEFN template below into config.h.in verbatim. The
# macro is left undefined here; the C comment explains when a builder
# would want to define it by hand.
AH_VERBATIM([PCRE_EXP_DEFN], [
/* If you are compiling for a system other than a Unix-like system or
   Win32, and it needs some magic to be inserted before the definition
   of a function that is exported by the library, define this macro to
   contain the relevant magic. If you do not define this macro, it
   defaults to "extern" for a C compiler and "extern C" for a C++
   compiler on non-Win32 systems. This macro appears at the start of
   every exported function that is part of the external API. It does
   not appear on functions that are "external" in the C sense, but
   which are internal to the library. */
#undef PCRE_EXP_DEFN])
|
||||
|
||||
if test "$enable_ebcdic" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([EBCDIC], [], [
|
||||
If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro as 1. On systems that can use
|
||||
"configure", this can be done via --enable-ebcdic. PCRE will then
|
||||
assume that all input strings are in EBCDIC. If you do not define
|
||||
this macro, PCRE will assume input strings are ASCII or UTF-8 Unicode.
|
||||
It is not possible to build a version of PCRE that supports both
|
||||
EBCDIC and UTF-8.])
|
||||
fi
|
||||
|
||||
# Platform specific issues
|
||||
NO_UNDEFINED=
|
||||
EXPORT_ALL_SYMBOLS=
|
||||
case $host_os in
|
||||
cygwin* | mingw* )
|
||||
if test X"$enable_shared" = Xyes; then
|
||||
NO_UNDEFINED="-no-undefined"
|
||||
EXPORT_ALL_SYMBOLS="-Wl,--export-all-symbols"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
# The extra LDFLAGS for each particular library
|
||||
# (Note: The libpcre*_version bits are m4 variables, assigned above)
|
||||
|
||||
EXTRA_LIBPCRE_LDFLAGS="$EXTRA_LIBPCRE_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre_version"
|
||||
|
||||
EXTRA_LIBPCREPOSIX_LDFLAGS="$EXTRA_LIBPCREPOSIX_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcreposix_version"
|
||||
|
||||
EXTRA_LIBPCRECPP_LDFLAGS="$EXTRA_LIBPCRECPP_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcrecpp_version \
|
||||
$EXPORT_ALL_SYMBOLS"
|
||||
|
||||
AC_SUBST(EXTRA_LIBPCRE_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCREPOSIX_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRECPP_LDFLAGS)
|
||||
|
||||
# When we run 'make distcheck', use these arguments.
|
||||
DISTCHECK_CONFIGURE_FLAGS="--enable-cpp --enable-unicode-properties"
|
||||
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
|
||||
|
||||
# Check that, if --enable-pcregrep-libz or --enable-pcregrep-libbz2 is
|
||||
# specified, the relevant library is available.
|
||||
|
||||
# When linking pcregrep against libz was requested, insist that both the
# zlib header and the library were found by the probes earlier in this
# file (they set HAVE_ZLIB_H and HAVE_LIBZ to 1 on success). Failures are
# reported through the standard autoconf error macro so the reason goes
# to stderr and config.log and configure stops with a non-zero status,
# instead of being echoed to stdout where --quiet would hide it.
if test "$enable_pcregrep_libz" = "yes"; then
  if test "$HAVE_ZLIB_H" != "1"; then
    AC_MSG_ERROR([Cannot --enable-pcregrep-libz because zlib.h was not found])
  fi
  if test "$HAVE_LIBZ" != "1"; then
    AC_MSG_ERROR([Cannot --enable-pcregrep-libz because libz was not found])
  fi
  # Only set (and later substituted) when the feature is enabled and usable.
  LIBZ="-lz"
fi
|
||||
AC_SUBST(LIBZ)
|
||||
|
||||
# When linking pcregrep against libbz2 was requested, insist that both
# the bzlib header and the library were found by the probes earlier in
# this file (they set HAVE_BZLIB_H and HAVE_LIBBZ2 to 1 on success).
# Failures are reported through the standard autoconf error macro so the
# reason goes to stderr and config.log and configure stops with a
# non-zero status, instead of being echoed to stdout.
if test "$enable_pcregrep_libbz2" = "yes"; then
  if test "$HAVE_BZLIB_H" != "1"; then
    AC_MSG_ERROR([Cannot --enable-pcregrep-libbz2 because bzlib.h was not found])
  fi
  if test "$HAVE_LIBBZ2" != "1"; then
    AC_MSG_ERROR([Cannot --enable-pcregrep-libbz2 because libbz2 was not found])
  fi
  # Only set (and later substituted) when the feature is enabled and usable.
  LIBBZ2="-lbz2"
fi
|
||||
AC_SUBST(LIBBZ2)
|
||||
|
||||
# Similarly for --enable-pcretest-libreadline: both readline headers and
# the library itself must have been found by the probes earlier in this
# file (HAVE_READLINE_H, HAVE_HISTORY_H and HAVE_LIB_READLINE are set to
# 1 on success). The library flag is checked as well, matching the libz
# and libbz2 blocks, so that -lreadline is never emitted for a library
# that failed its link probe. Failures go through the standard autoconf
# error macro (stderr + config.log, non-zero exit).
if test "$enable_pcretest_libreadline" = "yes"; then
  if test "$HAVE_READLINE_H" != "1"; then
    AC_MSG_ERROR([Cannot --enable-pcretest-libreadline because readline/readline.h was not found])
  fi
  if test "$HAVE_HISTORY_H" != "1"; then
    AC_MSG_ERROR([Cannot --enable-pcretest-libreadline because readline/history.h was not found])
  fi
  if test "$HAVE_LIB_READLINE" != "1"; then
    AC_MSG_ERROR([Cannot --enable-pcretest-libreadline because libreadline was not found])
  fi
  # Only set (and later substituted) when the feature is enabled and usable.
  LIBREADLINE="-lreadline"
fi
|
||||
AC_SUBST(LIBREADLINE)
|
||||
|
||||
# Produce these files, in addition to config.h.
|
||||
AC_CONFIG_FILES(
|
||||
Makefile
|
||||
libpcre.pc
|
||||
libpcrecpp.pc
|
||||
pcre-config
|
||||
pcre.h
|
||||
pcre_stringpiece.h
|
||||
pcrecpparg.h
|
||||
)
|
||||
|
||||
# Make the generated script files executable.
|
||||
AC_CONFIG_COMMANDS([script-chmod], [chmod a+x pcre-config])
|
||||
|
||||
# Make sure that pcre_chartables.c is removed in case the method for
|
||||
# creating it was changed by reconfiguration.
|
||||
AC_CONFIG_COMMANDS([delete-old-chartables], [rm -f pcre_chartables.c])
|
||||
|
||||
AC_OUTPUT
|
||||
|
||||
# Print out a nice little message after configure is run displaying your
|
||||
# chosen options.
|
||||
|
||||
cat <<EOF
|
||||
|
||||
$PACKAGE-$VERSION configuration summary:
|
||||
|
||||
Install prefix .................. : ${prefix}
|
||||
C preprocessor .................. : ${CPP}
|
||||
C compiler ...................... : ${CC}
|
||||
C++ preprocessor ................ : ${CXXCPP}
|
||||
C++ compiler .................... : ${CXX}
|
||||
Linker .......................... : ${LD}
|
||||
C preprocessor flags ............ : ${CPPFLAGS}
|
||||
C compiler flags ................ : ${CFLAGS}
|
||||
C++ compiler flags .............. : ${CXXFLAGS}
|
||||
Linker flags .................... : ${LDFLAGS}
|
||||
Extra libraries ................. : ${LIBS}
|
||||
|
||||
Build C++ library ............... : ${enable_cpp}
|
||||
Enable UTF-8 support ............ : ${enable_utf8}
|
||||
Unicode properties .............. : ${enable_unicode_properties}
|
||||
Newline char/sequence ........... : ${enable_newline}
|
||||
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
||||
EBCDIC coding ................... : ${enable_ebcdic}
|
||||
Rebuild char tables ............. : ${enable_rebuild_chartables}
|
||||
Use stack recursion ............. : ${enable_stack_for_recursion}
|
||||
POSIX mem threshold ............. : ${with_posix_malloc_threshold}
|
||||
Internal link size .............. : ${with_link_size}
|
||||
Match limit ..................... : ${with_match_limit}
|
||||
Match limit recursion ........... : ${with_match_limit_recursion}
|
||||
Build shared libs ............... : ${enable_shared}
|
||||
Build static libs ............... : ${enable_static}
|
||||
Link pcregrep with libz ......... : ${enable_pcregrep_libz}
|
||||
Link pcregrep with libbz2 ....... : ${enable_pcregrep_libbz2}
|
||||
Link pcretest with libreadline .. : ${enable_pcretest_libreadline}
|
||||
|
||||
EOF
|
||||
|
||||
dnl end configure.ac
|
|
@ -1,4 +0,0 @@
|
|||
#! /bin/sh
# Wrapper around the configure script that lives next to this file: it
# forwards the caller's arguments and appends a fixed set of options
# (static only, PIC, no C++ wrapper).
#
# $0 and the computed directory are quoted so that a source tree whose
# path contains spaces or glob characters is still located correctly.
# If dirname fails, fall back to the current directory.
srcpath=$(dirname "$0" 2>/dev/null) || srcpath="."
"$srcpath/configure" "$@" --disable-shared --with-pic --disable-cpp
|
||||
|
|
@ -1,589 +0,0 @@
|
|||
#! /bin/sh
|
||||
# depcomp - compile a program generating dependencies as side-effects
|
||||
|
||||
scriptversion=2007-03-29.01
|
||||
|
||||
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007 Free Software
|
||||
# Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
# 02110-1301, USA.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try \`$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Run PROGRAMS ARGS to compile a file, generating dependencies
|
||||
as side-effects.
|
||||
|
||||
Environment variables:
|
||||
depmode Dependency tracking mode.
|
||||
source Source file read by `PROGRAMS ARGS'.
|
||||
object Object file output by `PROGRAMS ARGS'.
|
||||
DEPDIR directory where to store dependencies.
|
||||
depfile Dependency file to output.
|
||||
tmpdepfile Temporary file to use when outputing dependencies.
|
||||
libtool Whether libtool is used (yes/no).
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "depcomp $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
||||
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
|
||||
depfile=${depfile-`echo "$object" |
|
||||
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
|
||||
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
|
||||
|
||||
rm -f "$tmpdepfile"
|
||||
|
||||
# Some modes work just like other modes, but use different flags. We
|
||||
# parameterize here, but still list the modes in the big case below,
|
||||
# to make depend.m4 easier to write. Note that we *cannot* use a case
|
||||
# here, because this file can only contain one case statement.
|
||||
if test "$depmode" = hp; then
|
||||
# HP compiler uses -M and no extra arg.
|
||||
gccflag=-M
|
||||
depmode=gcc
|
||||
fi
|
||||
|
||||
if test "$depmode" = dashXmstdout; then
|
||||
# This is just like dashmstdout with a different argument.
|
||||
dashmflag=-xM
|
||||
depmode=dashmstdout
|
||||
fi
|
||||
|
||||
case "$depmode" in
|
||||
gcc3)
|
||||
## gcc 3 implements dependency tracking that does exactly what
|
||||
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
|
||||
## it if -MD -MP comes after the -MF stuff. Hmm.
|
||||
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
|
||||
## the command line argument order; so add the flags where they
|
||||
## appear in depend2.am. Note that the slowdown incurred here
|
||||
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
|
||||
*) set fnord "$@" "$arg" ;;
|
||||
esac
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
done
|
||||
"$@"
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
mv "$tmpdepfile" "$depfile"
|
||||
;;
|
||||
|
||||
gcc)
|
||||
## There are various ways to get dependency output from gcc. Here's
|
||||
## why we pick this rather obscure method:
|
||||
## - Don't want to use -MD because we'd like the dependencies to end
|
||||
## up in a subdir. Having to rename by hand is ugly.
|
||||
## (We might end up doing this anyway to support other compilers.)
|
||||
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
|
||||
## -MM, not -M (despite what the docs say).
|
||||
## - Using -M directly means running the compiler twice (even worse
|
||||
## than renaming).
|
||||
if test -z "$gccflag"; then
|
||||
gccflag=-MD,
|
||||
fi
|
||||
"$@" -Wp,"$gccflag$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
||||
## The second -e expression handles DOS-style file names with drive letters.
|
||||
sed -e 's/^[^:]*: / /' \
|
||||
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
||||
## This next piece of magic avoids the `deleted header file' problem.
|
||||
## The problem is that when a header file which appears in a .P file
|
||||
## is deleted, the dependency causes make to die (because there is
|
||||
## typically no way to rebuild the header). We avoid this by adding
|
||||
## dummy dependencies for each header file. Too bad gcc doesn't do
|
||||
## this for us directly.
|
||||
tr ' ' '
|
||||
' < "$tmpdepfile" |
|
||||
## Some versions of gcc put a space before the `:'. On the theory
|
||||
## that the space means something, we add a space to the output as
|
||||
## well.
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
sgi)
|
||||
if test "$libtool" = yes; then
|
||||
"$@" "-Wp,-MDupdate,$tmpdepfile"
|
||||
else
|
||||
"$@" -MDupdate "$tmpdepfile"
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
|
||||
# Turn the compiler-written $tmpdepfile into the final $depfile, or write
# a placeholder when the compiler produced no dependency file at all.
# Every redirection target is quoted so that an object path containing
# spaces or glob characters cannot split or expand.
if test -f "$tmpdepfile"; then  # yes, the sourcefile depend on other files
  echo "$object : \\" > "$depfile"

  # Clip off the initial element (the dependent). Don't try to be
  # clever and replace this with sed code, as IRIX sed won't handle
  # lines with more than a fixed number of characters (4096 in
  # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
  # the IRIX cc adds comments like `#:fec' to the end of the
  # dependency line.
  # The first tr puts one word per line, the second joins them again.
  tr ' ' '
' < "$tmpdepfile" \
  | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
  tr '
' ' ' >> "$depfile"
  echo >> "$depfile"

  # The second pass generates a dummy entry for each header file.
  tr ' ' '
' < "$tmpdepfile" \
  | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
  >> "$depfile"
else
  # The sourcefile does not contain any dependencies, so just
  # store a dummy comment line, to avoid errors with the Makefile
  # "include basename.Plo" scheme.
  echo "#dummy" > "$depfile"
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
aix)
|
||||
# The C for AIX Compiler uses -M and outputs the dependencies
|
||||
# in a .u file. In older versions, this file always lives in the
|
||||
# current directory. Also, the AIX compiler puts `$object:' at the
|
||||
# start of each line; $object doesn't have directory information.
|
||||
# Version 6 uses the directory in both cases.
|
||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||
test "x$dir" = "x$object" && dir=
|
||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$base.u
|
||||
tmpdepfile3=$dir.libs/$base.u
|
||||
"$@" -Wc,-M
|
||||
else
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$dir$base.u
|
||||
tmpdepfile3=$dir$base.u
|
||||
"$@" -M
|
||||
fi
|
||||
stat=$?
|
||||
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
# Each line is of the form `foo.o: dependent.h'.
|
||||
# Do two passes, one to just change these to
|
||||
# `$object: dependent.h' and one to simply `dependent.h:'.
|
||||
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
||||
# That's a tab and a space in the [].
|
||||
sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
# The sourcefile does not contain any dependencies, so just
|
||||
# store a dummy comment line, to avoid errors with the Makefile
|
||||
# "include basename.Plo" scheme.
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
icc)
|
||||
# Intel's C compiler understands `-MD -MF file'. However on
|
||||
# icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c
|
||||
# ICC 7.0 will fill foo.d with something like
|
||||
# foo.o: sub/foo.c
|
||||
# foo.o: sub/foo.h
|
||||
# which is wrong. We want:
|
||||
# sub/foo.o: sub/foo.c
|
||||
# sub/foo.o: sub/foo.h
|
||||
# sub/foo.c:
|
||||
# sub/foo.h:
|
||||
# ICC 7.1 will output
|
||||
# foo.o: sub/foo.c sub/foo.h
|
||||
# and will wrap long lines using \ :
|
||||
# foo.o: sub/foo.c ... \
|
||||
# sub/foo.h ... \
|
||||
# ...
|
||||
|
||||
"$@" -MD -MF "$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each line is of the form `foo.o: dependent.h',
|
||||
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
|
||||
# Do two passes, one to just change these to
|
||||
# `$object: dependent.h' and one to simply `dependent.h:'.
|
||||
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
|
||||
sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp2)
|
||||
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
|
||||
# compilers, which have integrated preprocessors. The correct option
|
||||
# to use with these is +Maked; it writes dependencies to a file named
|
||||
# 'foo.d', which lands next to the object file, wherever that
|
||||
# happens to be.
|
||||
# Much of this is similar to the tru64 case; see comments there.
|
||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||
test "x$dir" = "x$object" && dir=
|
||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir.libs/$base.d
|
||||
"$@" -Wc,+Maked
|
||||
else
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
"$@" +Maked
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
|
||||
# Add `dependent.h:' lines.
|
||||
sed -ne '2,${; s/^ *//; s/ \\*$//; s/$/:/; p;}' "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile" "$tmpdepfile2"
|
||||
;;
|
||||
|
||||
tru64)
|
||||
# The Tru64 compiler uses -MD to generate dependencies as a side
|
||||
# effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
|
||||
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
||||
# dependencies in `foo.d' instead, so we check for that too.
|
||||
# Subdirectories are respected.
|
||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||
test "x$dir" = "x$object" && dir=
|
||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||
|
||||
if test "$libtool" = yes; then
|
||||
# With Tru64 cc, shared objects can also be used to make a
|
||||
# static library. This mechanism is used in libtool 1.4 series to
|
||||
# handle both shared and static libraries in a single compilation.
|
||||
# With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d.
|
||||
#
|
||||
# With libtool 1.5 this exception was removed, and libtool now
|
||||
# generates 2 separate objects for the 2 libraries. These two
|
||||
# compilations output dependencies in $dir.libs/$base.o.d and
|
||||
# in $dir$base.o.d. We have to check for both files, because
|
||||
# one of the two compilations can be disabled. We should prefer
|
||||
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
|
||||
# automatically cleaned when .libs/ is deleted, while ignoring
|
||||
# the former would cause a distcleancheck panic.
|
||||
tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4
|
||||
tmpdepfile2=$dir$base.o.d # libtool 1.5
|
||||
tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5
|
||||
tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504
|
||||
"$@" -Wc,-MD
|
||||
else
|
||||
tmpdepfile1=$dir$base.o.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
tmpdepfile3=$dir$base.d
|
||||
tmpdepfile4=$dir$base.d
|
||||
"$@" -MD
|
||||
fi
|
||||
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
||||
# That's a tab and a space in the [].
|
||||
sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
#nosideeffect)
|
||||
# This comment above is used by automake to tell side-effect
|
||||
# dependency tracking mechanisms from slower ones.
|
||||
|
||||
dashmstdout)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout, regardless of -o.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test $1 != '--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove `-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
test -z "$dashmflag" && dashmflag=-M
|
||||
# Require at least two characters before searching for `:'
|
||||
# in the target name. This is to cope with DOS-style filenames:
|
||||
# a dependency such as `c:/foo/bar' could be seen as target `c' otherwise.
|
||||
"$@" $dashmflag |
|
||||
sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
cat < "$tmpdepfile" > "$depfile"
|
||||
tr ' ' '
|
||||
' < "$tmpdepfile" | \
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
dashXmstdout)
|
||||
# This case only exists to satisfy depend.m4. It is never actually
|
||||
# run, as this mode is specially recognized in the preamble.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
makedepend)
|
||||
"$@" || exit $?
|
||||
# Remove any Libtool call
|
||||
if test "$libtool" = yes; then
|
||||
while test $1 != '--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
# X makedepend
|
||||
shift
|
||||
cleared=no
|
||||
for arg in "$@"; do
|
||||
case $cleared in
|
||||
no)
|
||||
set ""; shift
|
||||
cleared=yes ;;
|
||||
esac
|
||||
case "$arg" in
|
||||
-D*|-I*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
# Strip any option that makedepend may not understand. Remove
|
||||
# the object too, otherwise makedepend will parse it as a source file.
|
||||
-*|$object)
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
esac
|
||||
done
|
||||
obj_suffix="`echo $object | sed 's/^.*\././'`"
|
||||
touch "$tmpdepfile"
|
||||
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
|
||||
rm -f "$depfile"
|
||||
cat < "$tmpdepfile" > "$depfile"
|
||||
sed '1,2d' "$tmpdepfile" | tr ' ' '
|
||||
' | \
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile" "$tmpdepfile".bak
|
||||
;;
|
||||
|
||||
cpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test $1 != '--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove `-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
"$@" -E |
|
||||
sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
|
||||
sed '$ s: \\$::' > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
cat < "$tmpdepfile" >> "$depfile"
|
||||
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvisualcpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout, regardless of -o,
|
||||
# because we must use -o when running libtool.
|
||||
"$@" || exit $?
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case "$arg" in
|
||||
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
|
||||
set fnord "$@"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
"$@" -E |
|
||||
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
. "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile"
|
||||
echo " " >> "$depfile"
|
||||
. "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
none)
|
||||
exec "$@"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unknown depmode $depmode" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-end: "$"
|
||||
# End:
|
|
@ -1,199 +0,0 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This is a freestanding support program to generate a file containing
|
||||
character tables for PCRE. The tables are built according to the current
|
||||
locale. Now that pcre_maketables is a function visible to the outside world, we
|
||||
make use of its code from here in order to be consistent. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <locale.h>
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#define DFTABLES /* pcre_maketables.c notices this */
|
||||
#include "pcre_maketables.c"
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
FILE *f;
|
||||
int i = 1;
|
||||
const unsigned char *tables;
|
||||
const unsigned char *base_of_tables;
|
||||
|
||||
/* By default, the default C locale is used rather than what the building user
|
||||
happens to have set. However, if the -L option is given, set the locale from
|
||||
the LC_xxx environment variables. */
|
||||
|
||||
if (argc > 1 && strcmp(argv[1], "-L") == 0)
|
||||
{
|
||||
setlocale(LC_ALL, ""); /* Set from environment variables */
|
||||
i++;
|
||||
}
|
||||
|
||||
if (argc < i + 1)
|
||||
{
|
||||
fprintf(stderr, "dftables: one filename argument is required\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
tables = pcre_maketables();
|
||||
base_of_tables = tables;
|
||||
|
||||
f = fopen(argv[i], "wb");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* There are several fprintf() calls here, because gcc in pedantic mode
|
||||
complains about the very long string otherwise. */
|
||||
|
||||
fprintf(f,
|
||||
"/*************************************************\n"
|
||||
"* Perl-Compatible Regular Expressions *\n"
|
||||
"*************************************************/\n\n"
|
||||
"/* This file was automatically written by the dftables auxiliary\n"
|
||||
"program. It contains character tables that are used when no external\n"
|
||||
"tables are passed to PCRE by the application that calls it. The tables\n"
|
||||
"are used only for characters whose code values are less than 256.\n\n");
|
||||
fprintf(f,
|
||||
"The following #includes are present because without them gcc 4.x may remove\n"
|
||||
"the array definition from the final binary if PCRE is built into a static\n"
|
||||
"library and dead code stripping is activated. This leads to link errors.\n"
|
||||
"Pulling in the header ensures that the array gets flagged as \"someone\n"
|
||||
"outside this compilation unit might reference this\" and so it will always\n"
|
||||
"be supplied to the linker. */\n\n"
|
||||
"#ifdef HAVE_CONFIG_H\n"
|
||||
"#include \"config.h\"\n"
|
||||
"#endif\n\n"
|
||||
"#include \"pcre_internal.h\"\n\n");
|
||||
fprintf(f,
|
||||
"const unsigned char _pcre_default_tables[] = {\n\n"
|
||||
"/* This table is a lower casing table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"/* This table contains bit maps for various character classes.\n"
|
||||
"Each map is 32 bytes long and the bits run from the least\n"
|
||||
"significant end of each byte. The classes that have their own\n"
|
||||
"maps are: space, xdigit, digit, upper, lower, word, graph\n"
|
||||
"print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < cbit_length; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
if ((i & 31) == 0) fprintf(f, "\n");
|
||||
fprintf(f, "\n ");
|
||||
}
|
||||
fprintf(f, "0x%02x", *tables++);
|
||||
if (i != cbit_length - 1) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"/* This table identifies various classes of character by individual bits:\n"
|
||||
" 0x%02x white space character\n"
|
||||
" 0x%02x letter\n"
|
||||
" 0x%02x decimal digit\n"
|
||||
" 0x%02x hexadecimal digit\n"
|
||||
" 0x%02x alphanumeric or '_'\n"
|
||||
" 0x%02x regular expression metacharacter or binary zero\n*/\n\n",
|
||||
ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word,
|
||||
ctype_meta);
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
fprintf(f, " /* ");
|
||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n ");
|
||||
}
|
||||
fprintf(f, "0x%02x", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
|
||||
fprintf(f, "};/* ");
|
||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n\n/* End of pcre_chartables.c */\n");
|
||||
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of dftables.c */
|
|
@ -1,348 +0,0 @@
|
|||
Technical Notes about PCRE
|
||||
--------------------------
|
||||
|
||||
These are very rough technical notes that record potentially useful information
|
||||
about PCRE internals.
|
||||
|
||||
Historical note 1
|
||||
-----------------
|
||||
|
||||
Many years ago I implemented some regular expression functions to an algorithm
|
||||
suggested by Martin Richards. These were not Unix-like in form, and were quite
|
||||
restricted in what they could do by comparison with Perl. The interesting part
|
||||
about the algorithm was that the amount of space required to hold the compiled
|
||||
form of an expression was known in advance. The code to apply an expression did
|
||||
not operate by backtracking, as the original Henry Spencer code and current
|
||||
Perl code does, but instead checked all possibilities simultaneously by keeping
|
||||
a list of current states and checking all of them as it advanced through the
|
||||
subject string. In the terminology of Jeffrey Friedl's book, it was a "DFA
|
||||
algorithm". When the pattern was all used up, all remaining states were
|
||||
possible matches, and the one matching the longest subset of the subject string
|
||||
was chosen. This did not necessarily maximize the individual wild portions of
|
||||
the pattern, as is expected in Unix and Perl-style regular expressions.
|
||||
|
||||
Historical note 2
|
||||
-----------------
|
||||
|
||||
By contrast, the code originally written by Henry Spencer (which was
|
||||
subsequently heavily modified for Perl) compiles the expression twice: once in
|
||||
a dummy mode in order to find out how much store will be needed, and then for
|
||||
real. (The Perl version probably doesn't do this any more; I'm talking about
|
||||
the original library.) The execution function operates by backtracking and
|
||||
maximizing (or, optionally, minimizing in Perl) the amount of the subject that
|
||||
matches individual wild portions of the pattern. This is an "NFA algorithm" in
|
||||
Friedl's terminology.
|
||||
|
||||
OK, here's the real stuff
|
||||
-------------------------
|
||||
|
||||
For the set of functions that form the "basic" PCRE library (which are
|
||||
unrelated to those mentioned above), I tried at first to invent an algorithm
|
||||
that used an amount of store bounded by a multiple of the number of characters
|
||||
in the pattern, to save on compiling time. However, because of the greater
|
||||
complexity in Perl regular expressions, I couldn't do this. In any case, a
|
||||
first pass through the pattern is needed, for a number of reasons. PCRE works
|
||||
by running a very degenerate first pass to calculate a maximum store size, and
|
||||
then a second pass to do the real compile - which may use a bit less than the
|
||||
predicted amount of store. The idea is that this is going to turn out faster
|
||||
because the first pass is degenerate and the second pass can just store stuff
|
||||
straight into the vector, which it knows is big enough. It does make the
|
||||
compiling functions bigger, of course, but they have become quite big anyway to
|
||||
handle all the Perl stuff.
|
||||
|
||||
Traditional matching function
|
||||
-----------------------------
|
||||
|
||||
The "traditional", and original, matching function is called pcre_exec(), and
|
||||
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
|
||||
and the way that Perl works. Not surprising, since it is intended to be as
|
||||
compatible with Perl as possible. This is the function most users of PCRE will
|
||||
use most of the time.
|
||||
|
||||
Supplementary matching function
|
||||
-------------------------------
|
||||
|
||||
From PCRE 6.0, there is also a supplementary matching function called
|
||||
pcre_dfa_exec(). This implements a DFA matching algorithm that searches
|
||||
simultaneously for all possible matches that start at one point in the subject
|
||||
string. (Going back to my roots: see Historical Note 1 above.) This function
|
||||
interprets the same compiled pattern data as pcre_exec(); however, not all the
|
||||
facilities are available, and those that are do not always work in quite the
|
||||
same way. See the user documentation for details.
|
||||
|
||||
Format of compiled patterns
|
||||
---------------------------
|
||||
|
||||
The compiled form of a pattern is a vector of bytes, containing items of
|
||||
variable length. The first byte in an item is an opcode, and the length of the
|
||||
item is either implicit in the opcode or contained in the data bytes that
|
||||
follow it.
|
||||
|
||||
In many cases below "two-byte" data values are specified. This is in fact just
|
||||
a default. PCRE can be compiled to use 3-byte or 4-byte values (impairing the
|
||||
performance). This is necessary only when patterns whose compiled length is
|
||||
greater than 64K are going to be processed. In this description, we assume the
|
||||
"normal" compilation options.
|
||||
|
||||
A list of all the opcodes follows:
|
||||
|
||||
Opcodes with no following data
|
||||
------------------------------
|
||||
|
||||
These items are all just one byte long
|
||||
|
||||
OP_END end of pattern
|
||||
OP_ANY match any character
|
||||
OP_ANYBYTE match any single byte, even in UTF-8 mode
|
||||
OP_SOD match start of data: \A
|
||||
OP_SOM start of match (subject + offset): \G
|
||||
OP_CIRC ^ (start of data, or after \n in multiline)
|
||||
OP_NOT_WORD_BOUNDARY \B
|
||||
OP_WORD_BOUNDARY \b
|
||||
OP_NOT_DIGIT \D
|
||||
OP_DIGIT \d
|
||||
OP_NOT_WHITESPACE \S
|
||||
OP_WHITESPACE \s
|
||||
OP_NOT_WORDCHAR \W
|
||||
OP_WORDCHAR \w
|
||||
OP_EODN match end of data or \n at end: \Z
|
||||
OP_EOD match end of data: \z
|
||||
OP_DOLL $ (end of data, or before \n in multiline)
|
||||
OP_EXTUNI match an extended Unicode character
|
||||
|
||||
|
||||
Repeating single characters
|
||||
---------------------------
|
||||
|
||||
The common repeats (*, +, ?) when applied to a single character use the
|
||||
following opcodes:
|
||||
|
||||
OP_STAR
|
||||
OP_MINSTAR
|
||||
OP_PLUS
|
||||
OP_MINPLUS
|
||||
OP_QUERY
|
||||
OP_MINQUERY
|
||||
|
||||
In ASCII mode, these are two-byte items; in UTF-8 mode, the length is variable.
|
||||
Those with "MIN" in their name are the minimizing versions. Each is followed by
|
||||
the character that is to be repeated. Other repeats make use of
|
||||
|
||||
OP_UPTO
|
||||
OP_MINUPTO
|
||||
OP_EXACT
|
||||
|
||||
which are followed by a two-byte count (most significant first) and the
|
||||
repeated character. OP_UPTO matches from 0 to the given number. A repeat with a
|
||||
non-zero minimum and a fixed maximum is coded as an OP_EXACT followed by an
|
||||
OP_UPTO (or OP_MINUPTO).
|
||||
|
||||
|
||||
Repeating character types
|
||||
-------------------------
|
||||
|
||||
Repeats of things like \d are done exactly as for single characters, except
|
||||
that instead of a character, the opcode for the type is stored in the data
|
||||
byte. The opcodes are:
|
||||
|
||||
OP_TYPESTAR
|
||||
OP_TYPEMINSTAR
|
||||
OP_TYPEPLUS
|
||||
OP_TYPEMINPLUS
|
||||
OP_TYPEQUERY
|
||||
OP_TYPEMINQUERY
|
||||
OP_TYPEUPTO
|
||||
OP_TYPEMINUPTO
|
||||
OP_TYPEEXACT
|
||||
|
||||
|
||||
Match by Unicode property
|
||||
-------------------------
|
||||
|
||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||
character by testing its Unicode property (the \p and \P escape sequences).
|
||||
Each is followed by two bytes that encode the desired property as a type and a
|
||||
value.
|
||||
|
||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||
three bytes: OP_PROP or OP_NOTPROP and then the desired property type and
|
||||
value.
|
||||
|
||||
|
||||
Matching literal characters
|
||||
---------------------------
|
||||
|
||||
The OP_CHAR opcode is followed by a single character that is to be matched
|
||||
casefully. For caseless matching, OP_CHARNC is used. In UTF-8 mode, the
|
||||
character may be more than one byte long. (Earlier versions of PCRE used
|
||||
multi-character strings, but this was changed to allow some new features to be
|
||||
added.)
|
||||
|
||||
|
||||
Character classes
|
||||
-----------------
|
||||
|
||||
If there is only one character, OP_CHAR or OP_CHARNC is used for a positive
|
||||
class, and OP_NOT for a negative one (that is, for something like [^a]).
|
||||
However, in UTF-8 mode, the use of OP_NOT applies only to characters with
|
||||
values < 128, because OP_NOT is confined to single bytes.
|
||||
|
||||
Another set of repeating opcodes (OP_NOTSTAR etc.) are used for a repeated,
|
||||
negated, single-character class. The normal ones (OP_STAR etc.) are used for a
|
||||
repeated positive single-character class.
|
||||
|
||||
When there's more than one character in a class and all the characters are less
|
||||
than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a negative
|
||||
one. In either case, the opcode is followed by a 32-byte bit map containing a 1
|
||||
bit for every character that is acceptable. The bits are counted from the least
|
||||
significant end of each byte.
|
||||
|
||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 mode,
|
||||
subject characters with values greater than 255 can be handled correctly. For
|
||||
OP_CLASS they don't match, whereas for OP_NCLASS they do.
|
||||
|
||||
For classes containing characters with values > 255, OP_XCLASS is used. It
|
||||
optionally uses a bit map (if any characters lie within it), followed by a list
|
||||
of pairs and single characters. There is a flag character that indicates
|
||||
whether it's a positive or a negative class.
|
||||
|
||||
|
||||
Back references
|
||||
---------------
|
||||
|
||||
OP_REF is followed by two bytes containing the reference number.
|
||||
|
||||
|
||||
Repeating character classes and back references
|
||||
-----------------------------------------------
|
||||
|
||||
Single-character classes are handled specially (see above). This applies to
|
||||
OP_CLASS and OP_REF. In both cases, the repeat information follows the base
|
||||
item. The matching code looks at the following opcode to see if it is one of
|
||||
|
||||
OP_CRSTAR
|
||||
OP_CRMINSTAR
|
||||
OP_CRPLUS
|
||||
OP_CRMINPLUS
|
||||
OP_CRQUERY
|
||||
OP_CRMINQUERY
|
||||
OP_CRRANGE
|
||||
OP_CRMINRANGE
|
||||
|
||||
All but the last two are just single-byte items. The others are followed by
|
||||
four bytes of data, comprising the minimum and maximum repeat counts.
|
||||
|
||||
|
||||
Brackets and alternation
|
||||
------------------------
|
||||
|
||||
A pair of non-capturing (round) brackets is wrapped round each expression at
|
||||
compile time, so alternation always happens in the context of brackets.
|
||||
|
||||
Non-capturing brackets use the opcode OP_BRA, while capturing brackets use
|
||||
OP_BRA+1, OP_BRA+2, etc. [Note for North Americans: "bracket" to some English
|
||||
speakers, including myself, can be round, square, curly, or pointy. Hence this
|
||||
usage.]
|
||||
|
||||
Originally PCRE was limited to 99 capturing brackets (so as not to use up all
|
||||
the opcodes). From release 3.5, there is no limit. What happens is that the
|
||||
first ones, up to EXTRACT_BASIC_MAX are handled with separate opcodes, as
|
||||
above. If there are more, the opcode is set to EXTRACT_BASIC_MAX+1, and the
|
||||
first operation in the bracket is OP_BRANUMBER, followed by a 2-byte bracket
|
||||
number. This opcode is ignored while matching, but is fished out when handling
|
||||
the bracket itself. (They could have all been done like this, but I was making
|
||||
minimal changes.)
|
||||
|
||||
A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
|
||||
next alternative OP_ALT or, if there aren't any branches, to the matching
|
||||
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
|
||||
the next one, or to the OP_KET opcode.
|
||||
|
||||
OP_KET is used for subpatterns that do not repeat indefinitely, while
|
||||
OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
|
||||
maximally respectively. All three are followed by LINK_SIZE bytes giving (as a
|
||||
positive number) the offset back to the matching OP_BRA opcode.
|
||||
|
||||
If a subpattern is quantified such that it is permitted to match zero times, it
|
||||
is preceded by one of OP_BRAZERO or OP_BRAMINZERO. These are single-byte
|
||||
opcodes which tell the matcher that skipping this subpattern entirely is a
|
||||
valid branch.
|
||||
|
||||
A subpattern with an indefinite maximum repetition is replicated in the
|
||||
compiled data its minimum number of times (or once with OP_BRAZERO if the
|
||||
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
|
||||
as appropriate.
|
||||
|
||||
A subpattern with a bounded maximum repetition is replicated in a nested
|
||||
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
|
||||
before each replication after the minimum, so that, for example, (abc){2,5} is
|
||||
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?.
|
||||
|
||||
|
||||
Assertions
|
||||
----------
|
||||
|
||||
Forward assertions are just like other subpatterns, but starting with one of
|
||||
the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
||||
is OP_REVERSE, followed by a two byte count of the number of characters to move
|
||||
back the pointer in the subject string. When operating in UTF-8 mode, the count
|
||||
is a character count rather than a byte count. A separate count is present in
|
||||
each alternative of a lookbehind assertion, allowing them to have different
|
||||
fixed lengths.
|
||||
|
||||
|
||||
Once-only subpatterns
|
||||
---------------------
|
||||
|
||||
These are also just like other subpatterns, but they start with the opcode
|
||||
OP_ONCE.
|
||||
|
||||
|
||||
Conditional subpatterns
|
||||
-----------------------
|
||||
|
||||
These are like other subpatterns, but they start with the opcode OP_COND. If
|
||||
the condition is a back reference, this is stored at the start of the
|
||||
subpattern using the opcode OP_CREF followed by two bytes containing the
|
||||
reference number. If the condition is "in recursion" (coded as "(?(R)"), the
|
||||
same scheme is used, with a "reference number" of 0xffff. Otherwise, a
|
||||
conditional subpattern always starts with one of the assertions.
|
||||
|
||||
|
||||
Recursion
|
||||
---------
|
||||
|
||||
Recursion either matches the current regex, or some subexpression. The opcode
|
||||
OP_RECURSE is followed by a value which is the offset to the starting bracket
|
||||
from the start of the whole pattern. From release 6.5, OP_RECURSE is
|
||||
automatically wrapped inside OP_ONCE brackets (because otherwise some patterns
|
||||
broke it). OP_RECURSE is also used for "subroutine" calls, even though they
|
||||
are not strictly a recursion.
|
||||
|
||||
|
||||
Callout
|
||||
-------
|
||||
|
||||
OP_CALLOUT is followed by one byte of data that holds a callout number in the
|
||||
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
|
||||
cases there follows a two-byte value giving the offset in the pattern to the
|
||||
start of the following item, and another two-byte item giving the length of the
|
||||
next item.
|
||||
|
||||
|
||||
Changing options
|
||||
----------------
|
||||
|
||||
If any of the /i, /m, or /s options are changed within a pattern, an OP_OPT
|
||||
opcode is compiled, followed by one byte containing the new settings of these
|
||||
flags. If there are several alternatives, there is an occurrence of OP_OPT at
|
||||
the start of all those following the first options change, to set appropriate
|
||||
options for the start of the alternative. Immediately after the end of the
|
||||
group there is another such item to reset the flags to their previous values. A
|
||||
change of flag right at the very start of the pattern can be handled entirely
|
||||
at compile time, and so does not cause anything to be put into the compiled
|
||||
data.
|
||||
|
||||
Philip Hazel
|
||||
June 2006
|
|
@ -1,140 +0,0 @@
|
|||
<html>
|
||||
<!-- This is a manually maintained file that is the root of the HTML version of
|
||||
the PCRE documentation. When the HTML documents are built from the man
|
||||
page versions, the entire doc/html directory is emptied, this file is then
|
||||
copied into doc/html/index.html, and the remaining files therein are
|
||||
created by the 132html script.
|
||||
-->
|
||||
<head>
|
||||
<title>PCRE specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>Perl-compatible Regular Expressions (PCRE)</h1>
|
||||
<p>
|
||||
The HTML documentation for PCRE comprises the following pages:
|
||||
</p>
|
||||
|
||||
<table>
|
||||
<tr><td><a href="pcre.html">pcre</a></td>
|
||||
<td> Introductory page</td></tr>
|
||||
|
||||
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
||||
<td> Information about the installation configuration</td></tr>
|
||||
|
||||
<tr><td><a href="pcreapi.html">pcreapi</a></td>
|
||||
<td> PCRE's native API</td></tr>
|
||||
|
||||
<tr><td><a href="pcrebuild.html">pcrebuild</a></td>
|
||||
<td> Options for building PCRE</td></tr>
|
||||
|
||||
<tr><td><a href="pcrecallout.html">pcrecallout</a></td>
|
||||
<td> The <i>callout</i> facility</td></tr>
|
||||
|
||||
<tr><td><a href="pcrecompat.html">pcrecompat</a></td>
|
||||
<td> Compatibility with Perl</td></tr>
|
||||
|
||||
<tr><td><a href="pcrecpp.html">pcrecpp</a></td>
|
||||
<td> The C++ wrapper for the PCRE library</td></tr>
|
||||
|
||||
<tr><td><a href="pcregrep.html">pcregrep</a></td>
|
||||
<td> The <b>pcregrep</b> command</td></tr>
|
||||
|
||||
<tr><td><a href="pcrematching.html">pcrematching</a></td>
|
||||
<td> Discussion of the two matching algorithms</td></tr>
|
||||
|
||||
<tr><td><a href="pcrepartial.html">pcrepartial</a></td>
|
||||
<td> Using PCRE for partial matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcrepattern.html">pcrepattern</a></td>
|
||||
<td> Specification of the regular expressions supported by PCRE</td></tr>
|
||||
|
||||
<tr><td><a href="pcreperform.html">pcreperform</a></td>
|
||||
<td> Some comments on performance</td></tr>
|
||||
|
||||
<tr><td><a href="pcreposix.html">pcreposix</a></td>
|
||||
<td> The POSIX API to the PCRE library</td></tr>
|
||||
|
||||
<tr><td><a href="pcreprecompile.html">pcreprecompile</a></td>
|
||||
<td> How to save and re-use compiled patterns</td></tr>
|
||||
|
||||
<tr><td><a href="pcresample.html">pcresample</a></td>
|
||||
<td> Description of the sample program</td></tr>
|
||||
|
||||
<tr><td><a href="pcrestack.html">pcrestack</a></td>
|
||||
<td> Discussion of PCRE's stack usage</td></tr>
|
||||
|
||||
<tr><td><a href="pcresyntax.html">pcresyntax</a></td>
|
||||
<td> Syntax quick-reference summary</td></tr>
|
||||
|
||||
<tr><td><a href="pcretest.html">pcretest</a></td>
|
||||
<td> The <b>pcretest</b> command for testing PCRE</td></tr>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
There are also individual pages that summarize the interface for each function
|
||||
in the library:
|
||||
</p>
|
||||
|
||||
<table>
|
||||
|
||||
<tr><td><a href="pcre_compile.html">pcre_compile</a></td>
|
||||
<td> Compile a regular expression</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_compile2.html">pcre_compile2</a></td>
|
||||
<td> Compile a regular expression (alternate interface)</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_config.html">pcre_config</a></td>
|
||||
<td> Show build-time configuration options</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_copy_named_substring.html">pcre_copy_named_substring</a></td>
|
||||
<td> Extract named substring into given buffer</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_copy_substring.html">pcre_copy_substring</a></td>
|
||||
<td> Extract numbered substring into given buffer</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_dfa_exec.html">pcre_dfa_exec</a></td>
|
||||
<td> Match a compiled pattern to a subject string
|
||||
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
|
||||
<td> Match a compiled pattern to a subject string
|
||||
(Perl compatible)</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_free_substring.html">pcre_free_substring</a></td>
|
||||
<td> Free extracted substring</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_free_substring_list.html">pcre_free_substring_list</a></td>
|
||||
<td> Free list of extracted substrings</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_fullinfo.html">pcre_fullinfo</a></td>
|
||||
<td> Extract information about a pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_named_substring.html">pcre_get_named_substring</a></td>
|
||||
<td> Extract named substring into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_stringnumber.html">pcre_get_stringnumber</a></td>
|
||||
<td> Convert captured string name to number</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_substring.html">pcre_get_substring</a></td>
|
||||
<td> Extract numbered substring into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_substring_list.html">pcre_get_substring_list</a></td>
|
||||
<td> Extract all substrings into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_info.html">pcre_info</a></td>
|
||||
<td> Obsolete information extraction function</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_maketables.html">pcre_maketables</a></td>
|
||||
<td> Build character tables in current locale</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_refcount.html">pcre_refcount</a></td>
|
||||
<td> Maintain reference count in compiled pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_study.html">pcre_study</a></td>
|
||||
<td> Study a compiled pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_version.html">pcre_version</a></td>
|
||||
<td> Return PCRE version and release date</td></tr>
|
||||
</table>
|
||||
|
||||
</html>
|
|
@ -1,88 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre-config specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre-config man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||
<li><a name="TOC3" href="#SEC3">OPTIONS</a>
|
||||
<li><a name="TOC4" href="#SEC4">SEE ALSO</a>
|
||||
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
|
||||
<li><a name="TOC6" href="#SEC6">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||
<P>
|
||||
<b>pcre-config [--prefix] [--exec-prefix] [--version] [--libs]</b>
|
||||
<b>[--libs-posix] [--cflags] [--cflags-posix]</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
<b>pcre-config</b> returns the configuration of the installed PCRE
|
||||
libraries and the options required to compile a program to use them.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">OPTIONS</a><br>
|
||||
<P>
|
||||
<b>--prefix</b>
|
||||
Writes the directory prefix used in the PCRE installation for architecture
|
||||
independent files (<i>/usr</i> on many systems, <i>/usr/local</i> on some
|
||||
systems) to the standard output.
|
||||
</P>
|
||||
<P>
|
||||
<b>--exec-prefix</b>
|
||||
Writes the directory prefix used in the PCRE installation for architecture
|
||||
dependent files (normally the same as <b>--prefix</b>) to the standard output.
|
||||
</P>
|
||||
<P>
|
||||
<b>--version</b>
|
||||
Writes the version number of the installed PCRE libraries to the standard
|
||||
output.
|
||||
</P>
|
||||
<P>
|
||||
<b>--libs</b>
|
||||
Writes to the standard output the command line options required to link
|
||||
with PCRE (<b>-lpcre</b> on many systems).
|
||||
</P>
|
||||
<P>
|
||||
<b>--libs-posix</b>
|
||||
Writes to the standard output the command line options required to link with
|
||||
the PCRE posix emulation library (<b>-lpcreposix</b> <b>-lpcre</b> on many
|
||||
systems).
|
||||
</P>
|
||||
<P>
|
||||
<b>--cflags</b>
|
||||
Writes to the standard output the command line options required to compile
|
||||
files that use PCRE (this may include some <b>-I</b> options, but is blank on
|
||||
many systems).
|
||||
</P>
|
||||
<P>
|
||||
<b>--cflags-posix</b>
|
||||
Writes to the standard output the command line options required to compile
|
||||
files that use the PCRE posix emulation library (this may include some <b>-I</b>
|
||||
options, but is blank on many systems).
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre(3)</b>
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
This manual page was originally written by Mark Baker for the Debian GNU/Linux
|
||||
system. It has been slightly revised as a generic PCRE man page.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 18 April 2007
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,306 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
|
||||
<li><a name="TOC2" href="#SEC2">USER DOCUMENTATION</a>
|
||||
<li><a name="TOC3" href="#SEC3">LIMITATIONS</a>
|
||||
<li><a name="TOC4" href="#SEC4">UTF-8 AND UNICODE PROPERTY SUPPORT</a>
|
||||
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
|
||||
<li><a name="TOC6" href="#SEC6">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
|
||||
<P>
|
||||
The PCRE library is a set of functions that implement regular expression
|
||||
pattern matching using the same syntax and semantics as Perl, with just a few
|
||||
differences. Certain features that appeared in Python and PCRE before they
|
||||
appeared in Perl are also available using the Python syntax. There is also some
|
||||
support for certain .NET and Oniguruma syntax items, and there is an option for
|
||||
requesting some minor changes that give better JavaScript compatibility.
|
||||
</P>
|
||||
<P>
|
||||
The current implementation of PCRE (release 7.x) corresponds approximately with
|
||||
Perl 5.10, including support for UTF-8 encoded strings and Unicode general
|
||||
category properties. However, UTF-8 and Unicode support has to be explicitly
|
||||
enabled; it is not the default. The Unicode tables correspond to Unicode
|
||||
release 5.1.
|
||||
</P>
|
||||
<P>
|
||||
In addition to the Perl-compatible matching function, PCRE contains an
|
||||
alternative matching function that matches the same compiled patterns in a
|
||||
different way. In certain circumstances, the alternative function has some
|
||||
advantages. For a discussion of the two matching algorithms, see the
|
||||
<a href="pcrematching.html"><b>pcrematching</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
PCRE is written in C and released as a C library. A number of people have
|
||||
written wrappers and interfaces of various kinds. In particular, Google Inc.
|
||||
have provided a comprehensive C++ wrapper. This is now included as part of the
|
||||
PCRE distribution. The
|
||||
<a href="pcrecpp.html"><b>pcrecpp</b></a>
|
||||
page has details of this interface. Other people's contributions can be found
|
||||
in the <i>Contrib</i> directory at the primary FTP site, which is:
|
||||
<a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre">ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre</a>
|
||||
</P>
|
||||
<P>
|
||||
Details of exactly which Perl regular expression features are and are not
|
||||
supported by PCRE are given in separate documents. See the
|
||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||
and
|
||||
<a href="pcrecompat.html"><b>pcrecompat</b></a>
|
||||
pages. There is a syntax summary in the
|
||||
<a href="pcresyntax.html"><b>pcresyntax</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
Some features of PCRE can be included, excluded, or changed when the library is
|
||||
built. The
|
||||
<a href="pcre_config.html"><b>pcre_config()</b></a>
|
||||
function makes it possible for a client to discover which features are
|
||||
available. The features themselves are described in the
|
||||
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
||||
page. Documentation about building PCRE for various operating systems can be
|
||||
found in the <b>README</b> file in the source distribution.
|
||||
</P>
|
||||
<P>
|
||||
The library contains a number of undocumented internal functions and data
|
||||
tables that are used by more than one of the exported external functions, but
|
||||
which are not intended for use by external callers. Their names all begin with
|
||||
"_pcre_", which hopefully will not provoke any name clashes. In some
|
||||
environments, it is possible to control which external symbols are exported
|
||||
when a shared library is built, and in these cases the undocumented symbols are
|
||||
not exported.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">USER DOCUMENTATION</a><br>
|
||||
<P>
|
||||
The user documentation for PCRE comprises a number of different sections. In
|
||||
the "man" format, each of these is a separate "man page". In the HTML format,
|
||||
each is a separate page, linked from the index page. In the plain text format,
|
||||
all the sections are concatenated, for ease of searching. The sections are as
|
||||
follows:
|
||||
<pre>
|
||||
pcre this document
|
||||
pcre-config show PCRE installation configuration information
|
||||
pcreapi details of PCRE's native C API
|
||||
pcrebuild options for building PCRE
|
||||
pcrecallout details of the callout feature
|
||||
pcrecompat discussion of Perl compatibility
|
||||
pcrecpp details of the C++ wrapper
|
||||
pcregrep description of the <b>pcregrep</b> command
|
||||
pcrematching discussion of the two matching algorithms
|
||||
pcrepartial details of the partial matching facility
|
||||
pcrepattern syntax and semantics of supported regular expressions
|
||||
pcresyntax quick syntax reference
|
||||
pcreperform discussion of performance issues
|
||||
pcreposix the POSIX-compatible C API
|
||||
pcreprecompile details of saving and re-using precompiled patterns
|
||||
pcresample discussion of the sample program
|
||||
pcrestack discussion of stack usage
|
||||
pcretest description of the <b>pcretest</b> testing command
|
||||
</pre>
|
||||
In addition, in the "man" and HTML formats, there is a short page for each
|
||||
C library function, listing its arguments and results.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">LIMITATIONS</a><br>
|
||||
<P>
|
||||
There are some size limitations in PCRE but it is hoped that they will never in
|
||||
practice be relevant.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of a compiled pattern is 65539 (sic) bytes if PCRE is
|
||||
compiled with the default internal linkage size of 2. If you want to process
|
||||
regular expressions that are truly enormous, you can compile PCRE with an
|
||||
internal linkage size of 3 or 4 (see the <b>README</b> file in the source
|
||||
distribution and the
|
||||
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
||||
documentation for details). In these cases the limit is substantially larger.
|
||||
However, the speed of execution is slower.
|
||||
</P>
|
||||
<P>
|
||||
All values in repeating quantifiers must be less than 65536.
|
||||
</P>
|
||||
<P>
|
||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||
no more than 65535 capturing subpatterns.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of name for a named subpattern is 32 characters, and the
|
||||
maximum number of named subpatterns is 10000.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of a subject string is the largest positive number that an
|
||||
integer variable can hold. However, when using the traditional matching
|
||||
function, PCRE uses recursion to handle subpatterns and indefinite repetition.
|
||||
This means that the available stack space may limit the size of a subject
|
||||
string that can be processed by certain patterns. For a discussion of stack
|
||||
issues, see the
|
||||
<a href="pcrestack.html"><b>pcrestack</b></a>
|
||||
documentation.
|
||||
<a name="utf8support"></a></P>
|
||||
<br><a name="SEC4" href="#TOC1">UTF-8 AND UNICODE PROPERTY SUPPORT</a><br>
|
||||
<P>
|
||||
From release 3.3, PCRE has had some support for character strings encoded in
|
||||
the UTF-8 format. For release 4.0 this was greatly extended to cover most
|
||||
common requirements, and in release 5.0 additional support for Unicode general
|
||||
category properties was added.
|
||||
</P>
|
||||
<P>
|
||||
In order to process UTF-8 strings, you must build PCRE to include UTF-8 support in
|
||||
the code, and, in addition, you must call
|
||||
<a href="pcre_compile.html"><b>pcre_compile()</b></a>
|
||||
with the PCRE_UTF8 option flag, or the pattern must start with the sequence
|
||||
(*UTF8). When either of these is the case, both the pattern and any subject
|
||||
strings that are matched against it are treated as UTF-8 strings instead of
|
||||
just strings of bytes.
|
||||
</P>
|
||||
<P>
|
||||
If you compile PCRE with UTF-8 support, but do not use it at run time, the
|
||||
library will be a bit bigger, but the additional run time overhead is limited
|
||||
to testing the PCRE_UTF8 flag occasionally, so should not be very big.
|
||||
</P>
|
||||
<P>
|
||||
If PCRE is built with Unicode character property support (which implies UTF-8
|
||||
support), the escape sequences \p{..}, \P{..}, and \X are supported.
|
||||
The available properties that can be tested are limited to the general
|
||||
category properties such as Lu for an upper case letter or Nd for a decimal
|
||||
number, the Unicode script names such as Arabic or Han, and the derived
|
||||
properties Any and L&amp;. A full list is given in the
|
||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||
documentation. Only the short names for properties are supported. For example,
|
||||
\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
|
||||
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
|
||||
compatibility with Perl 5.6. PCRE does not support this.
|
||||
<a name="utf8strings"></a></P>
|
||||
<br><b>
|
||||
Validity of UTF-8 strings
|
||||
</b><br>
|
||||
<P>
|
||||
When you set the PCRE_UTF8 flag, the strings passed as patterns and subjects
|
||||
are (by default) checked for validity on entry to the relevant functions. From
|
||||
release 7.3 of PCRE, the check is according to the rules of RFC 3629, which are
|
||||
themselves derived from the Unicode specification. Earlier releases of PCRE
|
||||
followed the rules of RFC 2279, which allows the full range of 31-bit values (0
|
||||
to 0x7FFFFFFF). The current check allows only values in the range U+0 to
|
||||
U+10FFFF, excluding U+D800 to U+DFFF.
|
||||
</P>
|
||||
<P>
|
||||
The excluded code points are the "Low Surrogate Area" of Unicode, of which the
|
||||
Unicode Standard says this: "The Low Surrogate Area does not contain any
|
||||
character assignments, consequently no character code charts or namelists are
|
||||
provided for this area. Surrogates are reserved for use with UTF-16 and then
|
||||
must be used in pairs." The code points that are encoded by UTF-16 pairs are
|
||||
available as independent code points in the UTF-8 encoding. (In other words,
|
||||
the whole surrogate thing is a fudge for UTF-16 which unfortunately messes up
|
||||
UTF-8.)
|
||||
</P>
|
||||
<P>
|
||||
If an invalid UTF-8 string is passed to PCRE, an error return
|
||||
(PCRE_ERROR_BADUTF8) is given. In some situations, you may already know that
|
||||
your strings are valid, and therefore want to skip these checks in order to
|
||||
improve performance. If you set the PCRE_NO_UTF8_CHECK flag at compile time or
|
||||
at run time, PCRE assumes that the pattern or subject it is given
|
||||
(respectively) contains only valid UTF-8 codes. In this case, it does not
|
||||
diagnose an invalid UTF-8 string.
|
||||
</P>
|
||||
<P>
|
||||
If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, what
|
||||
happens depends on why the string is invalid. If the string conforms to the
|
||||
"old" definition of UTF-8 (RFC 2279), it is processed as a string of characters
|
||||
in the range 0 to 0x7FFFFFFF. In other words, apart from the initial validity
|
||||
test, PCRE (when in UTF-8 mode) handles strings according to the more liberal
|
||||
rules of RFC 2279. However, if the string does not even conform to RFC 2279,
|
||||
the result is undefined. Your program may crash.
|
||||
</P>
|
||||
<P>
|
||||
If you want to process strings of values in the full range 0 to 0x7FFFFFFF,
|
||||
encoded in a UTF-8-like manner as per the old RFC, you can set
|
||||
PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in this
|
||||
situation, you will have to apply your own validity check.
|
||||
</P>
|
||||
<br><b>
|
||||
General comments about UTF-8 mode
|
||||
</b><br>
|
||||
<P>
|
||||
1. An unbraced hexadecimal escape sequence (such as \xb3) matches a two-byte
|
||||
UTF-8 character if the value is greater than 127.
|
||||
</P>
|
||||
<P>
|
||||
2. Octal numbers up to \777 are recognized, and match two-byte UTF-8
|
||||
characters for values greater than \177.
|
||||
</P>
|
||||
<P>
|
||||
3. Repeat quantifiers apply to complete UTF-8 characters, not to individual
|
||||
bytes, for example: \x{100}{3}.
|
||||
</P>
|
||||
<P>
|
||||
4. The dot metacharacter matches one UTF-8 character instead of a single byte.
|
||||
</P>
|
||||
<P>
|
||||
5. The escape sequence \C can be used to match a single byte in UTF-8 mode,
|
||||
but its use can lead to some strange effects. This facility is not available in
|
||||
the alternative matching function, <b>pcre_dfa_exec()</b>.
|
||||
</P>
|
||||
<P>
|
||||
6. The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
|
||||
test characters of any code value, but the characters that PCRE recognizes as
|
||||
digits, spaces, or word characters remain the same set as before, all with
|
||||
values less than 256. This remains true even when PCRE includes Unicode
|
||||
property support, because to do otherwise would slow down PCRE in many common
|
||||
cases. If you really want to test for a wider sense of, say, "digit", you
|
||||
must use Unicode property tests such as \p{Nd}. Note that this also applies to
|
||||
\b, because it is defined in terms of \w and \W.
|
||||
</P>
|
||||
<P>
|
||||
7. Similarly, characters that match the POSIX named character classes are all
|
||||
low-valued characters.
|
||||
</P>
|
||||
<P>
|
||||
8. However, the Perl 5.10 horizontal and vertical whitespace matching escapes
|
||||
(\h, \H, \v, and \V) do match all the appropriate Unicode characters.
|
||||
</P>
|
||||
<P>
|
||||
9. Case-insensitive matching applies only to characters whose values are less
|
||||
than 128, unless PCRE is built with Unicode property support. Even when Unicode
|
||||
property support is available, PCRE still uses its own character tables when
|
||||
checking the case of low-valued characters, so as not to degrade performance.
|
||||
The Unicode property information is used only for characters with higher
|
||||
values. Even when Unicode property support is available, PCRE supports
|
||||
case-insensitive matching only when there is a one-to-one mapping between a
|
||||
letter's cases. There are a small number of many-to-one mappings in Unicode;
|
||||
these are not supported by PCRE.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<P>
|
||||
Putting an actual email address here seems to have been a spam magnet, so I've
|
||||
taken it away. If you want to email me, use my two initials, followed by the
|
||||
two digits 10, at the domain cam.ac.uk.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 11 April 2009
|
||||
<br>
|
||||
Copyright © 1997-2009 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,89 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_compile specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_compile man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function compiles a regular expression into an internal form. It is the
|
||||
same as <b>pcre_compile2()</b>, except for the absence of the <i>errorcodeptr</i>
|
||||
argument. Its arguments are:
|
||||
<pre>
|
||||
<i>pattern</i> A zero-terminated string containing the
|
||||
regular expression to be compiled
|
||||
<i>options</i> Zero or more option bits
|
||||
<i>errptr</i> Where to put an error message
|
||||
<i>erroffset</i> Offset in pattern where error was found
|
||||
<i>tableptr</i> Pointer to character tables, or NULL to
|
||||
use the built-in default
|
||||
</pre>
|
||||
The option bits are:
|
||||
<pre>
|
||||
PCRE_ANCHORED Force pattern anchoring
|
||||
PCRE_AUTO_CALLOUT Compile automatic callouts
|
||||
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
||||
PCRE_BSR_UNICODE \R matches all Unicode line endings
|
||||
PCRE_CASELESS Do caseless matching
|
||||
PCRE_DOLLAR_ENDONLY $ not to match newline at end
|
||||
PCRE_DOTALL . matches anything including NL
|
||||
PCRE_DUPNAMES Allow duplicate names for subpatterns
|
||||
PCRE_EXTENDED Ignore whitespace and # comments
|
||||
PCRE_EXTRA PCRE extra features
|
||||
(not much use currently)
|
||||
PCRE_FIRSTLINE Force matching to be before newline
|
||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||
PCRE_MULTILINE ^ and $ match newlines within data
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||
sequences
|
||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE_UNGREEDY Invert greediness of quantifiers
|
||||
PCRE_UTF8 Run in UTF-8 mode
|
||||
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
|
||||
validity (only relevant if
|
||||
PCRE_UTF8 is set)
|
||||
</pre>
|
||||
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and
|
||||
PCRE_NO_UTF8_CHECK.
|
||||
</P>
|
||||
<P>
|
||||
The yield of the function is a pointer to a private data structure that
|
||||
contains the compiled pattern, or NULL if an error was detected. Note that
|
||||
compiling regular expressions with one version of PCRE for use with a different
|
||||
version is not guaranteed to work and may cause crashes.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,89 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_compile2 specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_compile2 man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>int *<i>errorcodeptr</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function compiles a regular expression into an internal form. It is the
|
||||
same as <b>pcre_compile()</b>, except for the addition of the <i>errorcodeptr</i>
|
||||
argument. The arguments are:
|
||||
</P>
|
||||
<P>
|
||||
<pre>
|
||||
<i>pattern</i> A zero-terminated string containing the
|
||||
regular expression to be compiled
|
||||
<i>options</i> Zero or more option bits
|
||||
<i>errorcodeptr</i> Where to put an error code
|
||||
<i>errptr</i> Where to put an error message
|
||||
<i>erroffset</i> Offset in pattern where error was found
|
||||
<i>tableptr</i> Pointer to character tables, or NULL to
|
||||
use the built-in default
|
||||
</pre>
|
||||
The option bits are:
|
||||
<pre>
|
||||
PCRE_ANCHORED Force pattern anchoring
|
||||
PCRE_AUTO_CALLOUT Compile automatic callouts
|
||||
PCRE_CASELESS Do caseless matching
|
||||
PCRE_DOLLAR_ENDONLY $ not to match newline at end
|
||||
PCRE_DOTALL . matches anything including NL
|
||||
PCRE_DUPNAMES Allow duplicate names for subpatterns
|
||||
PCRE_EXTENDED Ignore whitespace and # comments
|
||||
PCRE_EXTRA PCRE extra features
|
||||
(not much use currently)
|
||||
PCRE_FIRSTLINE Force matching to be before newline
|
||||
PCRE_MULTILINE ^ and $ match newlines within data
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences
|
||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE_UNGREEDY Invert greediness of quantifiers
|
||||
PCRE_UTF8 Run in UTF-8 mode
|
||||
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
|
||||
validity (only relevant if
|
||||
PCRE_UTF8 is set)
|
||||
</pre>
|
||||
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and
|
||||
PCRE_NO_UTF8_CHECK.
|
||||
</P>
|
||||
<P>
|
||||
The yield of the function is a pointer to a private data structure that
|
||||
contains the compiled pattern, or NULL if an error was detected. Note that
|
||||
compiling regular expressions with one version of PCRE for use with a different
|
||||
version is not guaranteed to work and may cause crashes.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,70 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_config specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_config man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function makes it possible for a client program to find out which optional
|
||||
features are available in the version of the PCRE library it is using. Its
|
||||
arguments are as follows:
|
||||
<pre>
|
||||
<i>what</i> A code specifying what information is required
|
||||
<i>where</i> Points to where to put the data
|
||||
</pre>
|
||||
The available codes are:
|
||||
<pre>
|
||||
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
||||
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
||||
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
||||
Internal recursion depth limit
|
||||
PCRE_CONFIG_NEWLINE Value of the default newline sequence:
|
||||
13 (0x000d) for CR
|
||||
10 (0x000a) for LF
|
||||
3338 (0x0d0a) for CRLF
|
||||
-2 for ANYCRLF
|
||||
-1 for ANY
|
||||
PCRE_CONFIG_BSR Indicates what \R matches by default:
|
||||
0 all Unicode line endings
|
||||
1 CR, LF, or CRLF only
|
||||
PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
|
||||
Threshold of return slots, above
|
||||
which <b>malloc()</b> is used by
|
||||
the POSIX API
|
||||
PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap)
|
||||
PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no)
|
||||
PCRE_CONFIG_UNICODE_PROPERTIES
|
||||
Availability of Unicode property support
|
||||
(1=yes 0=no)
|
||||
</pre>
|
||||
The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,53 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_copy_named_substring specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_copy_named_substring man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for extracting a captured substring, identified
|
||||
by name, into a given buffer. The arguments are:
|
||||
<pre>
|
||||
<i>code</i> Pattern that was successfully matched
|
||||
<i>subject</i> Subject that has been successfully matched
|
||||
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used
|
||||
<i>stringcount</i> Value returned by <b>pcre_exec()</b>
|
||||
<i>stringname</i> Name of the required substring
|
||||
<i>buffer</i> Buffer to receive the string
|
||||
<i>buffersize</i> Size of buffer
|
||||
</pre>
|
||||
The yield is the length of the substring, PCRE_ERROR_NOMEMORY if the buffer was
|
||||
too small, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,51 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_copy_substring specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_copy_substring man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||
<b>int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for extracting a captured substring into a given
|
||||
buffer. The arguments are:
|
||||
<pre>
|
||||
<i>subject</i> Subject that has been successfully matched
|
||||
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used
|
||||
<i>stringcount</i> Value returned by <b>pcre_exec()</b>
|
||||
<i>stringnumber</i> Number of the required substring
|
||||
<i>buffer</i> Buffer to receive the string
|
||||
<i>buffersize</i> Size of buffer
|
||||
</pre>
|
||||
The yield is the length of the string, PCRE_ERROR_NOMEMORY if the buffer was
|
||||
too small, or PCRE_ERROR_NOSUBSTRING if the string number is invalid.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,98 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_dfa_exec specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_dfa_exec man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function matches a compiled regular expression against a given subject
|
||||
string, using an alternative matching algorithm that scans the subject string
|
||||
just once (<i>not</i> Perl-compatible). Note that the main, Perl-compatible,
|
||||
matching function is <b>pcre_exec()</b>. The arguments for this function are:
|
||||
<pre>
|
||||
<i>code</i> Points to the compiled pattern
|
||||
<i>extra</i> Points to an associated <b>pcre_extra</b> structure,
|
||||
or is NULL
|
||||
<i>subject</i> Points to the subject string
|
||||
<i>length</i> Length of the subject string, in bytes
|
||||
<i>startoffset</i> Offset in bytes in the subject at which to
|
||||
start matching
|
||||
<i>options</i> Option bits
|
||||
<i>ovector</i> Points to a vector of ints for result offsets
|
||||
<i>ovecsize</i> Number of elements in the vector
|
||||
<i>workspace</i> Points to a vector of ints used as working space
|
||||
<i>wscount</i> Number of elements in the vector
|
||||
</pre>
|
||||
The options are:
|
||||
<pre>
|
||||
PCRE_ANCHORED Match only at the first position
|
||||
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
||||
PCRE_BSR_UNICODE \R matches all Unicode line endings
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences
|
||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NOTBOL Subject is not the beginning of a line
|
||||
PCRE_NOTEOL Subject is not the end of a line
|
||||
PCRE_NOTEMPTY An empty string is not a valid match
|
||||
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
|
||||
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
||||
validity (only relevant if PCRE_UTF8
|
||||
was set at compile time)
|
||||
PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match
|
||||
PCRE_DFA_SHORTEST Return only the shortest match
|
||||
PCRE_DFA_RESTART This is a restart after a partial match
|
||||
</pre>
|
||||
There are restrictions on what may appear in a pattern when using this matching
|
||||
function. Details are given in the
|
||||
<a href="pcrematching.html"><b>pcrematching</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<P>
|
||||
A <b>pcre_extra</b> structure contains the following fields:
|
||||
<pre>
|
||||
<i>flags</i> Bits indicating which fields are set
|
||||
<i>study_data</i> Opaque data from <b>pcre_study()</b>
|
||||
<i>match_limit</i> Limit on internal resource use
|
||||
<i>match_limit_recursion</i> Limit on internal recursion depth
|
||||
<i>callout_data</i> Opaque data passed back to callouts
|
||||
<i>tables</i> Points to character tables or is NULL
|
||||
</pre>
|
||||
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
|
||||
PCRE_EXTRA_TABLES. For this matching function, the <i>match_limit</i> and
|
||||
<i>match_limit_recursion</i> fields are not used, and must not be set.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,91 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_exec specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_exec man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function matches a compiled regular expression against a given subject
|
||||
string, using a matching algorithm that is similar to Perl's. It returns
|
||||
offsets to captured substrings. Its arguments are:
|
||||
<pre>
|
||||
<i>code</i> Points to the compiled pattern
|
||||
<i>extra</i> Points to an associated <b>pcre_extra</b> structure,
|
||||
or is NULL
|
||||
<i>subject</i> Points to the subject string
|
||||
<i>length</i> Length of the subject string, in bytes
|
||||
<i>startoffset</i> Offset in bytes in the subject at which to
|
||||
start matching
|
||||
<i>options</i> Option bits
|
||||
<i>ovector</i> Points to a vector of ints for result offsets
|
||||
<i>ovecsize</i> Number of elements in the vector (a multiple of 3)
|
||||
</pre>
|
||||
The options are:
|
||||
<pre>
|
||||
PCRE_ANCHORED Match only at the first position
|
||||
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
||||
PCRE_BSR_UNICODE \R matches all Unicode line endings
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences
|
||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NOTBOL Subject is not the beginning of a line
|
||||
PCRE_NOTEOL Subject is not the end of a line
|
||||
PCRE_NOTEMPTY An empty string is not a valid match
|
||||
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
|
||||
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
||||
validity (only relevant if PCRE_UTF8
|
||||
was set at compile time)
|
||||
PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match
|
||||
</pre>
|
||||
There are restrictions on what may appear in a pattern when partial matching is
|
||||
requested. For details, see the
|
||||
<a href="pcrepartial.html"><b>pcrepartial</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
A <b>pcre_extra</b> structure contains the following fields:
|
||||
<pre>
|
||||
<i>flags</i> Bits indicating which fields are set
|
||||
<i>study_data</i> Opaque data from <b>pcre_study()</b>
|
||||
<i>match_limit</i> Limit on internal resource use
|
||||
<i>match_limit_recursion</i> Limit on internal recursion depth
|
||||
<i>callout_data</i> Opaque data passed back to callouts
|
||||
<i>tables</i> Points to character tables or is NULL
|
||||
</pre>
|
||||
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
|
||||
PCRE_EXTRA_TABLES.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,40 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_free_substring specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_free_substring man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for freeing the store obtained by a previous
|
||||
call to <b>pcre_get_substring()</b> or <b>pcre_get_named_substring()</b>. Its
|
||||
only argument is a pointer to the string.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,40 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_free_substring_list specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_free_substring_list man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for freeing the store obtained by a previous
|
||||
call to <b>pcre_get_substring_list()</b>. Its only argument is a pointer to the
|
||||
list of string pointers.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,72 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_fullinfo specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_fullinfo man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function returns information about a compiled pattern. Its arguments are:
|
||||
<pre>
|
||||
<i>code</i> Compiled regular expression
|
||||
<i>extra</i> Result of <b>pcre_study()</b> or NULL
|
||||
<i>what</i> What information is required
|
||||
<i>where</i> Where to put the information
|
||||
</pre>
|
||||
The following information is available:
|
||||
<pre>
|
||||
PCRE_INFO_BACKREFMAX Number of highest back reference
|
||||
PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns
|
||||
PCRE_INFO_DEFAULT_TABLES Pointer to default tables
|
||||
PCRE_INFO_FIRSTBYTE Fixed first byte for a match, or
|
||||
-1 for start of string
|
||||
or after newline, or
|
||||
-2 otherwise
|
||||
PCRE_INFO_FIRSTTABLE Table of first bytes (after studying)
|
||||
PCRE_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
||||
PCRE_INFO_LASTLITERAL Literal last byte required
|
||||
PCRE_INFO_NAMECOUNT Number of named subpatterns
|
||||
PCRE_INFO_NAMEENTRYSIZE Size of name table entry
|
||||
PCRE_INFO_NAMETABLE Pointer to name table
|
||||
PCRE_INFO_OKPARTIAL Return 1 if partial matching can be tried
|
||||
PCRE_INFO_OPTIONS Option bits used for compilation
|
||||
PCRE_INFO_SIZE Size of compiled pattern
|
||||
PCRE_INFO_STUDYSIZE Size of study data
|
||||
</pre>
|
||||
The yield of the function is zero on success or:
|
||||
<pre>
|
||||
PCRE_ERROR_NULL the argument <i>code</i> was NULL
|
||||
the argument <i>where</i> was NULL
|
||||
PCRE_ERROR_BADMAGIC the "magic number" was not found
|
||||
PCRE_ERROR_BADOPTION the value of <i>what</i> was invalid
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,55 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_get_named_substring specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_get_named_substring man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b>const char **<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for extracting a captured substring by name. The
|
||||
arguments are:
|
||||
<pre>
|
||||
<i>code</i> Compiled pattern
|
||||
<i>subject</i> Subject that has been successfully matched
|
||||
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used
|
||||
<i>stringcount</i> Value returned by <b>pcre_exec()</b>
|
||||
<i>stringname</i> Name of the required substring
|
||||
<i>stringptr</i> Where to put the string pointer
|
||||
</pre>
|
||||
The memory in which the substring is placed is obtained by calling
|
||||
<b>pcre_malloc()</b>. The convenience function <b>pcre_free_substring()</b> can
|
||||
be used to free it when it is no longer needed. The yield of the function is
|
||||
the length of the extracted substring, PCRE_ERROR_NOMEMORY if sufficient memory
|
||||
could not be obtained, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,49 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_get_stringnumber specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_get_stringnumber man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>name</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This convenience function finds the number of a named substring capturing
|
||||
parenthesis in a compiled pattern. Its arguments are:
|
||||
<pre>
|
||||
<i>code</i> Compiled regular expression
|
||||
<i>name</i> Name whose number is required
|
||||
</pre>
|
||||
The yield of the function is the number of the parenthesis if the name is
|
||||
found, or PCRE_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed
|
||||
(PCRE_DUPNAMES is set), it is not defined which of the numbers is returned by
|
||||
<b>pcre_get_stringnumber()</b>. You can obtain the complete list by calling
|
||||
<b>pcre_get_stringtable_entries()</b>.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,52 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_get_stringtable_entries specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_get_stringtable_entries man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This convenience function finds, for a compiled pattern, the first and last
|
||||
entries for a given name in the table that translates capturing parenthesis
|
||||
names into numbers. When names are required to be unique (PCRE_DUPNAMES is
|
||||
<i>not</i> set), it is usually easier to use <b>pcre_get_stringnumber()</b>
|
||||
instead.
|
||||
<pre>
|
||||
<i>code</i> Compiled regular expression
|
||||
<i>name</i> Name whose entries are required
|
||||
<i>first</i> Where to return a pointer to the first entry
|
||||
<i>last</i> Where to return a pointer to the last entry
|
||||
</pre>
|
||||
The yield of the function is the length of each entry, or
|
||||
PCRE_ERROR_NOSUBSTRING if none are found.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API, including the format of
|
||||
the table entries, in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page, and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,53 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_get_substring specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_get_substring man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b>const char **<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for extracting a captured substring. The
|
||||
arguments are:
|
||||
<pre>
|
||||
<i>subject</i> Subject that has been successfully matched
|
||||
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used
|
||||
<i>stringcount</i> Value returned by <b>pcre_exec()</b>
|
||||
<i>stringnumber</i> Number of the required substring
|
||||
<i>stringptr</i> Where to put the string pointer
|
||||
</pre>
|
||||
The memory in which the substring is placed is obtained by calling
|
||||
<b>pcre_malloc()</b>. The convenience function <b>pcre_free_substring()</b> can
|
||||
be used to free it when it is no longer needed. The yield of the function is
|
||||
the length of the substring, PCRE_ERROR_NOMEMORY if sufficient memory could not
|
||||
be obtained, or PCRE_ERROR_NOSUBSTRING if the string number is invalid.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,53 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_get_substring_list specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_get_substring_list man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for extracting a list of all the captured
|
||||
substrings. The arguments are:
|
||||
<pre>
|
||||
<i>subject</i> Subject that has been successfully matched
|
||||
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used
|
||||
<i>stringcount</i> Value returned by <b>pcre_exec()</b>
|
||||
<i>listptr</i> Where to put a pointer to the list
|
||||
</pre>
|
||||
The memory in which the substrings and the list are placed is obtained by
|
||||
calling <b>pcre_malloc()</b>. The convenience function
|
||||
<b>pcre_free_substring_list()</b> can be used to free it when it is no longer
|
||||
needed. A pointer to a list of pointers is put in the variable whose address is
|
||||
in <i>listptr</i>. The list is terminated by a NULL pointer. The yield of the
|
||||
function is zero on success or PCRE_ERROR_NOMEMORY if sufficient memory could
|
||||
not be obtained.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,39 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_info specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_info man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b>
|
||||
<b>*<i>firstcharptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function is obsolete. You should be using <b>pcre_fullinfo()</b> instead.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,42 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_maketables specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_maketables man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>const unsigned char *pcre_maketables(void);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function builds a set of character tables for character values less than
|
||||
256. These can be passed to <b>pcre_compile()</b> to override PCRE's internal,
|
||||
built-in tables (which were made by <b>pcre_maketables()</b> when PCRE was
|
||||
compiled). You might want to do this if you are using a non-standard locale.
|
||||
The function yields a pointer to the tables.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,45 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_refcount specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_refcount man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function is used to maintain a reference count inside a data block that
|
||||
contains a compiled pattern. Its arguments are:
|
||||
<pre>
|
||||
<i>code</i> Compiled regular expression
|
||||
<i>adjust</i> Adjustment to reference value
|
||||
</pre>
|
||||
The yield of the function is the adjusted reference value, which is constrained
|
||||
to lie between 0 and 65535.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,56 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_study specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_study man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function studies a compiled pattern, to see if additional information can
|
||||
be extracted that might speed up matching. Its arguments are:
|
||||
<pre>
|
||||
<i>code</i> A compiled regular expression
|
||||
<i>options</i> Options for <b>pcre_study()</b>
|
||||
<i>errptr</i> Where to put an error message
|
||||
</pre>
|
||||
If the function succeeds, it returns a value that can be passed to
|
||||
<b>pcre_exec()</b> via its <i>extra</i> argument.
|
||||
</P>
|
||||
<P>
|
||||
If the function returns NULL, either it could not find any additional
|
||||
information, or there was an error. You can tell the difference by looking at
|
||||
the error value. It is NULL in the first case.
|
||||
</P>
|
||||
<P>
|
||||
There are currently no options defined; the value of the second argument should
|
||||
always be zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,39 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_version specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_version man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>char *pcre_version(void);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function returns a character string that gives the version number of the
|
||||
PCRE library and the date of its release.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
File diff suppressed because it is too large
Load Diff
|
@ -1,348 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcrebuild specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcrebuild man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">PCRE BUILD-TIME OPTIONS</a>
|
||||
<li><a name="TOC2" href="#SEC2">C++ SUPPORT</a>
|
||||
<li><a name="TOC3" href="#SEC3">UTF-8 SUPPORT</a>
|
||||
<li><a name="TOC4" href="#SEC4">UNICODE CHARACTER PROPERTY SUPPORT</a>
|
||||
<li><a name="TOC5" href="#SEC5">CODE VALUE OF NEWLINE</a>
|
||||
<li><a name="TOC6" href="#SEC6">WHAT \R MATCHES</a>
|
||||
<li><a name="TOC7" href="#SEC7">BUILDING SHARED AND STATIC LIBRARIES</a>
|
||||
<li><a name="TOC8" href="#SEC8">POSIX MALLOC USAGE</a>
|
||||
<li><a name="TOC9" href="#SEC9">HANDLING VERY LARGE PATTERNS</a>
|
||||
<li><a name="TOC10" href="#SEC10">AVOIDING EXCESSIVE STACK USAGE</a>
|
||||
<li><a name="TOC11" href="#SEC11">LIMITING PCRE RESOURCE USAGE</a>
|
||||
<li><a name="TOC12" href="#SEC12">CREATING CHARACTER TABLES AT BUILD TIME</a>
|
||||
<li><a name="TOC13" href="#SEC13">USING EBCDIC CODE</a>
|
||||
<li><a name="TOC14" href="#SEC14">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
|
||||
<li><a name="TOC15" href="#SEC15">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a>
|
||||
<li><a name="TOC16" href="#SEC16">SEE ALSO</a>
|
||||
<li><a name="TOC17" href="#SEC17">AUTHOR</a>
|
||||
<li><a name="TOC18" href="#SEC18">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br>
|
||||
<P>
|
||||
This document describes the optional features of PCRE that can be selected when
|
||||
the library is compiled. It assumes use of the <b>configure</b> script, where
|
||||
the optional features are selected or deselected by providing options to
|
||||
<b>configure</b> before running the <b>make</b> command. However, the same
|
||||
options can be selected in both Unix-like and non-Unix-like environments using
|
||||
the GUI facility of <b>CMakeSetup</b> if you are using <b>CMake</b> instead of
|
||||
<b>configure</b> to build PCRE.
|
||||
</P>
|
||||
<P>
|
||||
The complete list of options for <b>configure</b> (which includes the standard
|
||||
ones such as the selection of the installation directory) can be obtained by
|
||||
running
|
||||
<pre>
|
||||
./configure --help
|
||||
</pre>
|
||||
The following sections include descriptions of options whose names begin with
|
||||
--enable or --disable. These settings specify changes to the defaults for the
|
||||
<b>configure</b> command. Because of the way that <b>configure</b> works,
|
||||
--enable and --disable always come in pairs, so the complementary option always
|
||||
exists as well, but as it specifies the default, it is not described.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">C++ SUPPORT</a><br>
|
||||
<P>
|
||||
By default, the <b>configure</b> script will search for a C++ compiler and C++
|
||||
header files. If it finds them, it automatically builds the C++ wrapper library
|
||||
for PCRE. You can disable this by adding
|
||||
<pre>
|
||||
--disable-cpp
|
||||
</pre>
|
||||
to the <b>configure</b> command.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">UTF-8 SUPPORT</a><br>
|
||||
<P>
|
||||
To build PCRE with support for UTF-8 Unicode character strings, add
|
||||
<pre>
|
||||
--enable-utf8
|
||||
</pre>
|
||||
to the <b>configure</b> command. Of itself, this does not make PCRE treat
|
||||
strings as UTF-8. As well as compiling PCRE with this option, you also have
|
||||
to set the PCRE_UTF8 option when you call the <b>pcre_compile()</b>
|
||||
function.
|
||||
</P>
|
||||
<P>
|
||||
If you set --enable-utf8 when compiling in an EBCDIC environment, PCRE expects
|
||||
its input to be either ASCII or UTF-8 (depending on the runtime option). It is
|
||||
not possible to support both EBCDIC and UTF-8 codes in the same version of the
|
||||
library. Consequently, --enable-utf8 and --enable-ebcdic are mutually
|
||||
exclusive.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br>
|
||||
<P>
|
||||
UTF-8 support allows PCRE to process character values greater than 255 in the
|
||||
strings that it handles. On its own, however, it does not provide any
|
||||
facilities for accessing the properties of such characters. If you want to be
|
||||
able to use the pattern escapes \P, \p, and \X, which refer to Unicode
|
||||
character properties, you must add
|
||||
<pre>
|
||||
--enable-unicode-properties
|
||||
</pre>
|
||||
to the <b>configure</b> command. This implies UTF-8 support, even if you have
|
||||
not explicitly requested it.
|
||||
</P>
|
||||
<P>
|
||||
Including Unicode property support adds around 30K of tables to the PCRE
|
||||
library. Only the general category properties such as <i>Lu</i> and <i>Nd</i> are
|
||||
supported. Details are given in the
|
||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">CODE VALUE OF NEWLINE</a><br>
|
||||
<P>
|
||||
By default, PCRE interprets the linefeed (LF) character as indicating the end
|
||||
of a line. This is the normal newline character on Unix-like systems. You can
|
||||
compile PCRE to use carriage return (CR) instead, by adding
|
||||
<pre>
|
||||
--enable-newline-is-cr
|
||||
</pre>
|
||||
to the <b>configure</b> command. There is also a --enable-newline-is-lf option,
|
||||
which explicitly specifies linefeed as the newline character.
|
||||
<br>
|
||||
<br>
|
||||
Alternatively, you can specify that line endings are to be indicated by the two
|
||||
character sequence CRLF. If you want this, add
|
||||
<pre>
|
||||
--enable-newline-is-crlf
|
||||
</pre>
|
||||
to the <b>configure</b> command. There is a fourth option, specified by
|
||||
<pre>
|
||||
--enable-newline-is-anycrlf
|
||||
</pre>
|
||||
which causes PCRE to recognize any of the three sequences CR, LF, or CRLF as
|
||||
indicating a line ending. Finally, a fifth option, specified by
|
||||
<pre>
|
||||
--enable-newline-is-any
|
||||
</pre>
|
||||
causes PCRE to recognize any Unicode newline sequence.
|
||||
</P>
|
||||
<P>
|
||||
Whatever line ending convention is selected when PCRE is built can be
|
||||
overridden when the library functions are called. At build time it is
|
||||
conventional to use the standard for your operating system.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||
<P>
|
||||
By default, the sequence \R in a pattern matches any Unicode newline sequence,
|
||||
whatever has been selected as the line ending sequence. If you specify
|
||||
<pre>
|
||||
--enable-bsr-anycrlf
|
||||
</pre>
|
||||
the default is changed so that \R matches only CR, LF, or CRLF. Whatever is
|
||||
selected when PCRE is built can be overridden when the library functions are
|
||||
called.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
|
||||
<P>
|
||||
The PCRE building process uses <b>libtool</b> to build both shared and static
|
||||
Unix libraries by default. You can suppress one of these by adding one of
|
||||
<pre>
|
||||
--disable-shared
|
||||
--disable-static
|
||||
</pre>
|
||||
to the <b>configure</b> command, as required.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">POSIX MALLOC USAGE</a><br>
|
||||
<P>
|
||||
When PCRE is called through the POSIX interface (see the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
documentation), additional working storage is required for holding the pointers
|
||||
to capturing substrings, because PCRE requires three integers per substring,
|
||||
whereas the POSIX interface provides only two. If the number of expected
|
||||
substrings is small, the wrapper function uses space on the stack, because this
|
||||
is faster than using <b>malloc()</b> for each call. The default threshold above
|
||||
which the stack is no longer used is 10; it can be changed by adding a setting
|
||||
such as
|
||||
<pre>
|
||||
--with-posix-malloc-threshold=20
|
||||
</pre>
|
||||
to the <b>configure</b> command.
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
|
||||
<P>
|
||||
Within a compiled pattern, offset values are used to point from one part to
|
||||
another (for example, from an opening parenthesis to an alternation
|
||||
metacharacter). By default, two-byte values are used for these offsets, leading
|
||||
to a maximum size for a compiled pattern of around 64K. This is sufficient to
|
||||
handle all but the most gigantic patterns. Nevertheless, some people do want to
|
||||
process enormous patterns, so it is possible to compile PCRE to use three-byte
|
||||
or four-byte offsets by adding a setting such as
|
||||
<pre>
|
||||
--with-link-size=3
|
||||
</pre>
|
||||
to the <b>configure</b> command. The value given must be 2, 3, or 4. Using
|
||||
longer offsets slows down the operation of PCRE because it has to load
|
||||
additional bytes when handling them.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
|
||||
<P>
|
||||
When matching with the <b>pcre_exec()</b> function, PCRE implements backtracking
|
||||
by making recursive calls to an internal function called <b>match()</b>. In
|
||||
environments where the size of the stack is limited, this can severely limit
|
||||
PCRE's operation. (The Unix environment does not usually suffer from this
|
||||
problem, but it may sometimes be necessary to increase the maximum stack size.
|
||||
There is a discussion in the
|
||||
<a href="pcrestack.html"><b>pcrestack</b></a>
|
||||
documentation.) An alternative approach to recursion that uses memory from the
|
||||
heap to remember data, instead of using recursive function calls, has been
|
||||
implemented to work round the problem of limited stack size. If you want to
|
||||
build a version of PCRE that works this way, add
|
||||
<pre>
|
||||
--disable-stack-for-recursion
|
||||
</pre>
|
||||
to the <b>configure</b> command. With this configuration, PCRE will use the
|
||||
<b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables to call memory
|
||||
management functions. By default these point to <b>malloc()</b> and
|
||||
<b>free()</b>, but you can replace the pointers so that your own functions are
|
||||
used.
|
||||
</P>
|
||||
<P>
|
||||
Separate functions are provided rather than using <b>pcre_malloc</b> and
|
||||
<b>pcre_free</b> because the usage is very predictable: the block sizes
|
||||
requested are always the same, and the blocks are always freed in reverse
|
||||
order. A calling program might be able to implement optimized functions that
|
||||
perform better than <b>malloc()</b> and <b>free()</b>. PCRE runs noticeably more
|
||||
slowly when built in this way. This option affects only the <b>pcre_exec()</b>
|
||||
function; it is not relevant for the <b>pcre_dfa_exec()</b> function.
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
|
||||
<P>
|
||||
Internally, PCRE has a function called <b>match()</b>, which it calls repeatedly
|
||||
(sometimes recursively) when matching a pattern with the <b>pcre_exec()</b>
|
||||
function. By controlling the maximum number of times this function may be
|
||||
called during a single matching operation, a limit can be placed on the
|
||||
resources used by a single call to <b>pcre_exec()</b>. The limit can be changed
|
||||
at run time, as described in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
documentation. The default is 10 million, but this can be changed by adding a
|
||||
setting such as
|
||||
<pre>
|
||||
--with-match-limit=500000
|
||||
</pre>
|
||||
to the <b>configure</b> command. This setting has no effect on the
|
||||
<b>pcre_dfa_exec()</b> matching function.
|
||||
</P>
|
||||
<P>
|
||||
In some environments it is desirable to limit the depth of recursive calls of
|
||||
<b>match()</b> more strictly than the total number of calls, in order to
|
||||
restrict the maximum amount of stack (or heap, if --disable-stack-for-recursion
|
||||
is specified) that is used. A second limit controls this; it defaults to the
|
||||
value that is set for --with-match-limit, which imposes no additional
|
||||
constraints. However, you can set a lower limit by adding, for example,
|
||||
<pre>
|
||||
--with-match-limit-recursion=10000
|
||||
</pre>
|
||||
to the <b>configure</b> command. This value can also be overridden at run time.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
||||
<P>
|
||||
PCRE uses fixed tables for processing characters whose code values are less
|
||||
than 256. By default, PCRE is built with a set of tables that are distributed
|
||||
in the file <i>pcre_chartables.c.dist</i>. These tables are for ASCII codes
|
||||
only. If you add
|
||||
<pre>
|
||||
--enable-rebuild-chartables
|
||||
</pre>
|
||||
to the <b>configure</b> command, the distributed tables are no longer used.
|
||||
Instead, a program called <b>dftables</b> is compiled and run. This outputs the
|
||||
source for a new set of tables, created in the default locale of your C runtime
|
||||
system. (This method of replacing the tables does not work if you are cross
|
||||
compiling, because <b>dftables</b> is run on the local host. If you need to
|
||||
create alternative tables when cross compiling, you will have to do so "by
|
||||
hand".)
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
|
||||
<P>
|
||||
PCRE assumes by default that it will run in an environment where the character
|
||||
code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
|
||||
most computer operating systems. PCRE can, however, be compiled to run in an
|
||||
EBCDIC environment by adding
|
||||
<pre>
|
||||
--enable-ebcdic
|
||||
</pre>
|
||||
to the <b>configure</b> command. This setting implies
|
||||
--enable-rebuild-chartables. You should only use it if you know that you are in
|
||||
an EBCDIC environment (for example, an IBM mainframe operating system). The
|
||||
--enable-ebcdic option is incompatible with --enable-utf8.
|
||||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
||||
<P>
|
||||
By default, <b>pcregrep</b> reads all files as plain text. You can build it so
|
||||
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
|
||||
them with <b>libz</b> or <b>libbz2</b>, respectively, by adding one or both of
|
||||
<pre>
|
||||
--enable-pcregrep-libz
|
||||
--enable-pcregrep-libbz2
|
||||
</pre>
|
||||
to the <b>configure</b> command. These options naturally require that the
|
||||
relevant libraries are installed on your system. Configuration will fail if
|
||||
they are not.
|
||||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
||||
<P>
|
||||
If you add
|
||||
<pre>
|
||||
--enable-pcretest-libreadline
|
||||
</pre>
|
||||
to the <b>configure</b> command, <b>pcretest</b> is linked with the
|
||||
<b>libreadline</b> library, and when its input is from a terminal, it reads it
|
||||
using the <b>readline()</b> function. This provides line-editing and history
|
||||
facilities. Note that <b>libreadline</b> is GPL-licenced, so if you distribute a
|
||||
binary of <b>pcretest</b> linked in this way, there may be licensing issues.
|
||||
</P>
|
||||
<P>
|
||||
Setting this option causes the <b>-lreadline</b> option to be added to the
|
||||
<b>pcretest</b> build. In many operating environments with a system-installed
|
||||
<b>libreadline</b> this is sufficient. However, in some environments (e.g.
|
||||
if an unmodified distribution version of readline is in use), some extra
|
||||
configuration may be necessary. The INSTALL file for <b>libreadline</b> says
|
||||
this:
|
||||
<pre>
|
||||
"Readline uses the termcap functions, but does not link with the
|
||||
termcap or curses library itself, allowing applications which link
|
||||
with readline the to choose an appropriate library."
|
||||
</pre>
|
||||
If your environment has not been set up so that an appropriate library is
|
||||
automatically included, you may need to add something like
|
||||
<pre>
|
||||
LIBS="-lncurses"
|
||||
</pre>
|
||||
immediately before the <b>configure</b> command.
|
||||
</P>
|
||||
<br><a name="SEC16" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcreapi</b>(3), <b>pcre_config</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC17" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC18" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 17 March 2009
|
||||
<br>
|
||||
Copyright © 1997-2009 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,208 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcrecallout specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcrecallout man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">PCRE CALLOUTS</a>
|
||||
<li><a name="TOC2" href="#SEC2">MISSING CALLOUTS</a>
|
||||
<li><a name="TOC3" href="#SEC3">THE CALLOUT INTERFACE</a>
|
||||
<li><a name="TOC4" href="#SEC4">RETURN VALUES</a>
|
||||
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
|
||||
<li><a name="TOC6" href="#SEC6">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">PCRE CALLOUTS</a><br>
|
||||
<P>
|
||||
<b>int (*pcre_callout)(pcre_callout_block *);</b>
|
||||
</P>
|
||||
<P>
|
||||
PCRE provides a feature called "callout", which is a means of temporarily
|
||||
passing control to the caller of PCRE in the middle of pattern matching. The
|
||||
caller of PCRE provides an external function by putting its entry point in the
|
||||
global variable <i>pcre_callout</i>. By default, this variable contains NULL,
|
||||
which disables all calling out.
|
||||
</P>
|
||||
<P>
|
||||
Within a regular expression, (?C) indicates the points at which the external
|
||||
function is to be called. Different callout points can be identified by putting
|
||||
a number less than 256 after the letter C. The default value is zero.
|
||||
For example, this pattern has two callout points:
|
||||
<pre>
|
||||
(?C1)abc(?C2)def
|
||||
</pre>
|
||||
If the PCRE_AUTO_CALLOUT option bit is set when <b>pcre_compile()</b> is called,
|
||||
PCRE automatically inserts callouts, all with number 255, before each item in
|
||||
the pattern. For example, if PCRE_AUTO_CALLOUT is used with the pattern
|
||||
<pre>
|
||||
A(\d{2}|--)
|
||||
</pre>
|
||||
it is processed as if it were
|
||||
<br>
|
||||
<br>
|
||||
(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
|
||||
<br>
|
||||
<br>
|
||||
Notice that there is a callout before and after each parenthesis and
|
||||
alternation bar. Automatic callouts can be used for tracking the progress of
|
||||
pattern matching. The
|
||||
<a href="pcretest.html"><b>pcretest</b></a>
|
||||
command has an option that sets automatic callouts; when it is used, the output
|
||||
indicates how the pattern is matched. This is useful information when you are
|
||||
trying to optimize the performance of a particular pattern.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">MISSING CALLOUTS</a><br>
|
||||
<P>
|
||||
You should be aware that, because of optimizations in the way PCRE matches
|
||||
patterns by default, callouts sometimes do not happen. For example, if the
|
||||
pattern is
|
||||
<pre>
|
||||
ab(?C4)cd
|
||||
</pre>
|
||||
PCRE knows that any matching string must contain the letter "d". If the subject
|
||||
string is "abyz", the lack of "d" means that matching doesn't ever start, and
|
||||
the callout is never reached. However, with "abyd", though the result is still
|
||||
no match, the callout is obeyed.
|
||||
</P>
|
||||
<P>
|
||||
You can disable these optimizations by passing the PCRE_NO_START_OPTIMIZE
|
||||
option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. This slows down the
|
||||
matching process, but does ensure that callouts such as the example above are
|
||||
obeyed.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">THE CALLOUT INTERFACE</a><br>
|
||||
<P>
|
||||
During matching, when PCRE reaches a callout point, the external function
|
||||
defined by <i>pcre_callout</i> is called (if it is set). This applies to both
|
||||
the <b>pcre_exec()</b> and the <b>pcre_dfa_exec()</b> matching functions. The
|
||||
only argument to the callout function is a pointer to a <b>pcre_callout</b>
|
||||
block. This structure contains the following fields:
|
||||
<pre>
|
||||
int <i>version</i>;
|
||||
int <i>callout_number</i>;
|
||||
int *<i>offset_vector</i>;
|
||||
const char *<i>subject</i>;
|
||||
int <i>subject_length</i>;
|
||||
int <i>start_match</i>;
|
||||
int <i>current_position</i>;
|
||||
int <i>capture_top</i>;
|
||||
int <i>capture_last</i>;
|
||||
void *<i>callout_data</i>;
|
||||
int <i>pattern_position</i>;
|
||||
int <i>next_item_length</i>;
|
||||
</pre>
|
||||
The <i>version</i> field is an integer containing the version number of the
|
||||
block format. The initial version was 0; the current version is 1. The version
|
||||
number will change again in future if additional fields are added, but the
|
||||
intention is never to remove any of the existing fields.
|
||||
</P>
|
||||
<P>
|
||||
The <i>callout_number</i> field contains the number of the callout, as compiled
|
||||
into the pattern (that is, the number after ?C for manual callouts, and 255 for
|
||||
automatically generated callouts).
|
||||
</P>
|
||||
<P>
|
||||
The <i>offset_vector</i> field is a pointer to the vector of offsets that was
|
||||
passed by the caller to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. When
|
||||
<b>pcre_exec()</b> is used, the contents can be inspected in order to extract
|
||||
substrings that have been matched so far, in the same way as for extracting
|
||||
substrings after a match has completed. For <b>pcre_dfa_exec()</b> this field is
|
||||
not useful.
|
||||
</P>
|
||||
<P>
|
||||
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
|
||||
that were passed to <b>pcre_exec()</b>.
|
||||
</P>
|
||||
<P>
|
||||
The <i>start_match</i> field normally contains the offset within the subject at
|
||||
which the current match attempt started. However, if the escape sequence \K
|
||||
has been encountered, this value is changed to reflect the modified starting
|
||||
point. If the pattern is not anchored, the callout function may be called
|
||||
several times from the same point in the pattern for different starting points
|
||||
in the subject.
|
||||
</P>
|
||||
<P>
|
||||
The <i>current_position</i> field contains the offset within the subject of the
|
||||
current match pointer.
|
||||
</P>
|
||||
<P>
|
||||
When the <b>pcre_exec()</b> function is used, the <i>capture_top</i> field
|
||||
contains one more than the number of the highest numbered captured substring so
|
||||
far. If no substrings have been captured, the value of <i>capture_top</i> is
|
||||
one. This is always the case when <b>pcre_dfa_exec()</b> is used, because it
|
||||
does not support captured substrings.
|
||||
</P>
|
||||
<P>
|
||||
The <i>capture_last</i> field contains the number of the most recently captured
|
||||
substring. If no substrings have been captured, its value is -1. This is always
|
||||
the case when <b>pcre_dfa_exec()</b> is used.
|
||||
</P>
|
||||
<P>
|
||||
The <i>callout_data</i> field contains a value that is passed to
|
||||
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> specifically so that it can be
|
||||
passed back in callouts. It is passed in the <i>callout_data</i> field of the
|
||||
<b>pcre_extra</b> data structure. If no such data was passed, the value of
|
||||
<i>callout_data</i> in a <b>pcre_callout</b> block is NULL. There is a
|
||||
description of the <b>pcre_extra</b> structure in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<P>
|
||||
The <i>pattern_position</i> field is present from version 1 of the
|
||||
<i>pcre_callout</i> structure. It contains the offset to the next item to be
|
||||
matched in the pattern string.
|
||||
</P>
|
||||
<P>
|
||||
The <i>next_item_length</i> field is present from version 1 of the
|
||||
<i>pcre_callout</i> structure. It contains the length of the next item to be
|
||||
matched in the pattern string. When the callout immediately precedes an
|
||||
alternation bar, a closing parenthesis, or the end of the pattern, the length
|
||||
is zero. When the callout precedes an opening parenthesis, the length is that
|
||||
of the entire subpattern.
|
||||
</P>
|
||||
<P>
|
||||
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
|
||||
help in distinguishing between different automatic callouts, which all have the
|
||||
same callout number. However, they are set for all callouts.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">RETURN VALUES</a><br>
|
||||
<P>
|
||||
The external callout function returns an integer to PCRE. If the value is zero,
|
||||
matching proceeds as normal. If the value is greater than zero, matching fails
|
||||
at the current point, but the testing of other matching possibilities goes
|
||||
ahead, just as if a lookahead assertion had failed. If the value is less than
|
||||
zero, the match is abandoned, and <b>pcre_exec()</b> (or <b>pcre_dfa_exec()</b>)
|
||||
returns the negative value.
|
||||
</P>
|
||||
<P>
|
||||
Negative values should normally be chosen from the set of PCRE_ERROR_xxx
|
||||
values. In particular, PCRE_ERROR_NOMATCH forces a standard "no match" failure.
|
||||
The error number PCRE_ERROR_CALLOUT is reserved for use by callout functions;
|
||||
it will never be used by PCRE itself.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 15 March 2009
|
||||
<br>
|
||||
Copyright © 1997-2009 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,179 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcrecompat specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcrecompat man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
DIFFERENCES BETWEEN PCRE AND PERL
|
||||
</b><br>
|
||||
<P>
|
||||
This document describes the differences in the ways that PCRE and Perl handle
|
||||
regular expressions. The differences described here are mainly with respect to
|
||||
Perl 5.8, though PCRE versions 7.0 and later contain some features that are
|
||||
expected to be in the forthcoming Perl 5.10.
|
||||
</P>
|
||||
<P>
|
||||
1. PCRE has only a subset of Perl's UTF-8 and Unicode support. Details of what
|
||||
it does have are given in the
|
||||
<a href="pcre.html#utf8support">section on UTF-8 support</a>
|
||||
in the main
|
||||
<a href="pcre.html"><b>pcre</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
2. PCRE does not allow repeat quantifiers on lookahead assertions. Perl permits
|
||||
them, but they do not mean what you might think. For example, (?!a){3} does
|
||||
not assert that the next three characters are not "a". It just asserts that the
|
||||
next character is not "a" three times.
|
||||
</P>
|
||||
<P>
|
||||
3. Capturing subpatterns that occur inside negative lookahead assertions are
|
||||
counted, but their entries in the offsets vector are never set. Perl sets its
|
||||
numerical variables from any such patterns that are matched before the
|
||||
assertion fails to match something (thereby succeeding), but only if the
|
||||
negative lookahead assertion contains just one branch.
|
||||
</P>
|
||||
<P>
|
||||
4. Though binary zero characters are supported in the subject string, they are
|
||||
not allowed in a pattern string because it is passed as a normal C string,
|
||||
terminated by zero. The escape sequence \0 can be used in the pattern to
|
||||
represent a binary zero.
|
||||
</P>
|
||||
<P>
|
||||
5. The following Perl escape sequences are not supported: \l, \u, \L,
|
||||
\U, and \N. In fact these are implemented by Perl's general string-handling
|
||||
and are not part of its pattern matching engine. If any of these are
|
||||
encountered by PCRE, an error is generated.
|
||||
</P>
|
||||
<P>
|
||||
6. The Perl escape sequences \p, \P, and \X are supported only if PCRE is
|
||||
built with Unicode character property support. The properties that can be
|
||||
tested with \p and \P are limited to the general category properties such as
|
||||
Lu and Nd, script names such as Greek or Han, and the derived properties Any
|
||||
and L&amp;.
|
||||
</P>
|
||||
<P>
|
||||
7. PCRE does support the \Q...\E escape for quoting substrings. Characters in
|
||||
between are treated as literals. This is slightly different from Perl in that $
|
||||
and @ are also handled as literals inside the quotes. In Perl, they cause
|
||||
variable interpolation (but of course PCRE does not have variables). Note the
|
||||
following examples:
|
||||
<pre>
|
||||
Pattern PCRE matches Perl matches
|
||||
|
||||
\Qabc$xyz\E abc$xyz abc followed by the contents of $xyz
|
||||
\Qabc\$xyz\E abc\$xyz abc\$xyz
|
||||
\Qabc\E\$\Qxyz\E abc$xyz abc$xyz
|
||||
</pre>
|
||||
The \Q...\E sequence is recognized both inside and outside character classes.
|
||||
</P>
|
||||
<P>
|
||||
8. Fairly obviously, PCRE does not support the (?{code}) and (??{code})
|
||||
constructions. However, there is support for recursive patterns. This is not
|
||||
available in Perl 5.8, but will be in Perl 5.10. Also, the PCRE "callout"
|
||||
feature allows an external function to be called during pattern matching. See
|
||||
the
|
||||
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
||||
documentation for details.
|
||||
</P>
|
||||
<P>
|
||||
9. Subpatterns that are called recursively or as "subroutines" are always
|
||||
treated as atomic groups in PCRE. This is like Python, but unlike Perl.
|
||||
</P>
|
||||
<P>
|
||||
10. There are some differences that are concerned with the settings of captured
|
||||
strings when part of a pattern is repeated. For example, matching "aba" against
|
||||
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
|
||||
</P>
|
||||
<P>
|
||||
11. PCRE does support Perl 5.10's backtracking verbs (*ACCEPT), (*FAIL), (*F),
|
||||
(*COMMIT), (*PRUNE), (*SKIP), and (*THEN), but only in the forms without an
|
||||
argument. PCRE does not support (*MARK). If (*ACCEPT) is within capturing
|
||||
parentheses, PCRE does not set that capture group; this is different to Perl.
|
||||
</P>
|
||||
<P>
|
||||
12. PCRE provides some extensions to the Perl regular expression facilities.
|
||||
Perl 5.10 will include new features that are not in earlier versions, some of
|
||||
which (such as named parentheses) have been in PCRE for some time. This list is
|
||||
with respect to Perl 5.10:
|
||||
<br>
|
||||
<br>
|
||||
(a) Although lookbehind assertions must match fixed length strings, each
|
||||
alternative branch of a lookbehind assertion can match a different length of
|
||||
string. Perl requires them all to have the same length.
|
||||
<br>
|
||||
<br>
|
||||
(b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the $
|
||||
meta-character matches only at the very end of the string.
|
||||
<br>
|
||||
<br>
|
||||
(c) If PCRE_EXTRA is set, a backslash followed by a letter with no special
|
||||
meaning is faulted. Otherwise, like Perl, the backslash is quietly ignored.
|
||||
(Perl can be made to issue a warning.)
|
||||
<br>
|
||||
<br>
|
||||
(d) If PCRE_UNGREEDY is set, the greediness of the repetition quantifiers is
|
||||
inverted, that is, by default they are not greedy, but if followed by a
|
||||
question mark they are.
|
||||
<br>
|
||||
<br>
|
||||
(e) PCRE_ANCHORED can be used at matching time to force a pattern to be tried
|
||||
only at the first matching position in the subject string.
|
||||
<br>
|
||||
<br>
|
||||
(f) The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, and PCRE_NO_AUTO_CAPTURE
|
||||
options for <b>pcre_exec()</b> have no Perl equivalents.
|
||||
<br>
|
||||
<br>
|
||||
(g) The \R escape sequence can be restricted to match only CR, LF, or CRLF
|
||||
by the PCRE_BSR_ANYCRLF option.
|
||||
<br>
|
||||
<br>
|
||||
(h) The callout facility is PCRE-specific.
|
||||
<br>
|
||||
<br>
|
||||
(i) The partial matching facility is PCRE-specific.
|
||||
<br>
|
||||
<br>
|
||||
(j) Patterns compiled by PCRE can be saved and re-used at a later time, even on
|
||||
different hosts that have the other endianness.
|
||||
<br>
|
||||
<br>
|
||||
(k) The alternative matching function (<b>pcre_dfa_exec()</b>) matches in a
|
||||
different way and is not Perl-compatible.
|
||||
<br>
|
||||
<br>
|
||||
(l) PCRE recognizes some special sequences such as (*CR) at the start of
|
||||
a pattern that set overall options that cannot be changed within the pattern.
|
||||
</P>
|
||||
<br><b>
|
||||
AUTHOR
|
||||
</b><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><b>
|
||||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 11 September 2007
|
||||
<br>
|
||||
Copyright © 1997-2007 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,370 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcrecpp specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcrecpp man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS OF C++ WRAPPER</a>
|
||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||
<li><a name="TOC3" href="#SEC3">MATCHING INTERFACE</a>
|
||||
<li><a name="TOC4" href="#SEC4">QUOTING METACHARACTERS</a>
|
||||
<li><a name="TOC5" href="#SEC5">PARTIAL MATCHES</a>
|
||||
<li><a name="TOC6" href="#SEC6">UTF-8 AND THE MATCHING INTERFACE</a>
|
||||
<li><a name="TOC7" href="#SEC7">PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE</a>
|
||||
<li><a name="TOC8" href="#SEC8">SCANNING TEXT INCREMENTALLY</a>
|
||||
<li><a name="TOC9" href="#SEC9">PARSING HEX/OCTAL/C-RADIX NUMBERS</a>
|
||||
<li><a name="TOC10" href="#SEC10">REPLACING PARTS OF STRINGS</a>
|
||||
<li><a name="TOC11" href="#SEC11">AUTHOR</a>
|
||||
<li><a name="TOC12" href="#SEC12">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS OF C++ WRAPPER</a><br>
|
||||
<P>
|
||||
<b>#include &lt;pcrecpp.h&gt;</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
The C++ wrapper for PCRE was provided by Google Inc. Some additional
|
||||
functionality was added by Giuseppe Maxia. This brief man page was constructed
|
||||
from the notes in the <i>pcrecpp.h</i> file, which should be consulted for
|
||||
further details.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">MATCHING INTERFACE</a><br>
|
||||
<P>
|
||||
The "FullMatch" operation checks that supplied text matches a supplied pattern
|
||||
exactly. If pointer arguments are supplied, it copies matched sub-strings that
|
||||
match sub-patterns into them.
|
||||
<pre>
|
||||
Example: successful match
|
||||
pcrecpp::RE re("h.*o");
|
||||
re.FullMatch("hello");
|
||||
|
||||
Example: unsuccessful match (requires full match):
|
||||
pcrecpp::RE re("e");
|
||||
!re.FullMatch("hello");
|
||||
|
||||
Example: creating a temporary RE object:
|
||||
pcrecpp::RE("h.*o").FullMatch("hello");
|
||||
</pre>
|
||||
You can pass in a "const char*" or a "string" for "text". The examples below
|
||||
tend to use a const char*. You can, as in the different examples above, store
|
||||
the RE object explicitly in a variable or use a temporary RE object. The
|
||||
examples below use one mode or the other arbitrarily. Either could correctly be
|
||||
used for any of these examples.
|
||||
</P>
|
||||
<P>
|
||||
You must supply extra pointer arguments to extract matched subpieces.
|
||||
<pre>
|
||||
Example: extracts "ruby" into "s" and 1234 into "i"
|
||||
int i;
|
||||
string s;
|
||||
pcrecpp::RE re("(\\w+):(\\d+)");
|
||||
re.FullMatch("ruby:1234", &s, &i);
|
||||
|
||||
Example: does not try to extract any extra sub-patterns
|
||||
re.FullMatch("ruby:1234", &s);
|
||||
|
||||
Example: does not try to extract into NULL
|
||||
re.FullMatch("ruby:1234", NULL, &i);
|
||||
|
||||
Example: integer overflow causes failure
|
||||
!re.FullMatch("ruby:1234567891234", NULL, &i);
|
||||
|
||||
Example: fails because there aren't enough sub-patterns:
|
||||
!pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);
|
||||
|
||||
Example: fails because string cannot be stored in integer
|
||||
!pcrecpp::RE("(.*)").FullMatch("ruby", &i);
|
||||
</pre>
|
||||
The provided pointer arguments can be pointers to any scalar numeric
|
||||
type, or one of:
|
||||
<pre>
|
||||
string (matched piece is copied to string)
|
||||
StringPiece (StringPiece is mutated to point to matched piece)
|
||||
T (where "bool T::ParseFrom(const char*, int)" exists)
|
||||
NULL (the corresponding matched sub-pattern is not copied)
|
||||
</pre>
|
||||
The function returns true iff all of the following conditions are satisfied:
|
||||
<pre>
|
||||
a. "text" matches "pattern" exactly;
|
||||
|
||||
b. The number of matched sub-patterns is >= number of supplied
|
||||
pointers;
|
||||
|
||||
c. The "i"th argument has a suitable type for holding the
|
||||
string captured as the "i"th sub-pattern. If you pass in
|
||||
void * NULL for the "i"th argument, or a non-void * NULL
|
||||
of the correct type, or pass fewer arguments than the
|
||||
number of sub-patterns, the "i"th captured sub-pattern is
|
||||
ignored.
|
||||
</pre>
|
||||
CAVEAT: An optional sub-pattern that does not exist in the matched
|
||||
string is assigned the empty string. Therefore, the following will
|
||||
return false (because the empty string is not a valid number):
|
||||
<pre>
|
||||
int number;
|
||||
pcrecpp::RE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
|
||||
</pre>
|
||||
The matching interface supports at most 16 arguments per call.
|
||||
If you need more, consider using the more general interface
|
||||
<b>pcrecpp::RE::DoMatch</b>. See <b>pcrecpp.h</b> for the signature for
|
||||
<b>DoMatch</b>.
|
||||
</P>
|
||||
<P>
|
||||
NOTE: Do not use <b>no_arg</b>, which is used internally to mark the end of a
|
||||
list of optional arguments, as a placeholder for missing arguments, as this can
|
||||
lead to segfaults.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">QUOTING METACHARACTERS</a><br>
|
||||
<P>
|
||||
You can use the "QuoteMeta" operation to insert backslashes before all
|
||||
potentially meaningful characters in a string. The returned string, used as a
|
||||
regular expression, will exactly match the original string.
|
||||
<pre>
|
||||
Example:
|
||||
string quoted = RE::QuoteMeta(unquoted);
|
||||
</pre>
|
||||
Note that it's legal to escape a character even if it has no special meaning in
|
||||
a regular expression -- so this function does that. (This also makes it
|
||||
identical to the perl function of the same name; see "perldoc -f quotemeta".)
|
||||
For example, "1.5-2.0?" becomes "1\.5\-2\.0\?".
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">PARTIAL MATCHES</a><br>
|
||||
<P>
|
||||
You can use the "PartialMatch" operation when you want the pattern
|
||||
to match any substring of the text.
|
||||
<pre>
|
||||
Example: simple search for a string:
|
||||
pcrecpp::RE("ell").PartialMatch("hello");
|
||||
|
||||
Example: find first number in a string:
|
||||
int number;
|
||||
pcrecpp::RE re("(\\d+)");
|
||||
re.PartialMatch("x*100 + 20", &number);
|
||||
assert(number == 100);
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">UTF-8 AND THE MATCHING INTERFACE</a><br>
|
||||
<P>
|
||||
By default, pattern and text are plain text, one byte per character. The UTF8
|
||||
flag, passed to the constructor, causes both pattern and string to be treated
|
||||
as UTF-8 text, still a byte stream but potentially multiple bytes per
|
||||
character. In practice, the text is likelier to be UTF-8 than the pattern, but
|
||||
the match returned may depend on the UTF8 flag, so always use it when matching
|
||||
UTF8 text. For example, "." will match one byte normally but with UTF8 set may
|
||||
match up to three bytes of a multi-byte character.
|
||||
<pre>
|
||||
Example:
|
||||
pcrecpp::RE_Options options;
|
||||
options.set_utf8();
|
||||
pcrecpp::RE re(utf8_pattern, options);
|
||||
re.FullMatch(utf8_string);
|
||||
|
||||
Example: using the convenience function UTF8():
|
||||
pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
|
||||
re.FullMatch(utf8_string);
|
||||
</pre>
|
||||
NOTE: The UTF8 flag is ignored if pcre was not configured with the
|
||||
<pre>
|
||||
--enable-utf8 flag.
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE</a><br>
|
||||
<P>
|
||||
PCRE defines some modifiers to change the behavior of the regular expression
|
||||
engine. The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle to
|
||||
pass such modifiers to a RE class. Currently, the following modifiers are
|
||||
supported:
|
||||
<pre>
|
||||
modifier description Perl corresponding
|
||||
|
||||
PCRE_CASELESS case insensitive match /i
|
||||
PCRE_MULTILINE multiple lines match /m
|
||||
PCRE_DOTALL dot matches newlines /s
|
||||
PCRE_DOLLAR_ENDONLY $ matches only at end N/A
|
||||
PCRE_EXTRA strict escape parsing N/A
|
||||
PCRE_EXTENDED ignore whitespaces /x
|
||||
PCRE_UTF8 handles UTF8 chars built-in
|
||||
PCRE_UNGREEDY reverses * and *? N/A
|
||||
PCRE_NO_AUTO_CAPTURE disables capturing parens N/A (*)
|
||||
</pre>
|
||||
(*) Both Perl and PCRE allow non capturing parentheses by means of the
|
||||
"?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
|
||||
capture, while (ab|cd) does.
|
||||
</P>
|
||||
<P>
|
||||
For a full account of how each modifier works, please check the
|
||||
PCRE API reference page.
|
||||
</P>
|
||||
<P>
|
||||
For each modifier, there are two member functions whose name is made
|
||||
out of the modifier in lowercase, without the "PCRE_" prefix. For
|
||||
instance, PCRE_CASELESS is handled by
|
||||
<pre>
|
||||
bool caseless()
|
||||
</pre>
|
||||
which returns true if the modifier is set, and
|
||||
<pre>
|
||||
RE_Options & set_caseless(bool)
|
||||
</pre>
|
||||
which sets or unsets the modifier. Moreover, PCRE_EXTRA_MATCH_LIMIT can be
|
||||
accessed through the <b>set_match_limit()</b> and <b>match_limit()</b> member
|
||||
functions. Setting <i>match_limit</i> to a non-zero value will limit the
|
||||
execution of pcre to keep it from doing bad things like blowing the stack or
|
||||
taking an eternity to return a result. A value of 5000 is good enough to stop
|
||||
stack blowup in a 2MB thread stack. Setting <i>match_limit</i> to zero disables
|
||||
match limiting. Alternatively, you can call <b>match_limit_recursion()</b>
|
||||
which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much PCRE
|
||||
recurses. <b>match_limit()</b> limits the number of matches PCRE does;
|
||||
<b>match_limit_recursion()</b> limits the depth of internal recursion, and
|
||||
therefore the amount of stack that is used.
|
||||
</P>
|
||||
<P>
|
||||
Normally, to pass one or more modifiers to a RE class, you declare
|
||||
a <i>RE_Options</i> object, set the appropriate options, and pass this
|
||||
object to a RE constructor. Example:
|
||||
<pre>
|
||||
RE_Options opt;
|
||||
opt.set_caseless(true);
|
||||
if (RE("HELLO", opt).PartialMatch("hello world")) ...
|
||||
</pre>
|
||||
RE_Options has two constructors. The default constructor takes no arguments and
|
||||
creates a set of flags that are off by default. The optional parameter
|
||||
<i>option_flags</i> is to facilitate transfer of legacy code from C programs.
|
||||
This lets you do
|
||||
<pre>
|
||||
RE(pattern,
|
||||
RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
|
||||
</pre>
|
||||
However, new code is better off doing
|
||||
<pre>
|
||||
RE(pattern,
|
||||
RE_Options().set_caseless(true).set_multiline(true))
|
||||
.PartialMatch(str);
|
||||
</pre>
|
||||
If you are going to pass one of the most used modifiers, there are some
|
||||
convenience functions that return a RE_Options class with the
|
||||
appropriate modifier already set: <b>CASELESS()</b>, <b>UTF8()</b>,
|
||||
<b>MULTILINE()</b>, <b>DOTALL</b>(), and <b>EXTENDED()</b>.
|
||||
</P>
|
||||
<P>
|
||||
If you need to set several options at once, and you don't want to go through
|
||||
the pains of declaring a RE_Options object and setting several options, there
|
||||
is a parallel method that gives you such ability on the fly. You can concatenate
|
||||
several <b>set_xxxxx()</b> member functions, since each of them returns a
|
||||
reference to its class object. For example, to pass PCRE_CASELESS,
|
||||
PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one statement, you may write:
|
||||
<pre>
|
||||
RE(" ^ xyz \\s+ .* blah$",
|
||||
RE_Options()
|
||||
.set_caseless(true)
|
||||
.set_extended(true)
|
||||
.set_multiline(true)).PartialMatch(sometext);
|
||||
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">SCANNING TEXT INCREMENTALLY</a><br>
|
||||
<P>
|
||||
The "Consume" operation may be useful if you want to repeatedly
|
||||
match regular expressions at the front of a string and skip over
|
||||
them as they match. This requires use of the "StringPiece" type,
|
||||
which represents a sub-range of a real string. Like RE, StringPiece
|
||||
is defined in the pcrecpp namespace.
|
||||
<pre>
|
||||
Example: read lines of the form "var = value" from a string.
|
||||
string contents = ...; // Fill string somehow
|
||||
pcrecpp::StringPiece input(contents); // Wrap in a StringPiece
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
<pre>
|
||||
string var;
|
||||
int value;
|
||||
pcrecpp::RE re("(\\w+) = (\\d+)\n");
|
||||
while (re.Consume(&input, &var, &value)) {
|
||||
...;
|
||||
}
|
||||
</pre>
|
||||
Each successful call to "Consume" will set "var/value", and also
|
||||
advance "input" so it points past the matched text.
|
||||
</P>
|
||||
<P>
|
||||
The "FindAndConsume" operation is similar to "Consume" but does not
|
||||
anchor your match at the beginning of the string. For example, you
|
||||
could extract all words from a string by repeatedly calling
|
||||
<pre>
|
||||
pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">PARSING HEX/OCTAL/C-RADIX NUMBERS</a><br>
|
||||
<P>
|
||||
By default, if you pass a pointer to a numeric value, the
|
||||
corresponding text is interpreted as a base-10 number. You can
|
||||
instead wrap the pointer with a call to one of the operators Hex(),
|
||||
Octal(), or CRadix() to interpret the text in another base. The
|
||||
CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
|
||||
prefixes, but defaults to base-10.
|
||||
<pre>
|
||||
Example:
|
||||
int a, b, c, d;
|
||||
pcrecpp::RE re("(.*) (.*) (.*) (.*)");
|
||||
re.FullMatch("100 40 0100 0x40",
|
||||
pcrecpp::Octal(&a), pcrecpp::Hex(&b),
|
||||
pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
|
||||
</pre>
|
||||
will leave 64 in a, b, c, and d.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">REPLACING PARTS OF STRINGS</a><br>
|
||||
<P>
|
||||
You can replace the first match of "pattern" in "str" with "rewrite".
|
||||
Within "rewrite", backslash-escaped digits (\1 to \9) can be
|
||||
used to insert text matching corresponding parenthesized group
|
||||
from the pattern. \0 in "rewrite" refers to the entire matching
|
||||
text. For example:
|
||||
<pre>
|
||||
string s = "yabba dabba doo";
|
||||
pcrecpp::RE("b+").Replace("d", &s);
|
||||
</pre>
|
||||
will leave "s" containing "yada dabba doo". The result is true if the pattern
|
||||
matches and a replacement occurs, false otherwise.
|
||||
</P>
|
||||
<P>
|
||||
<b>GlobalReplace</b> is like <b>Replace</b> except that it replaces all
|
||||
occurrences of the pattern in the string with the rewrite. Replacements are
|
||||
not subject to re-matching. For example:
|
||||
<pre>
|
||||
string s = "yabba dabba doo";
|
||||
pcrecpp::RE("b+").GlobalReplace("d", &s);
|
||||
</pre>
|
||||
will leave "s" containing "yada dada doo". It returns the number of
|
||||
replacements made.
|
||||
</P>
|
||||
<P>
|
||||
<b>Extract</b> is like <b>Replace</b>, except that if the pattern matches,
|
||||
"rewrite" is copied into "out" (an additional argument) with substitutions.
|
||||
The non-matching portions of "text" are ignored. Returns true iff a match
|
||||
occurred and the extraction happened successfully; if no match occurs, the
|
||||
string is left unaffected.
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
The C++ wrapper was contributed by Google Inc.
|
||||
<br>
|
||||
Copyright © 2007 Google Inc.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 17 March 2009
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,533 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcregrep specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcregrep man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||
<li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a>
|
||||
<li><a name="TOC4" href="#SEC4">OPTIONS</a>
|
||||
<li><a name="TOC5" href="#SEC5">ENVIRONMENT VARIABLES</a>
|
||||
<li><a name="TOC6" href="#SEC6">NEWLINES</a>
|
||||
<li><a name="TOC7" href="#SEC7">OPTIONS COMPATIBILITY</a>
|
||||
<li><a name="TOC8" href="#SEC8">OPTIONS WITH DATA</a>
|
||||
<li><a name="TOC9" href="#SEC9">MATCHING ERRORS</a>
|
||||
<li><a name="TOC10" href="#SEC10">DIAGNOSTICS</a>
|
||||
<li><a name="TOC11" href="#SEC11">SEE ALSO</a>
|
||||
<li><a name="TOC12" href="#SEC12">AUTHOR</a>
|
||||
<li><a name="TOC13" href="#SEC13">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||
<P>
|
||||
<b>pcregrep [options] [long options] [pattern] [path1 path2 ...]</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
<b>pcregrep</b> searches files for character patterns, in the same way as other
|
||||
grep commands do, but it uses the PCRE regular expression library to support
|
||||
patterns that are compatible with the regular expressions of Perl 5. See
|
||||
<a href="pcrepattern.html"><b>pcrepattern</b>(3)</a>
|
||||
for a full description of syntax and semantics of the regular expressions
|
||||
that PCRE supports.
|
||||
</P>
|
||||
<P>
|
||||
Patterns, whether supplied on the command line or in a separate file, are given
|
||||
without delimiters. For example:
|
||||
<pre>
|
||||
pcregrep Thursday /etc/motd
|
||||
</pre>
|
||||
If you attempt to use delimiters (for example, by surrounding a pattern with
|
||||
slashes, as is common in Perl scripts), they are interpreted as part of the
|
||||
pattern. Quotes can of course be used to delimit patterns on the command line
|
||||
because they are interpreted by the shell, and indeed they are required if a
|
||||
pattern contains white space or shell metacharacters.
|
||||
</P>
|
||||
<P>
|
||||
The first argument that follows any option settings is treated as the single
|
||||
pattern to be matched when neither <b>-e</b> nor <b>-f</b> is present.
|
||||
Conversely, when one or both of these options are used to specify patterns, all
|
||||
arguments are treated as path names. At least one of <b>-e</b>, <b>-f</b>, or an
|
||||
argument pattern must be provided.
|
||||
</P>
|
||||
<P>
|
||||
If no files are specified, <b>pcregrep</b> reads the standard input. The
|
||||
standard input can also be referenced by a name consisting of a single hyphen.
|
||||
For example:
|
||||
<pre>
|
||||
pcregrep some-pattern /file1 - /file3
|
||||
</pre>
|
||||
By default, each line that matches a pattern is copied to the standard
|
||||
output, and if there is more than one file, the file name is output at the
|
||||
start of each line, followed by a colon. However, there are options that can
|
||||
change how <b>pcregrep</b> behaves. In particular, the <b>-M</b> option makes it
|
||||
possible to search for patterns that span line boundaries. What defines a line
|
||||
boundary is controlled by the <b>-N</b> (<b>--newline</b>) option.
|
||||
</P>
|
||||
<P>
|
||||
Patterns are limited to 8K or BUFSIZ characters, whichever is the greater.
|
||||
BUFSIZ is defined in <b>&lt;stdio.h&gt;</b>. When there is more than one pattern
|
||||
(specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
|
||||
each line in the order in which they are defined, except that all the <b>-e</b>
|
||||
patterns are tried before the <b>-f</b> patterns.
|
||||
</P>
|
||||
<P>
|
||||
By default, as soon as one pattern matches (or fails to match when <b>-v</b> is
|
||||
used), no further patterns are considered. However, if <b>--colour</b> (or
|
||||
<b>--color</b>) is used to colour the matching substrings, or if
|
||||
<b>--only-matching</b>, <b>--file-offsets</b>, or <b>--line-offsets</b> is used to
|
||||
output only the part of the line that matched (either shown literally, or as an
|
||||
offset), scanning resumes immediately following the match, so that further
|
||||
matches on the same line can be found. If there are multiple patterns, they are
|
||||
all tried on the remainder of the line, but patterns that follow the one that
|
||||
matched are not tried on the earlier part of the line.
|
||||
</P>
|
||||
<P>
|
||||
This is the same behaviour as GNU grep, but it does mean that the order in
|
||||
which multiple patterns are specified can affect the output when one of the
|
||||
above options is used.
|
||||
</P>
|
||||
<P>
|
||||
Patterns that can match an empty string are accepted, but empty string
|
||||
matches are not recognized. An example is the pattern "(super)?(man)?", in
|
||||
which all components are optional. This pattern finds all occurrences of both
|
||||
"super" and "man"; the output differs from matching with "super|man" when only
|
||||
the matching substrings are being shown.
|
||||
</P>
|
||||
<P>
|
||||
If the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variable is set,
|
||||
<b>pcregrep</b> uses the value to set a locale when calling the PCRE library.
|
||||
The <b>--locale</b> option can be used to override this.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">SUPPORT FOR COMPRESSED FILES</a><br>
|
||||
<P>
|
||||
It is possible to compile <b>pcregrep</b> so that it uses <b>libz</b> or
|
||||
<b>libbz2</b> to read files whose names end in <b>.gz</b> or <b>.bz2</b>,
|
||||
respectively. You can find out whether your binary has support for one or both
|
||||
of these file types by running it with the <b>--help</b> option. If the
|
||||
appropriate support is not present, files are treated as plain text. The
|
||||
standard input is always so treated.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">OPTIONS</a><br>
|
||||
<P>
|
||||
<b>--</b>
|
||||
This terminates the list of options. It is useful if the next item on the
|
||||
command line starts with a hyphen but is not an option. This allows for the
|
||||
processing of patterns and filenames that start with hyphens.
|
||||
</P>
|
||||
<P>
|
||||
<b>-A</b> <i>number</i>, <b>--after-context=</b><i>number</i>
|
||||
Output <i>number</i> lines of context after each matching line. If filenames
|
||||
and/or line numbers are being output, a hyphen separator is used instead of a
|
||||
colon for the context lines. A line containing "--" is output between each
|
||||
group of lines, unless they are in fact contiguous in the input file. The value
|
||||
of <i>number</i> is expected to be relatively small. However, <b>pcregrep</b>
|
||||
guarantees to have up to 8K of following text available for context output.
|
||||
</P>
|
||||
<P>
|
||||
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
|
||||
Output <i>number</i> lines of context before each matching line. If filenames
|
||||
and/or line numbers are being output, a hyphen separator is used instead of a
|
||||
colon for the context lines. A line containing "--" is output between each
|
||||
group of lines, unless they are in fact contiguous in the input file. The value
|
||||
of <i>number</i> is expected to be relatively small. However, <b>pcregrep</b>
|
||||
guarantees to have up to 8K of preceding text available for context output.
|
||||
</P>
|
||||
<P>
|
||||
<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
|
||||
Output <i>number</i> lines of context both before and after each matching line.
|
||||
This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value.
|
||||
</P>
|
||||
<P>
|
||||
<b>-c</b>, <b>--count</b>
|
||||
Do not output individual lines; instead just output a count of the number of
|
||||
lines that would otherwise have been output. If several files are given, a
|
||||
count is output for each of them. In this mode, the <b>-A</b>, <b>-B</b>, and
|
||||
<b>-C</b> options are ignored.
|
||||
</P>
|
||||
<P>
|
||||
<b>--colour</b>, <b>--color</b>
|
||||
If this option is given without any data, it is equivalent to "--colour=auto".
|
||||
If data is required, it must be given in the same shell item, separated by an
|
||||
equals sign.
|
||||
</P>
|
||||
<P>
|
||||
<b>--colour=</b><i>value</i>, <b>--color=</b><i>value</i>
|
||||
This option specifies under what circumstances the parts of a line that matched
|
||||
a pattern should be coloured in the output. By default, the output is not
|
||||
coloured. The value (which is optional, see above) may be "never", "always", or
|
||||
"auto". In the latter case, colouring happens only if the standard output is
|
||||
connected to a terminal. More resources are used when colouring is enabled,
|
||||
because <b>pcregrep</b> has to search for all possible matches in a line, not
|
||||
just one, in order to colour them all.
|
||||
</P>
|
||||
<P>
|
||||
The colour that is used can be specified by setting the environment variable
|
||||
PCREGREP_COLOUR or PCREGREP_COLOR. The value of this variable should be a
|
||||
string of two numbers, separated by a semicolon. They are copied directly into
|
||||
the control string for setting colour on a terminal, so it is your
|
||||
responsibility to ensure that they make sense. If neither of the environment
|
||||
variables is set, the default is "1;31", which gives red.
|
||||
</P>
|
||||
<P>
|
||||
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
|
||||
If an input path is not a regular file or a directory, "action" specifies how
|
||||
it is to be processed. Valid values are "read" (the default) or "skip"
|
||||
(silently skip the path).
|
||||
</P>
|
||||
<P>
|
||||
<b>-d</b> <i>action</i>, <b>--directories=</b><i>action</i>
|
||||
If an input path is a directory, "action" specifies how it is to be processed.
|
||||
Valid values are "read" (the default), "recurse" (equivalent to the <b>-r</b>
|
||||
option), or "skip" (silently skip the path). In the default case, directories
|
||||
are read as if they were ordinary files. In some operating systems the effect
|
||||
of reading a directory like this is an immediate end-of-file.
|
||||
</P>
|
||||
<P>
|
||||
<b>-e</b> <i>pattern</i>, <b>--regex=</b><i>pattern</i>, <b>--regexp=</b><i>pattern</i>
|
||||
Specify a pattern to be matched. This option can be used multiple times in
|
||||
order to specify several patterns. It can also be used as a way of specifying a
|
||||
single pattern that starts with a hyphen. When <b>-e</b> is used, no argument
|
||||
pattern is taken from the command line; all arguments are treated as file
|
||||
names. There is an overall maximum of 100 patterns. They are applied to each
|
||||
line in the order in which they are defined until one matches (or fails to
|
||||
match if <b>-v</b> is used). If <b>-f</b> is used with <b>-e</b>, the command line
|
||||
patterns are matched first, followed by the patterns from the file, independent
|
||||
of the order in which these options are specified. Note that multiple use of
|
||||
<b>-e</b> is not the same as a single pattern with alternatives. For example,
|
||||
X|Y finds the first character in a line that is X or Y, whereas if the two
|
||||
patterns are given separately, <b>pcregrep</b> finds X if it is present, even if
|
||||
it follows Y in the line. It finds Y only if there is no X in the line. This
|
||||
really matters only if you are using <b>-o</b> to show the part(s) of the line
|
||||
that matched.
|
||||
</P>
|
||||
<P>
|
||||
<b>--exclude</b>=<i>pattern</i>
|
||||
When <b>pcregrep</b> is searching the files in a directory as a consequence of
|
||||
the <b>-r</b> (recursive search) option, any regular files whose names match the
|
||||
pattern are excluded. Subdirectories are not excluded by this option; they are
|
||||
searched recursively, subject to the <b>--exclude_dir</b> and
|
||||
<b>--include_dir</b> options. The pattern is a PCRE regular expression, and is
|
||||
matched against the final component of the file name (not the entire path). If
|
||||
a file name matches both <b>--include</b> and <b>--exclude</b>, it is excluded.
|
||||
There is no short form for this option.
|
||||
</P>
|
||||
<P>
|
||||
<b>--exclude_dir</b>=<i>pattern</i>
|
||||
When <b>pcregrep</b> is searching the contents of a directory as a consequence
|
||||
of the <b>-r</b> (recursive search) option, any subdirectories whose names match
|
||||
the pattern are excluded. (Note that the <b>--exclude</b> option does not affect
|
||||
subdirectories.) The pattern is a PCRE regular expression, and is matched
|
||||
against the final component of the name (not the entire path). If a
|
||||
subdirectory name matches both <b>--include_dir</b> and <b>--exclude_dir</b>, it
|
||||
is excluded. There is no short form for this option.
|
||||
</P>
|
||||
<P>
|
||||
<b>-F</b>, <b>--fixed-strings</b>
|
||||
Interpret each pattern as a list of fixed strings, separated by newlines,
|
||||
instead of as a regular expression. The <b>-w</b> (match as a word) and <b>-x</b>
|
||||
(match whole line) options can be used with <b>-F</b>. They apply to each of the
|
||||
fixed strings. A line is selected if any of the fixed strings are found in it
|
||||
(subject to <b>-w</b> or <b>-x</b>, if present).
|
||||
</P>
|
||||
<P>
|
||||
<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
|
||||
Read a number of patterns from the file, one per line, and match them against
|
||||
each line of input. A data line is output if any of the patterns match it. The
|
||||
filename can be given as "-" to refer to the standard input. When <b>-f</b> is
|
||||
used, patterns specified on the command line using <b>-e</b> may also be
|
||||
present; they are tested before the file's patterns. However, no other pattern
|
||||
is taken from the command line; all arguments are treated as file names. There
|
||||
is an overall maximum of 100 patterns. Trailing white space is removed from
|
||||
each line, and blank lines are ignored. An empty file contains no patterns and
|
||||
therefore matches nothing. See also the comments about multiple patterns versus
|
||||
a single pattern with alternatives in the description of <b>-e</b> above.
|
||||
</P>
|
||||
<P>
|
||||
<b>--file-offsets</b>
|
||||
Instead of showing lines or parts of lines that match, show each match as an
|
||||
offset from the start of the file and a length, separated by a comma. In this
|
||||
mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
|
||||
options are ignored. If there is more than one match in a line, each of them is
|
||||
shown separately. This option is mutually exclusive with <b>--line-offsets</b>
|
||||
and <b>--only-matching</b>.
|
||||
</P>
|
||||
<P>
|
||||
<b>-H</b>, <b>--with-filename</b>
|
||||
Force the inclusion of the filename at the start of output lines when searching
|
||||
a single file. By default, the filename is not shown in this case. For matching
|
||||
lines, the filename is followed by a colon; for context lines, a hyphen
|
||||
separator is used. If a line number is also being output, it follows the file
|
||||
name.
|
||||
</P>
|
||||
<P>
|
||||
<b>-h</b>, <b>--no-filename</b>
|
||||
Suppress the output filenames when searching multiple files. By default,
|
||||
filenames are shown when multiple files are searched. For matching lines, the
|
||||
filename is followed by a colon; for context lines, a hyphen separator is used.
|
||||
If a line number is also being output, it follows the file name.
|
||||
</P>
|
||||
<P>
|
||||
<b>--help</b>
|
||||
Output a help message, giving brief details of the command options and file
|
||||
type support, and then exit.
|
||||
</P>
|
||||
<P>
|
||||
<b>-i</b>, <b>--ignore-case</b>
|
||||
Ignore upper/lower case distinctions during comparisons.
|
||||
</P>
|
||||
<P>
|
||||
<b>--include</b>=<i>pattern</i>
|
||||
When <b>pcregrep</b> is searching the files in a directory as a consequence of
|
||||
the <b>-r</b> (recursive search) option, only those regular files whose names
|
||||
match the pattern are included. Subdirectories are always included and searched
|
||||
recursively, subject to the <b>--include_dir</b> and <b>--exclude_dir</b>
|
||||
options. The pattern is a PCRE regular expression, and is matched against the
|
||||
final component of the file name (not the entire path). If a file name matches
|
||||
both <b>--include</b> and <b>--exclude</b>, it is excluded. There is no short
|
||||
form for this option.
|
||||
</P>
|
||||
<P>
|
||||
<b>--include_dir</b>=<i>pattern</i>
|
||||
When <b>pcregrep</b> is searching the contents of a directory as a consequence
|
||||
of the <b>-r</b> (recursive search) option, only those subdirectories whose
|
||||
names match the pattern are included. (Note that the <b>--include</b> option
|
||||
does not affect subdirectories.) The pattern is a PCRE regular expression, and
|
||||
is matched against the final component of the name (not the entire path). If a
|
||||
subdirectory name matches both <b>--include_dir</b> and <b>--exclude_dir</b>, it
|
||||
is excluded. There is no short form for this option.
|
||||
</P>
|
||||
<P>
|
||||
<b>-L</b>, <b>--files-without-match</b>
|
||||
Instead of outputting lines from the files, just output the names of the files
|
||||
that do not contain any lines that would have been output. Each file name is
|
||||
output once, on a separate line.
|
||||
</P>
|
||||
<P>
|
||||
<b>-l</b>, <b>--files-with-matches</b>
|
||||
Instead of outputting lines from the files, just output the names of the files
|
||||
containing lines that would have been output. Each file name is output
|
||||
once, on a separate line. Searching stops as soon as a matching line is found
|
||||
in a file.
|
||||
</P>
|
||||
<P>
|
||||
<b>--label</b>=<i>name</i>
|
||||
This option supplies a name to be used for the standard input when file names
|
||||
are being output. If not supplied, "(standard input)" is used. There is no
|
||||
short form for this option.
|
||||
</P>
|
||||
<P>
|
||||
<b>--line-offsets</b>
|
||||
Instead of showing lines or parts of lines that match, show each match as a
|
||||
line number, the offset from the start of the line, and a length. The line
|
||||
number is terminated by a colon (as usual; see the <b>-n</b> option), and the
|
||||
offset and length are separated by a comma. In this mode, no context is shown.
|
||||
That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
|
||||
more than one match in a line, each of them is shown separately. This option is
|
||||
mutually exclusive with <b>--file-offsets</b> and <b>--only-matching</b>.
|
||||
</P>
|
||||
<P>
|
||||
<b>--locale</b>=<i>locale-name</i>
|
||||
This option specifies a locale to be used for pattern matching. It overrides
|
||||
the value in the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variables. If no
|
||||
locale is specified, the PCRE library's default (usually the "C" locale) is
|
||||
used. There is no short form for this option.
|
||||
</P>
|
||||
<P>
|
||||
<b>-M</b>, <b>--multiline</b>
|
||||
Allow patterns to match more than one line. When this option is given, patterns
|
||||
may usefully contain literal newline characters and internal occurrences of ^
|
||||
and $ characters. The output for any one match may consist of more than one
|
||||
line. When this option is set, the PCRE library is called in "multiline" mode.
|
||||
There is a limit to the number of lines that can be matched, imposed by the way
|
||||
that <b>pcregrep</b> buffers the input file as it scans it. However,
|
||||
<b>pcregrep</b> ensures that at least 8K characters or the rest of the document
|
||||
(whichever is the shorter) are available for forward matching, and similarly
|
||||
the previous 8K characters (or all the previous characters, if fewer than 8K)
|
||||
are guaranteed to be available for lookbehind assertions.
|
||||
</P>
|
||||
<P>
|
||||
<b>-N</b> <i>newline-type</i>, <b>--newline=</b><i>newline-type</i>
|
||||
The PCRE library supports five different conventions for indicating
|
||||
the ends of lines. They are the single-character sequences CR (carriage return)
|
||||
and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
|
||||
which recognizes any of the preceding three types, and an "any" convention, in
|
||||
which any Unicode line ending sequence is assumed to end a line. The Unicode
|
||||
sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
|
||||
(formfeed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
|
||||
PS (paragraph separator, U+2029).
|
||||
<br>
|
||||
<br>
|
||||
When the PCRE library is built, a default line-ending sequence is specified.
|
||||
This is normally the standard sequence for the operating system. Unless
|
||||
otherwise specified by this option, <b>pcregrep</b> uses the library's default.
|
||||
The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
|
||||
makes it possible to use <b>pcregrep</b> on files that have come from other
|
||||
environments without having to modify their line endings. If the data that is
|
||||
being scanned does not agree with the convention set by this option,
|
||||
<b>pcregrep</b> may behave in strange ways.
|
||||
</P>
|
||||
<P>
|
||||
<b>-n</b>, <b>--line-number</b>
|
||||
Precede each output line by its line number in the file, followed by a colon
|
||||
for matching lines or a hyphen for context lines. If the filename is also being
|
||||
output, it precedes the line number. This option is forced if
|
||||
<b>--line-offsets</b> is used.
|
||||
</P>
|
||||
<P>
|
||||
<b>-o</b>, <b>--only-matching</b>
|
||||
Show only the part of the line that matched a pattern. In this mode, no
|
||||
context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are
|
||||
ignored. If there is more than one match in a line, each of them is shown
|
||||
separately. If <b>-o</b> is combined with <b>-v</b> (invert the sense of the
|
||||
match to find non-matching lines), no output is generated, but the return code
|
||||
is set appropriately. This option is mutually exclusive with
|
||||
<b>--file-offsets</b> and <b>--line-offsets</b>.
|
||||
</P>
|
||||
<P>
|
||||
<b>-q</b>, <b>--quiet</b>
|
||||
Work quietly, that is, display nothing except error messages. The exit
|
||||
status indicates whether or not any matches were found.
|
||||
</P>
|
||||
<P>
|
||||
<b>-r</b>, <b>--recursive</b>
|
||||
If any given path is a directory, recursively scan the files it contains,
|
||||
taking note of any <b>--include</b> and <b>--exclude</b> settings. By default, a
|
||||
directory is read as a normal file; in some operating systems this gives an
|
||||
immediate end-of-file. This option is a shorthand for setting the <b>-d</b>
|
||||
option to "recurse".
|
||||
</P>
|
||||
<P>
|
||||
<b>-s</b>, <b>--no-messages</b>
|
||||
Suppress error messages about non-existent or unreadable files. Such files are
|
||||
quietly skipped. However, the return code is still 2, even if matches were
|
||||
found in other files.
|
||||
</P>
|
||||
<P>
|
||||
<b>-u</b>, <b>--utf-8</b>
|
||||
Operate in UTF-8 mode. This option is available only if PCRE has been compiled
|
||||
with UTF-8 support. Both patterns and subject lines must be valid strings of
|
||||
UTF-8 characters.
|
||||
</P>
|
||||
<P>
|
||||
<b>-V</b>, <b>--version</b>
|
||||
Write the version numbers of <b>pcregrep</b> and the PCRE library that is being
|
||||
used to the standard error stream.
|
||||
</P>
|
||||
<P>
|
||||
<b>-v</b>, <b>--invert-match</b>
|
||||
Invert the sense of the match, so that lines which do <i>not</i> match any of
|
||||
the patterns are the ones that are found.
|
||||
</P>
|
||||
<P>
|
||||
<b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
|
||||
Force the patterns to match only whole words. This is equivalent to having \b
|
||||
at the start and end of the pattern.
|
||||
</P>
|
||||
<P>
|
||||
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
|
||||
Force the patterns to be anchored (each must start matching at the beginning of
|
||||
a line) and in addition, require them to match entire lines. This is
|
||||
equivalent to having ^ and $ characters at the start and end of each
|
||||
alternative branch in every pattern.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
|
||||
<P>
|
||||
The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
|
||||
order, for a locale. The first one that is set is used. This can be overridden
|
||||
by the <b>--locale</b> option. If no locale is set, the PCRE library's default
|
||||
(usually the "C" locale) is used.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">NEWLINES</a><br>
|
||||
<P>
|
||||
The <b>-N</b> (<b>--newline</b>) option allows <b>pcregrep</b> to scan files with
|
||||
different newline conventions from the default. However, the setting of this
|
||||
option does not affect the way in which <b>pcregrep</b> writes information to
|
||||
the standard error and output streams. It uses the string "\n" in C
|
||||
<b>printf()</b> calls to indicate newlines, relying on the C I/O library to
|
||||
convert this to an appropriate sequence if the output is sent to a file.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
|
||||
<P>
|
||||
The majority of short and long forms of <b>pcregrep</b>'s options are the same
|
||||
as in the GNU <b>grep</b> program. Any long option of the form
|
||||
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
|
||||
(PCRE terminology). However, the <b>--locale</b>, <b>-M</b>, <b>--multiline</b>,
|
||||
<b>-u</b>, and <b>--utf-8</b> options are specific to <b>pcregrep</b>.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">OPTIONS WITH DATA</a><br>
|
||||
<P>
|
||||
There are four different ways in which an option with data can be specified.
|
||||
If a short form option is used, the data may follow immediately, or in the next
|
||||
command line item. For example:
|
||||
<pre>
|
||||
-f/some/file
|
||||
-f /some/file
|
||||
</pre>
|
||||
If a long form option is used, the data may appear in the same command line
|
||||
item, separated by an equals character, or (with one exception) it may appear
|
||||
in the next command line item. For example:
|
||||
<pre>
|
||||
--file=/some/file
|
||||
--file /some/file
|
||||
</pre>
|
||||
Note, however, that if you want to supply a file name beginning with ~ as data
|
||||
in a shell command, and have the shell expand ~ to a home directory, you must
|
||||
separate the file name from the option, because the shell does not treat ~
|
||||
specially unless it is at the start of an item.
|
||||
</P>
|
||||
<P>
|
||||
The exception to the above is the <b>--colour</b> (or <b>--color</b>) option,
|
||||
for which the data is optional. If this option does have data, it must be given
|
||||
in the first form, using an equals character. Otherwise it will be assumed that
|
||||
it has no data.
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">MATCHING ERRORS</a><br>
|
||||
<P>
|
||||
It is possible to supply a regular expression that takes a very long time to
|
||||
fail to match certain lines. Such patterns normally involve nested indefinite
|
||||
repeats, for example: (a+)*\d when matched against a line of a's with no final
|
||||
digit. The PCRE matching function has a resource limit that causes it to abort
|
||||
in these circumstances. If this happens, <b>pcregrep</b> outputs an error
|
||||
message and the line that caused the problem to the standard error stream. If
|
||||
there are more than 20 such errors, <b>pcregrep</b> gives up.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">DIAGNOSTICS</a><br>
|
||||
<P>
|
||||
Exit status is 0 if any matches were found, 1 if no matches were found, and 2
|
||||
for syntax errors and non-existent or inaccessible files (even if matches were
|
||||
found in other files) or too many matching errors. Using the <b>-s</b> option to
|
||||
suppress error messages about inaccessible files does not affect the return
|
||||
code.
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcrepattern</b>(3), <b>pcretest</b>(1).
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 01 March 2009
|
||||
<br>
|
||||
Copyright © 1997-2009 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,224 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcrematching specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcrematching man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">PCRE MATCHING ALGORITHMS</a>
|
||||
<li><a name="TOC2" href="#SEC2">REGULAR EXPRESSIONS AS TREES</a>
|
||||
<li><a name="TOC3" href="#SEC3">THE STANDARD MATCHING ALGORITHM</a>
|
||||
<li><a name="TOC4" href="#SEC4">THE ALTERNATIVE MATCHING ALGORITHM</a>
|
||||
<li><a name="TOC5" href="#SEC5">ADVANTAGES OF THE ALTERNATIVE ALGORITHM</a>
|
||||
<li><a name="TOC6" href="#SEC6">DISADVANTAGES OF THE ALTERNATIVE ALGORITHM</a>
|
||||
<li><a name="TOC7" href="#SEC7">AUTHOR</a>
|
||||
<li><a name="TOC8" href="#SEC8">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">PCRE MATCHING ALGORITHMS</a><br>
|
||||
<P>
|
||||
This document describes the two different algorithms that are available in PCRE
|
||||
for matching a compiled regular expression against a given subject string. The
|
||||
"standard" algorithm is the one provided by the <b>pcre_exec()</b> function.
|
||||
This works in the same way as Perl's matching function, and provides a
|
||||
Perl-compatible matching operation.
|
||||
</P>
|
||||
<P>
|
||||
An alternative algorithm is provided by the <b>pcre_dfa_exec()</b> function;
|
||||
this operates in a different way, and is not Perl-compatible. It has advantages
|
||||
and disadvantages compared with the standard algorithm, and these are described
|
||||
below.
|
||||
</P>
|
||||
<P>
|
||||
When there is only one possible way in which a given subject string can match a
|
||||
pattern, the two algorithms give the same answer. A difference arises, however,
|
||||
when there are multiple possibilities. For example, if the pattern
|
||||
<pre>
|
||||
^&lt;.*&gt;
|
||||
</pre>
|
||||
is matched against the string
|
||||
<pre>
|
||||
&lt;something&gt; &lt;something else&gt; &lt;something further&gt;
|
||||
</pre>
|
||||
there are three possible answers. The standard algorithm finds only one of
|
||||
them, whereas the alternative algorithm finds all three.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">REGULAR EXPRESSIONS AS TREES</a><br>
|
||||
<P>
|
||||
The set of strings that are matched by a regular expression can be represented
|
||||
as a tree structure. An unlimited repetition in the pattern makes the tree of
|
||||
infinite size, but it is still a tree. Matching the pattern to a given subject
|
||||
string (from a given starting point) can be thought of as a search of the tree.
|
||||
There are two ways to search a tree: depth-first and breadth-first, and these
|
||||
correspond to the two matching algorithms provided by PCRE.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">THE STANDARD MATCHING ALGORITHM</a><br>
|
||||
<P>
|
||||
In the terminology of Jeffrey Friedl's book "Mastering Regular
|
||||
Expressions", the standard algorithm is an "NFA algorithm". It conducts a
|
||||
depth-first search of the pattern tree. That is, it proceeds along a single
|
||||
path through the tree, checking that the subject matches what is required. When
|
||||
there is a mismatch, the algorithm tries any alternatives at the current point,
|
||||
and if they all fail, it backs up to the previous branch point in the tree, and
|
||||
tries the next alternative branch at that level. This often involves backing up
|
||||
(moving to the left) in the subject string as well. The order in which
|
||||
repetition branches are tried is controlled by the greedy or ungreedy nature of
|
||||
the quantifier.
|
||||
</P>
|
||||
<P>
|
||||
If a leaf node is reached, a matching string has been found, and at that point
|
||||
the algorithm stops. Thus, if there is more than one possible match, this
|
||||
algorithm returns the first one that it finds. Whether this is the shortest,
|
||||
the longest, or some intermediate length depends on the way the greedy and
|
||||
ungreedy repetition quantifiers are specified in the pattern.
|
||||
</P>
|
||||
<P>
|
||||
Because it ends up with a single path through the tree, it is relatively
|
||||
straightforward for this algorithm to keep track of the substrings that are
|
||||
matched by portions of the pattern in parentheses. This provides support for
|
||||
capturing parentheses and back references.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">THE ALTERNATIVE MATCHING ALGORITHM</a><br>
|
||||
<P>
|
||||
This algorithm conducts a breadth-first search of the tree. Starting from the
|
||||
first matching point in the subject, it scans the subject string from left to
|
||||
right, once, character by character, and as it does this, it remembers all the
|
||||
paths through the tree that represent valid matches. In Friedl's terminology,
|
||||
this is a kind of "DFA algorithm", though it is not implemented as a
|
||||
traditional finite state machine (it keeps multiple states active
|
||||
simultaneously).
|
||||
</P>
|
||||
<P>
|
||||
The scan continues until either the end of the subject is reached, or there are
|
||||
no more unterminated paths. At this point, terminated paths represent the
|
||||
different matching possibilities (if there are none, the match has failed).
|
||||
Thus, if there is more than one possible match, this algorithm finds all of
|
||||
them, and in particular, it finds the longest. In PCRE, there is an option to
|
||||
stop the algorithm after the first match (which is necessarily the shortest)
|
||||
has been found.
|
||||
</P>
|
||||
<P>
|
||||
Note that all the matches that are found start at the same point in the
|
||||
subject. If the pattern
|
||||
<pre>
|
||||
cat(er(pillar)?)?
|
||||
</pre>
|
||||
is matched against the string "the caterpillar catchment", the result will be
|
||||
the three strings "cat", "cater", and "caterpillar" that start at the fifth
|
||||
character of the subject. The algorithm does not automatically move on to find
|
||||
matches that start at later positions.
|
||||
</P>
|
||||
<P>
|
||||
There are a number of features of PCRE regular expressions that are not
|
||||
supported by the alternative matching algorithm. They are as follows:
|
||||
</P>
|
||||
<P>
|
||||
1. Because the algorithm finds all possible matches, the greedy or ungreedy
|
||||
nature of repetition quantifiers is not relevant. Greedy and ungreedy
|
||||
quantifiers are treated in exactly the same way. However, possessive
|
||||
quantifiers can make a difference when what follows could also match what is
|
||||
quantified, for example in a pattern like this:
|
||||
<pre>
|
||||
^a++\w!
|
||||
</pre>
|
||||
This pattern matches "aaab!" but not "aaa!", which would be matched by a
|
||||
non-possessive quantifier. Similarly, if an atomic group is present, it is
|
||||
matched as if it were a standalone pattern at the current point, and the
|
||||
longest match is then "locked in" for the rest of the overall pattern.
|
||||
</P>
|
||||
<P>
|
||||
2. When dealing with multiple paths through the tree simultaneously, it is not
|
||||
straightforward to keep track of captured substrings for the different matching
|
||||
possibilities, and PCRE's implementation of this algorithm does not attempt to
|
||||
do this. This means that no captured substrings are available.
|
||||
</P>
|
||||
<P>
|
||||
3. Because no substrings are captured, back references within the pattern are
|
||||
not supported, and cause errors if encountered.
|
||||
</P>
|
||||
<P>
|
||||
4. For the same reason, conditional expressions that use a backreference as the
|
||||
condition or test for a specific group recursion are not supported.
|
||||
</P>
|
||||
<P>
|
||||
5. Because many paths through the tree may be active, the \K escape sequence,
|
||||
which resets the start of the match when encountered (but may be on some paths
|
||||
and not on others), is not supported. It causes an error if encountered.
|
||||
</P>
|
||||
<P>
|
||||
6. Callouts are supported, but the value of the <i>capture_top</i> field is
|
||||
always 1, and the value of the <i>capture_last</i> field is always -1.
|
||||
</P>
|
||||
<P>
|
||||
7. The \C escape sequence, which (in the standard algorithm) matches a single
|
||||
byte, even in UTF-8 mode, is not supported because the alternative algorithm
|
||||
moves through the subject string one character at a time, for all active paths
|
||||
through the tree.
|
||||
</P>
|
||||
<P>
|
||||
8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
|
||||
supported. (*FAIL) is supported, and behaves like a failing negative assertion.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">ADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
|
||||
<P>
|
||||
Using the alternative matching algorithm provides the following advantages:
|
||||
</P>
|
||||
<P>
|
||||
1. All possible matches (at a single point in the subject) are automatically
|
||||
found, and in particular, the longest match is found. To find more than one
|
||||
match using the standard algorithm, you have to do kludgy things with
|
||||
callouts.
|
||||
</P>
|
||||
<P>
|
||||
2. There is much better support for partial matching. The restrictions on the
|
||||
content of the pattern that apply when using the standard algorithm for partial
|
||||
matching do not apply to the alternative algorithm. For non-anchored patterns,
|
||||
the starting position of a partial match is available.
|
||||
</P>
|
||||
<P>
|
||||
3. Because the alternative algorithm scans the subject string just once, and
|
||||
never needs to backtrack, it is possible to pass very long subject strings to
|
||||
the matching function in several pieces, checking for partial matching each
|
||||
time.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">DISADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
|
||||
<P>
|
||||
The alternative algorithm suffers from a number of disadvantages:
|
||||
</P>
|
||||
<P>
|
||||
1. It is substantially slower than the standard algorithm. This is partly
|
||||
because it has to search for all possible matches, but is also because it is
|
||||
less susceptible to optimization.
|
||||
</P>
|
||||
<P>
|
||||
2. Capturing parentheses and back references are not supported.
|
||||
</P>
|
||||
<P>
|
||||
3. Although atomic groups are supported, their use does not provide the
|
||||
performance advantage that it does for the standard algorithm.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 19 April 2008
|
||||
<br>
|
||||
Copyright © 1997-2008 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,242 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcrepartial specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcrepartial man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">PARTIAL MATCHING IN PCRE</a>
|
||||
<li><a name="TOC2" href="#SEC2">RESTRICTED PATTERNS FOR PCRE_PARTIAL</a>
|
||||
<li><a name="TOC3" href="#SEC3">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a>
|
||||
<li><a name="TOC4" href="#SEC4">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec()</a>
|
||||
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
|
||||
<li><a name="TOC6" href="#SEC6">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">PARTIAL MATCHING IN PCRE</a><br>
|
||||
<P>
|
||||
In normal use of PCRE, if the subject string that is passed to
|
||||
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> matches as far as it goes, but is
|
||||
too short to match the entire pattern, PCRE_ERROR_NOMATCH is returned. There
|
||||
are circumstances where it might be helpful to distinguish this case from other
|
||||
cases in which there is no match.
|
||||
</P>
|
||||
<P>
|
||||
Consider, for example, an application where a human is required to type in data
|
||||
for a field with specific formatting requirements. An example might be a date
|
||||
in the form <i>ddmmmyy</i>, defined by this pattern:
|
||||
<pre>
|
||||
^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$
|
||||
</pre>
|
||||
If the application sees the user's keystrokes one by one, and can check that
|
||||
what has been typed so far is potentially valid, it is able to raise an error
|
||||
as soon as a mistake is made, possibly beeping and not reflecting the
|
||||
character that has been typed. This immediate feedback is likely to be a better
|
||||
user interface than a check that is delayed until the entire string has been
|
||||
entered.
|
||||
</P>
|
||||
<P>
|
||||
PCRE supports the concept of partial matching by means of the PCRE_PARTIAL
|
||||
option, which can be set when calling <b>pcre_exec()</b> or
|
||||
<b>pcre_dfa_exec()</b>. When this flag is set for <b>pcre_exec()</b>, the return
|
||||
code PCRE_ERROR_NOMATCH is converted into PCRE_ERROR_PARTIAL if at any time
|
||||
during the matching process the last part of the subject string matched part of
|
||||
the pattern. Unfortunately, for non-anchored matching, it is not possible to
|
||||
obtain the position of the start of the partial match. No captured data is set
|
||||
when PCRE_ERROR_PARTIAL is returned.
|
||||
</P>
|
||||
<P>
|
||||
When PCRE_PARTIAL is set for <b>pcre_dfa_exec()</b>, the return code
|
||||
PCRE_ERROR_NOMATCH is converted into PCRE_ERROR_PARTIAL if the end of the
|
||||
subject is reached, there have been no complete matches, but there is still at
|
||||
least one matching possibility. The portion of the string that provided the
|
||||
partial match is set as the first matching string.
|
||||
</P>
|
||||
<P>
|
||||
Using PCRE_PARTIAL disables one of PCRE's optimizations. PCRE remembers the
|
||||
last literal byte in a pattern, and abandons matching immediately if such a
|
||||
byte is not present in the subject string. This optimization cannot be used
|
||||
for a subject string that might match only partially.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">RESTRICTED PATTERNS FOR PCRE_PARTIAL</a><br>
|
||||
<P>
|
||||
Because of the way certain internal optimizations are implemented in the
|
||||
<b>pcre_exec()</b> function, the PCRE_PARTIAL option cannot be used with all
|
||||
patterns. These restrictions do not apply when <b>pcre_dfa_exec()</b> is used.
|
||||
For <b>pcre_exec()</b>, repeated single characters such as
|
||||
<pre>
|
||||
a{2,4}
|
||||
</pre>
|
||||
and repeated single metasequences such as
|
||||
<pre>
|
||||
\d+
|
||||
</pre>
|
||||
are not permitted if the maximum number of occurrences is greater than one.
|
||||
Optional items such as \d? (where the maximum is one) are permitted.
|
||||
Quantifiers with any values are permitted after parentheses, so the invalid
|
||||
examples above can be coded thus:
|
||||
<pre>
|
||||
(a){2,4}
|
||||
(\d)+
|
||||
</pre>
|
||||
These constructions run more slowly, but for the kinds of application that are
|
||||
envisaged for this facility, this is not felt to be a major restriction.
|
||||
</P>
|
||||
<P>
|
||||
If PCRE_PARTIAL is set for a pattern that does not conform to the restrictions,
|
||||
<b>pcre_exec()</b> returns the error code PCRE_ERROR_BADPARTIAL (-13).
|
||||
You can use the PCRE_INFO_OKPARTIAL call to <b>pcre_fullinfo()</b> to find out
|
||||
if a compiled pattern can be used for partial matching.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a><br>
|
||||
<P>
|
||||
If the escape sequence \P is present in a <b>pcretest</b> data line, the
|
||||
PCRE_PARTIAL flag is used for the match. Here is a run of <b>pcretest</b> that
|
||||
uses the date example quoted above:
|
||||
<pre>
|
||||
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
|
||||
data> 25jun04\P
|
||||
0: 25jun04
|
||||
1: jun
|
||||
data> 25dec3\P
|
||||
Partial match
|
||||
data> 3ju\P
|
||||
Partial match
|
||||
data> 3juj\P
|
||||
No match
|
||||
data> j\P
|
||||
No match
|
||||
</pre>
|
||||
The first data string is matched completely, so <b>pcretest</b> shows the
|
||||
matched substrings. The remaining four strings do not match the complete
|
||||
pattern, but the first two are partial matches. The same test, using
|
||||
<b>pcre_dfa_exec()</b> matching (by means of the \D escape sequence), produces
|
||||
the following output:
|
||||
<pre>
|
||||
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
|
||||
data> 25jun04\P\D
|
||||
0: 25jun04
|
||||
data> 23dec3\P\D
|
||||
Partial match: 23dec3
|
||||
data> 3ju\P\D
|
||||
Partial match: 3ju
|
||||
data> 3juj\P\D
|
||||
No match
|
||||
data> j\P\D
|
||||
No match
|
||||
</pre>
|
||||
Notice that in this case the portion of the string that was matched is made
|
||||
available.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec()</a><br>
|
||||
<P>
|
||||
When a partial match has been found using <b>pcre_dfa_exec()</b>, it is possible
|
||||
to continue the match by providing additional subject data and calling
|
||||
<b>pcre_dfa_exec()</b> again with the same compiled regular expression, this
|
||||
time setting the PCRE_DFA_RESTART option. You must also pass the same working
|
||||
space as before, because this is where details of the previous partial match
|
||||
are stored. Here is an example using <b>pcretest</b>, using the \R escape
|
||||
sequence to set the PCRE_DFA_RESTART option (\P and \D are as above):
|
||||
<pre>
|
||||
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
|
||||
data> 23ja\P\D
|
||||
Partial match: 23ja
|
||||
data> n05\R\D
|
||||
0: n05
|
||||
</pre>
|
||||
The first call has "23ja" as the subject, and requests partial matching; the
|
||||
second call has "n05" as the subject for the continued (restarted) match.
|
||||
Notice that when the match is complete, only the last part is shown; PCRE does
|
||||
not retain the previously partially-matched string. It is up to the calling
|
||||
program to do that if it needs to.
|
||||
</P>
|
||||
<P>
|
||||
You can set PCRE_PARTIAL with PCRE_DFA_RESTART to continue partial matching
|
||||
over multiple segments. This facility can be used to pass very long subject
|
||||
strings to <b>pcre_dfa_exec()</b>. However, some care is needed for certain
|
||||
types of pattern.
|
||||
</P>
|
||||
<P>
|
||||
1. If the pattern contains tests for the beginning or end of a line, you need
|
||||
to pass the PCRE_NOTBOL or PCRE_NOTEOL options, as appropriate, when the
|
||||
subject string for any call does not contain the beginning or end of a line.
|
||||
</P>
|
||||
<P>
|
||||
2. If the pattern contains backward assertions (including \b or \B), you need
|
||||
to arrange for some overlap in the subject strings to allow for this. For
|
||||
example, you could pass the subject in chunks that are 500 bytes long, but in
|
||||
a buffer of 700 bytes, with the starting offset set to 200 and the previous 200
|
||||
bytes at the start of the buffer.
|
||||
</P>
|
||||
<P>
|
||||
3. Matching a subject string that is split into multiple segments does not
|
||||
always produce exactly the same result as matching over one single long string.
|
||||
The difference arises when there are multiple matching possibilities, because a
|
||||
partial match result is given only when there are no completed matches in a
|
||||
call to <b>pcre_dfa_exec()</b>. This means that as soon as the shortest match has
|
||||
been found, continuation to a new subject segment is no longer possible.
|
||||
Consider this <b>pcretest</b> example:
|
||||
<pre>
|
||||
re> /dog(sbody)?/
|
||||
data> do\P\D
|
||||
Partial match: do
|
||||
data> gsb\R\P\D
|
||||
0: g
|
||||
data> dogsbody\D
|
||||
0: dogsbody
|
||||
1: dog
|
||||
</pre>
|
||||
The pattern matches the words "dog" or "dogsbody". When the subject is
|
||||
presented in several parts ("do" and "gsb" being the first two) the match stops
|
||||
when "dog" has been found, and it is not possible to continue. On the other
|
||||
hand, if "dogsbody" is presented as a single string, both matches are found.
|
||||
</P>
|
||||
<P>
|
||||
Because of this phenomenon, it does not usually make sense to end a pattern
|
||||
that is going to be matched in this way with a variable repeat.
|
||||
</P>
|
||||
<P>
|
||||
4. Patterns that contain alternatives at the top level which do not all
|
||||
start with the same pattern item may not work as expected. For example,
|
||||
consider this pattern:
|
||||
<pre>
|
||||
1234|3789
|
||||
</pre>
|
||||
If the first part of the subject is "ABC123", a partial match of the first
|
||||
alternative is found at offset 3. There is no partial match for the second
|
||||
alternative, because such a match does not start at the same point in the
|
||||
subject string. Attempting to continue with the string "789" does not yield a
|
||||
match because only those alternatives that match at one point in the subject
|
||||
are remembered. The problem arises because the start of the second alternative
|
||||
matches within the first alternative. There is no problem with anchored
|
||||
patterns or patterns such as:
|
||||
<pre>
|
||||
1234|ABCD
|
||||
</pre>
|
||||
where no string can be a partial match for both alternatives.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 04 June 2007
|
||||
<br>
|
||||
Copyright © 1997-2007 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
File diff suppressed because it is too large
Load Diff
|
@ -1,173 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcreperform specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcreperform man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
PCRE PERFORMANCE
|
||||
</b><br>
|
||||
<P>
|
||||
Two aspects of performance are discussed below: memory usage and processing
|
||||
time. The way you express your pattern as a regular expression can affect both
|
||||
of them.
|
||||
</P>
|
||||
<br><b>
|
||||
MEMORY USAGE
|
||||
</b><br>
|
||||
<P>
|
||||
Patterns are compiled by PCRE into a reasonably efficient byte code, so that
|
||||
most simple patterns do not use much memory. However, there is one case where
|
||||
memory usage can be unexpectedly large. When a parenthesized subpattern has a
|
||||
quantifier with a minimum greater than 1 and/or a limited maximum, the whole
|
||||
subpattern is repeated in the compiled code. For example, the pattern
|
||||
<pre>
|
||||
(abc|def){2,4}
|
||||
</pre>
|
||||
is compiled as if it were
|
||||
<pre>
|
||||
(abc|def)(abc|def)((abc|def)(abc|def)?)?
|
||||
</pre>
|
||||
(Technical aside: It is done this way so that backtrack points within each of
|
||||
the repetitions can be independently maintained.)
|
||||
</P>
|
||||
<P>
|
||||
For regular expressions whose quantifiers use only small numbers, this is not
|
||||
usually a problem. However, if the numbers are large, and particularly if such
|
||||
repetitions are nested, the memory usage can become an embarrassment. For
|
||||
example, the very simple pattern
|
||||
<pre>
|
||||
((ab){1,1000}c){1,3}
|
||||
</pre>
|
||||
uses 51K bytes when compiled. When PCRE is compiled with its default internal
|
||||
pointer size of two bytes, the size limit on a compiled pattern is 64K, and
|
||||
this is reached with the above pattern if the outer repetition is increased
|
||||
from 3 to 4. PCRE can be compiled to use larger internal pointers and thus
|
||||
handle larger compiled patterns, but it is better to try to rewrite your
|
||||
pattern to use less memory if you can.
|
||||
</P>
|
||||
<P>
|
||||
One way of reducing the memory usage for such patterns is to make use of PCRE's
|
||||
<a href="pcrepattern.html#subpatternsassubroutines">"subroutine"</a>
|
||||
facility. Re-writing the above pattern as
|
||||
<pre>
|
||||
((ab)(?2){0,999}c)(?1){0,2}
|
||||
</pre>
|
||||
reduces the memory requirements to 18K, and indeed it remains under 20K even
|
||||
with the outer repetition increased to 100. However, this pattern is not
|
||||
exactly equivalent, because the "subroutine" calls are treated as
|
||||
<a href="pcrepattern.html#atomicgroup">atomic groups</a>
|
||||
into which there can be no backtracking if there is a subsequent matching
|
||||
failure. Therefore, PCRE cannot do this kind of rewriting automatically.
|
||||
Furthermore, there is a noticeable loss of speed when executing the modified
|
||||
pattern. Nevertheless, if the atomic grouping is not a problem and the loss of
|
||||
speed is acceptable, this kind of rewriting will allow you to process patterns
|
||||
that PCRE cannot otherwise handle.
|
||||
</P>
|
||||
<br><b>
|
||||
PROCESSING TIME
|
||||
</b><br>
|
||||
<P>
|
||||
Certain items in regular expression patterns are processed more efficiently
|
||||
than others. It is more efficient to use a character class like [aeiou] than a
|
||||
set of single-character alternatives such as (a|e|i|o|u). In general, the
|
||||
simplest construction that provides the required behaviour is usually the most
|
||||
efficient. Jeffrey Friedl's book contains a lot of useful general discussion
|
||||
about optimizing regular expressions for efficient performance. This document
|
||||
contains a few observations about PCRE.
|
||||
</P>
|
||||
<P>
|
||||
Using Unicode character properties (the \p, \P, and \X escapes) is slow,
|
||||
because PCRE has to scan a structure that contains data for over fifteen
|
||||
thousand characters whenever it needs a character's property. If you can find
|
||||
an alternative pattern that does not use character properties, it will probably
|
||||
be faster.
|
||||
</P>
|
||||
<P>
|
||||
When a pattern begins with .* not in parentheses, or in parentheses that are
|
||||
not the subject of a backreference, and the PCRE_DOTALL option is set, the
|
||||
pattern is implicitly anchored by PCRE, since it can match only at the start of
|
||||
a subject string. However, if PCRE_DOTALL is not set, PCRE cannot make this
|
||||
optimization, because the . metacharacter does not then match a newline, and if
|
||||
the subject string contains newlines, the pattern may match from the character
|
||||
immediately following one of them instead of from the very start. For example,
|
||||
the pattern
|
||||
<pre>
|
||||
.*second
|
||||
</pre>
|
||||
matches the subject "first\nand second" (where \n stands for a newline
|
||||
character), with the match starting at the seventh character. In order to do
|
||||
this, PCRE has to retry the match starting after every newline in the subject.
|
||||
</P>
|
||||
<P>
|
||||
If you are using such a pattern with subject strings that do not contain
|
||||
newlines, the best performance is obtained by setting PCRE_DOTALL, or starting
|
||||
the pattern with ^.* or ^.*? to indicate explicit anchoring. That saves PCRE
|
||||
from having to scan along the subject looking for a newline to restart at.
|
||||
</P>
|
||||
<P>
|
||||
Beware of patterns that contain nested indefinite repeats. These can take a
|
||||
long time to run when applied to a string that does not match. Consider the
|
||||
pattern fragment
|
||||
<pre>
|
||||
^(a+)*
|
||||
</pre>
|
||||
This can match "aaaa" in 16 different ways, and this number increases very
|
||||
rapidly as the string gets longer. (The * repeat can match 0, 1, 2, 3, or 4
|
||||
times, and for each of those cases other than 0 or 4, the + repeats can match
|
||||
different numbers of times.) When the remainder of the pattern is such that the
|
||||
entire match is going to fail, PCRE has in principle to try every possible
|
||||
variation, and this can take an extremely long time, even for relatively short
|
||||
strings.
|
||||
</P>
|
||||
<P>
|
||||
An optimization catches some of the more simple cases such as
|
||||
<pre>
|
||||
(a+)*b
|
||||
</pre>
|
||||
where a literal character follows. Before embarking on the standard matching
|
||||
procedure, PCRE checks that there is a "b" later in the subject string, and if
|
||||
there is not, it fails the match immediately. However, when there is no
|
||||
following literal this optimization cannot be used. You can see the difference
|
||||
by comparing the behaviour of
|
||||
<pre>
|
||||
(a+)*\d
|
||||
</pre>
|
||||
with the pattern above. The pattern above, (a+)*b, fails almost instantly when
|
||||
applied to a whole line of "a" characters, whereas (a+)*\d takes an
|
||||
appreciable time with strings longer than about 20 characters.
|
||||
</P>
|
||||
<P>
|
||||
In many cases, the solution to this kind of performance issue is to use an
|
||||
atomic group or a possessive quantifier.
|
||||
</P>
|
||||
<br><b>
|
||||
AUTHOR
|
||||
</b><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><b>
|
||||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 06 March 2007
|
||||
<br>
|
||||
Copyright © 1997-2007 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,266 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcreposix specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcreposix man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS OF POSIX API</a>
|
||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||
<li><a name="TOC3" href="#SEC3">COMPILING A PATTERN</a>
|
||||
<li><a name="TOC4" href="#SEC4">MATCHING NEWLINE CHARACTERS</a>
|
||||
<li><a name="TOC5" href="#SEC5">MATCHING A PATTERN</a>
|
||||
<li><a name="TOC6" href="#SEC6">ERROR MESSAGES</a>
|
||||
<li><a name="TOC7" href="#SEC7">MEMORY USAGE</a>
|
||||
<li><a name="TOC8" href="#SEC8">AUTHOR</a>
|
||||
<li><a name="TOC9" href="#SEC9">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS OF POSIX API</a><br>
|
||||
<P>
|
||||
<b>#include &lt;pcreposix.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
|
||||
<b>int <i>cflags</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int regexec(regex_t *<i>preg</i>, const char *<i>string</i>,</b>
|
||||
<b>size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
|
||||
<b>char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void regfree(regex_t *<i>preg</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
This set of functions provides a POSIX-style API to the PCRE regular expression
|
||||
package. See the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
documentation for a description of PCRE's native API, which contains much
|
||||
additional functionality.
|
||||
</P>
|
||||
<P>
|
||||
The functions described here are just wrapper functions that ultimately call
|
||||
the PCRE native API. Their prototypes are defined in the <b>pcreposix.h</b>
|
||||
header file, and on Unix systems the library itself is called
|
||||
<b>pcreposix.a</b>, so can be accessed by adding <b>-lpcreposix</b> to the
|
||||
command for linking an application that uses them. Because the POSIX functions
|
||||
call the native ones, it is also necessary to add <b>-lpcre</b>.
|
||||
</P>
|
||||
<P>
|
||||
I have implemented only those POSIX option bits that can be reasonably mapped
|
||||
to PCRE native options. In addition, the option REG_EXTENDED is defined with
|
||||
the value zero. This has no effect, but since programs that are written to the
|
||||
POSIX interface often use it, this makes it easier to slot in PCRE as a
|
||||
replacement library. Other POSIX options are not even defined.
|
||||
</P>
|
||||
<P>
|
||||
When PCRE is called via these functions, it is only the API that is POSIX-like
|
||||
in style. The syntax and semantics of the regular expressions themselves are
|
||||
still those of Perl, subject to the setting of various PCRE options, as
|
||||
described below. "POSIX-like in style" means that the API approximates to the
|
||||
POSIX definition; it is not fully POSIX-compatible, and in multi-byte encoding
|
||||
domains it is probably even less compatible.
|
||||
</P>
|
||||
<P>
|
||||
The header for these functions is supplied as <b>pcreposix.h</b> to avoid any
|
||||
potential clash with other POSIX libraries. It can, of course, be renamed or
|
||||
aliased as <b>regex.h</b>, which is the "correct" name. It provides two
|
||||
structure types, <i>regex_t</i> for compiled internal forms, and
|
||||
<i>regmatch_t</i> for returning captured substrings. It also defines some
|
||||
constants whose names start with "REG_"; these are used for setting options and
|
||||
identifying error codes.
|
||||
</P>
|
||||
<P>
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">COMPILING A PATTERN</a><br>
|
||||
<P>
|
||||
The function <b>regcomp()</b> is called to compile a pattern into an
|
||||
internal form. The pattern is a C string terminated by a binary zero, and
|
||||
is passed in the argument <i>pattern</i>. The <i>preg</i> argument is a pointer
|
||||
to a <b>regex_t</b> structure that is used as a base for storing information
|
||||
about the compiled regular expression.
|
||||
</P>
|
||||
<P>
|
||||
The argument <i>cflags</i> is either zero, or contains one or more of the bits
|
||||
defined by the following macros:
|
||||
<pre>
|
||||
REG_DOTALL
|
||||
</pre>
|
||||
The PCRE_DOTALL option is set when the regular expression is passed for
|
||||
compilation to the native function. Note that REG_DOTALL is not part of the
|
||||
POSIX standard.
|
||||
<pre>
|
||||
REG_ICASE
|
||||
</pre>
|
||||
The PCRE_CASELESS option is set when the regular expression is passed for
|
||||
compilation to the native function.
|
||||
<pre>
|
||||
REG_NEWLINE
|
||||
</pre>
|
||||
The PCRE_MULTILINE option is set when the regular expression is passed for
|
||||
compilation to the native function. Note that this does <i>not</i> mimic the
|
||||
defined POSIX behaviour for REG_NEWLINE (see the following section).
|
||||
<pre>
|
||||
REG_NOSUB
|
||||
</pre>
|
||||
The PCRE_NO_AUTO_CAPTURE option is set when the regular expression is passed
|
||||
for compilation to the native function. In addition, when a pattern that is
|
||||
compiled with this flag is passed to <b>regexec()</b> for matching, the
|
||||
<i>nmatch</i> and <i>pmatch</i> arguments are ignored, and no captured strings
|
||||
are returned.
|
||||
<pre>
|
||||
REG_UTF8
|
||||
</pre>
|
||||
The PCRE_UTF8 option is set when the regular expression is passed for
|
||||
compilation to the native function. This causes the pattern itself and all data
|
||||
strings used for matching it to be treated as UTF-8 strings. Note that REG_UTF8
|
||||
is not part of the POSIX standard.
|
||||
</P>
|
||||
<P>
|
||||
In the absence of these flags, no options are passed to the native function.
|
||||
This means that the regex is compiled with PCRE default semantics. In
|
||||
particular, the way it handles newline characters in the subject string is the
|
||||
Perl way, not the POSIX way. Note that setting PCRE_MULTILINE has only
|
||||
<i>some</i> of the effects specified for REG_NEWLINE. It does not affect the way
|
||||
newlines are matched by . (they aren't) or by a negative class such as [^a]
|
||||
(they are).
|
||||
</P>
|
||||
<P>
|
||||
The yield of <b>regcomp()</b> is zero on success, and non-zero otherwise. The
|
||||
<i>preg</i> structure is filled in on success, and one member of the structure
|
||||
is public: <i>re_nsub</i> contains the number of capturing subpatterns in
|
||||
the regular expression. Various error codes are defined in the header file.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">MATCHING NEWLINE CHARACTERS</a><br>
|
||||
<P>
|
||||
This area is not simple, because POSIX and Perl take different views of things.
|
||||
It is not possible to get PCRE to obey POSIX semantics, but then PCRE was never
|
||||
intended to be a POSIX engine. The following table lists the different
|
||||
possibilities for matching newline characters in PCRE:
|
||||
<pre>
|
||||
Default Change with
|
||||
|
||||
. matches newline no PCRE_DOTALL
|
||||
newline matches [^a] yes not changeable
|
||||
$ matches \n at end yes PCRE_DOLLAR_ENDONLY
|
||||
$ matches \n in middle no PCRE_MULTILINE
|
||||
^ matches \n in middle no PCRE_MULTILINE
|
||||
</pre>
|
||||
This is the equivalent table for POSIX:
|
||||
<pre>
|
||||
Default Change with
|
||||
|
||||
. matches newline yes REG_NEWLINE
|
||||
newline matches [^a] yes REG_NEWLINE
|
||||
$ matches \n at end no REG_NEWLINE
|
||||
$ matches \n in middle no REG_NEWLINE
|
||||
^ matches \n in middle no REG_NEWLINE
|
||||
</pre>
|
||||
PCRE's behaviour is the same as Perl's, except that there is no equivalent for
|
||||
PCRE_DOLLAR_ENDONLY in Perl. In both PCRE and Perl, there is no way to stop
|
||||
newline from matching [^a].
|
||||
</P>
|
||||
<P>
|
||||
The default POSIX newline handling can be obtained by setting PCRE_DOTALL and
|
||||
PCRE_DOLLAR_ENDONLY, but there is no way to make PCRE behave exactly as for the
|
||||
REG_NEWLINE action.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">MATCHING A PATTERN</a><br>
|
||||
<P>
|
||||
The function <b>regexec()</b> is called to match a compiled pattern <i>preg</i>
|
||||
against a given <i>string</i>, which is by default terminated by a zero byte
|
||||
(but see REG_STARTEND below), subject to the options in <i>eflags</i>. These can
|
||||
be:
|
||||
<pre>
|
||||
REG_NOTBOL
|
||||
</pre>
|
||||
The PCRE_NOTBOL option is set when calling the underlying PCRE matching
|
||||
function.
|
||||
<pre>
|
||||
REG_NOTEMPTY
|
||||
</pre>
|
||||
The PCRE_NOTEMPTY option is set when calling the underlying PCRE matching
|
||||
function. Note that REG_NOTEMPTY is not part of the POSIX standard. However,
|
||||
setting this option can give more POSIX-like behaviour in some situations.
|
||||
<pre>
|
||||
REG_NOTEOL
|
||||
</pre>
|
||||
The PCRE_NOTEOL option is set when calling the underlying PCRE matching
|
||||
function.
|
||||
<pre>
|
||||
REG_STARTEND
|
||||
</pre>
|
||||
The string is considered to start at <i>string</i> + <i>pmatch[0].rm_so</i> and
|
||||
to have a terminating NUL located at <i>string</i> + <i>pmatch[0].rm_eo</i>
|
||||
(there need not actually be a NUL at that location), regardless of the value of
|
||||
<i>nmatch</i>. This is a BSD extension, compatible with but not specified by
|
||||
IEEE Standard 1003.2 (POSIX.2), and should be used with caution in software
|
||||
intended to be portable to other systems. Note that a non-zero <i>rm_so</i> does
|
||||
not imply REG_NOTBOL; REG_STARTEND affects only the location of the string, not
|
||||
how it is matched.
|
||||
</P>
|
||||
<P>
|
||||
If the pattern was compiled with the REG_NOSUB flag, no data about any matched
|
||||
strings is returned. The <i>nmatch</i> and <i>pmatch</i> arguments of
|
||||
<b>regexec()</b> are ignored.
|
||||
</P>
|
||||
<P>
|
||||
Otherwise, the portion of the string that was matched, and also any captured
|
||||
substrings, are returned via the <i>pmatch</i> argument, which points to an
|
||||
array of <i>nmatch</i> structures of type <i>regmatch_t</i>, containing the
|
||||
members <i>rm_so</i> and <i>rm_eo</i>. These contain the offset to the first
|
||||
character of each substring and the offset to the first character after the end
|
||||
of each substring, respectively. The 0th element of the vector relates to the
|
||||
entire portion of <i>string</i> that was matched; subsequent elements relate to
|
||||
the capturing subpatterns of the regular expression. Unused entries in the
|
||||
array have both structure members set to -1.
|
||||
</P>
|
||||
<P>
|
||||
A successful match yields a zero return; various error codes are defined in the
|
||||
header file, of which REG_NOMATCH is the "expected" failure code.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">ERROR MESSAGES</a><br>
|
||||
<P>
|
||||
The <b>regerror()</b> function maps a non-zero errorcode from either
|
||||
<b>regcomp()</b> or <b>regexec()</b> to a printable message. If <i>preg</i> is not
|
||||
NULL, the error should have arisen from the use of that structure. A message
|
||||
terminated by a binary zero is placed in <i>errbuf</i>. The length of the
|
||||
message, including the zero, is limited to <i>errbuf_size</i>. The yield of the
|
||||
function is the size of buffer needed to hold the whole message.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">MEMORY USAGE</a><br>
|
||||
<P>
|
||||
Compiling a regular expression causes memory to be allocated and associated
|
||||
with the <i>preg</i> structure. The function <b>regfree()</b> frees all such
|
||||
memory, after which <i>preg</i> may no longer be used as a compiled expression.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 11 March 2009
|
||||
<br>
|
||||
Copyright © 1997-2009 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,148 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcreprecompile specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcreprecompile man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">SAVING AND RE-USING PRECOMPILED PCRE PATTERNS</a>
|
||||
<li><a name="TOC2" href="#SEC2">SAVING A COMPILED PATTERN</a>
|
||||
<li><a name="TOC3" href="#SEC3">RE-USING A PRECOMPILED PATTERN</a>
|
||||
<li><a name="TOC4" href="#SEC4">COMPATIBILITY WITH DIFFERENT PCRE RELEASES</a>
|
||||
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
|
||||
<li><a name="TOC6" href="#SEC6">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">SAVING AND RE-USING PRECOMPILED PCRE PATTERNS</a><br>
|
||||
<P>
|
||||
If you are running an application that uses a large number of regular
|
||||
expression patterns, it may be useful to store them in a precompiled form
|
||||
instead of having to compile them every time the application is run.
|
||||
If you are not using any private character tables (see the
|
||||
<a href="pcre_maketables.html"><b>pcre_maketables()</b></a>
|
||||
documentation), this is relatively straightforward. If you are using private
|
||||
tables, it is a little bit more complicated.
|
||||
</P>
|
||||
<P>
|
||||
If you save compiled patterns to a file, you can copy them to a different host
|
||||
and run them there. This works even if the new host has the opposite endianness
|
||||
to the one on which the patterns were compiled. There may be a small
|
||||
performance penalty, but it should be insignificant. However, compiling regular
|
||||
expressions with one version of PCRE for use with a different version is not
|
||||
guaranteed to work and may cause crashes.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">SAVING A COMPILED PATTERN</a><br>
|
||||
<P>
|
||||
The value returned by <b>pcre_compile()</b> points to a single block of memory
|
||||
that holds the compiled pattern and associated data. You can find the length of
|
||||
this block in bytes by calling <b>pcre_fullinfo()</b> with an argument of
|
||||
PCRE_INFO_SIZE. You can then save the data in any appropriate manner. Here is
|
||||
sample code that compiles a pattern and writes it to a file. It assumes that
|
||||
the variable <i>fd</i> refers to a file that is open for output:
|
||||
<pre>
|
||||
int erroroffset, rc, size;
|
||||
const char *error;
|
||||
pcre *re;
|
||||
|
||||
re = pcre_compile("my pattern", 0, &error, &erroroffset, NULL);
|
||||
if (re == NULL) { ... handle errors ... }
|
||||
rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
|
||||
if (rc < 0) { ... handle errors ... }
|
||||
rc = fwrite(re, 1, size, fd);
|
||||
if (rc != size) { ... handle errors ... }
|
||||
</pre>
|
||||
In this example, the bytes that comprise the compiled pattern are copied
|
||||
exactly. Note that this is binary data that may contain any of the 256 possible
|
||||
byte values. On systems that make a distinction between binary and non-binary
|
||||
data, be sure that the file is opened for binary output.
|
||||
</P>
|
||||
<P>
|
||||
If you want to write more than one pattern to a file, you will have to devise a
|
||||
way of separating them. For binary data, preceding each pattern with its length
|
||||
is probably the most straightforward approach. Another possibility is to write
|
||||
out the data in hexadecimal instead of binary, one pattern to a line.
|
||||
</P>
|
||||
<P>
|
||||
Saving compiled patterns in a file is only one possible way of storing them for
|
||||
later use. They could equally well be saved in a database, or in the memory of
|
||||
some daemon process that passes them via sockets to the processes that want
|
||||
them.
|
||||
</P>
|
||||
<P>
|
||||
If the pattern has been studied, it is also possible to save the study data in
|
||||
a similar way to the compiled pattern itself. When studying generates
|
||||
additional information, <b>pcre_study()</b> returns a pointer to a
|
||||
<b>pcre_extra</b> data block. Its format is defined in the
|
||||
<a href="pcreapi.html#extradata">section on matching a pattern</a>
|
||||
in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
documentation. The <i>study_data</i> field points to the binary study data, and
|
||||
this is what you must save (not the <b>pcre_extra</b> block itself). The length
|
||||
of the study data can be obtained by calling <b>pcre_fullinfo()</b> with an
|
||||
argument of PCRE_INFO_STUDYSIZE. Remember to check that <b>pcre_study()</b> did
|
||||
return a non-NULL value before trying to save the study data.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br>
|
||||
<P>
|
||||
Re-using a precompiled pattern is straightforward. Having reloaded it into main
|
||||
memory, you pass its pointer to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> in
|
||||
the usual way. This should work even on another host, and even if that host has
|
||||
the opposite endianness to the one where the pattern was compiled.
|
||||
</P>
|
||||
<P>
|
||||
However, if you passed a pointer to custom character tables when the pattern
|
||||
was compiled (the <i>tableptr</i> argument of <b>pcre_compile()</b>), you must
|
||||
now pass a similar pointer to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>,
|
||||
because the value saved with the compiled pattern will obviously be nonsense. A
|
||||
field in a <b>pcre_extra</b> block is used to pass this data, as described in
|
||||
the
|
||||
<a href="pcreapi.html#extradata">section on matching a pattern</a>
|
||||
in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<P>
|
||||
If you did not provide custom character tables when the pattern was compiled,
|
||||
the pointer in the compiled pattern is NULL, which causes <b>pcre_exec()</b> to
|
||||
use PCRE's internal tables. Thus, you do not need to take any special action at
|
||||
run time in this case.
|
||||
</P>
|
||||
<P>
|
||||
If you saved study data with the compiled pattern, you need to create your own
|
||||
<b>pcre_extra</b> data block and set the <i>study_data</i> field to point to the
|
||||
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
|
||||
<i>flags</i> field to indicate that study data is present. Then pass the
|
||||
<b>pcre_extra</b> block to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> in the
|
||||
usual way.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">COMPATIBILITY WITH DIFFERENT PCRE RELEASES</a><br>
|
||||
<P>
|
||||
In general, it is safest to recompile all saved patterns when you update to a
|
||||
new PCRE release, though not all updates actually require this. Recompiling is
|
||||
definitely needed for release 7.2.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 13 June 2007
|
||||
<br>
|
||||
Copyright © 1997-2007 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,96 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcresample specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcresample man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
PCRE SAMPLE PROGRAM
|
||||
</b><br>
|
||||
<P>
|
||||
A simple, complete demonstration program, to get you started with using PCRE,
|
||||
is supplied in the file <i>pcredemo.c</i> in the PCRE distribution.
|
||||
</P>
|
||||
<P>
|
||||
The program compiles the regular expression that is its first argument, and
|
||||
matches it against the subject string in its second argument. No PCRE options
|
||||
are set, and default character tables are used. If matching succeeds, the
|
||||
program outputs the portion of the subject that matched, together with the
|
||||
contents of any captured substrings.
|
||||
</P>
|
||||
<P>
|
||||
If the -g option is given on the command line, the program then goes on to
|
||||
check for further matches of the same regular expression in the same subject
|
||||
string. The logic is a little bit tricky because of the possibility of matching
|
||||
an empty string. Comments in the code explain what is going on.
|
||||
</P>
|
||||
<P>
|
||||
If PCRE is installed in the standard include and library directories for your
|
||||
system, you should be able to compile the demonstration program using this
|
||||
command:
|
||||
<pre>
|
||||
gcc -o pcredemo pcredemo.c -lpcre
|
||||
</pre>
|
||||
If PCRE is installed elsewhere, you may need to add additional options to the
|
||||
command line. For example, on a Unix-like system that has PCRE installed in
|
||||
<i>/usr/local</i>, you can compile the demonstration program using a command
|
||||
like this:
|
||||
<pre>
|
||||
gcc -o pcredemo -I/usr/local/include pcredemo.c -L/usr/local/lib -lpcre
|
||||
</pre>
|
||||
Once you have compiled the demonstration program, you can run simple tests like
|
||||
this:
|
||||
<pre>
|
||||
./pcredemo 'cat|dog' 'the cat sat on the mat'
|
||||
./pcredemo -g 'cat|dog' 'the dog sat on the cat'
|
||||
</pre>
|
||||
Note that there is a much more comprehensive test program, called
|
||||
<a href="pcretest.html"><b>pcretest</b></a>,
|
||||
which supports many more facilities for testing regular expressions and the
|
||||
PCRE library. The <b>pcredemo</b> program is provided as a simple coding
|
||||
example.
|
||||
</P>
|
||||
<P>
|
||||
On some operating systems (e.g. Solaris), when PCRE is not installed in the
|
||||
standard library directory, you may get an error like this when you try to run
|
||||
<b>pcredemo</b>:
|
||||
<pre>
|
||||
ld.so.1: a.out: fatal: libpcre.so.0: open failed: No such file or directory
|
||||
</pre>
|
||||
This is caused by the way shared library support works on those systems. You
|
||||
need to add
|
||||
<pre>
|
||||
-R/usr/local/lib
|
||||
</pre>
|
||||
(for example) to the compile command to get round this problem.
|
||||
</P>
|
||||
<br><b>
|
||||
AUTHOR
|
||||
</b><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><b>
|
||||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 23 January 2008
|
||||
<br>
|
||||
Copyright © 1997-2008 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,172 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcrestack specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcrestack man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
PCRE DISCUSSION OF STACK USAGE
|
||||
</b><br>
|
||||
<P>
|
||||
When you call <b>pcre_exec()</b>, it makes use of an internal function called
|
||||
<b>match()</b>. This calls itself recursively at branch points in the pattern,
|
||||
in order to remember the state of the match so that it can back up and try a
|
||||
different alternative if the first one fails. As matching proceeds deeper and
|
||||
deeper into the tree of possibilities, the recursion depth increases.
|
||||
</P>
|
||||
<P>
|
||||
Not all calls of <b>match()</b> increase the recursion depth; for an item such
|
||||
as a* it may be called several times at the same level, after matching
|
||||
different numbers of a's. Furthermore, in a number of cases where the result of
|
||||
the recursive call would immediately be passed back as the result of the
|
||||
current call (a "tail recursion"), the function is just restarted instead.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre_dfa_exec()</b> function operates in an entirely different way, and
|
||||
hardly uses recursion at all. The limit on its complexity is the amount of
|
||||
workspace it is given. The comments that follow do NOT apply to
|
||||
<b>pcre_dfa_exec()</b>; they are relevant only for <b>pcre_exec()</b>.
|
||||
</P>
|
||||
<P>
|
||||
You can set limits on the number of times that <b>match()</b> is called, both in
|
||||
total and recursively. If the limit is exceeded, an error occurs. For details,
|
||||
see the
|
||||
<a href="pcreapi.html#extradata">section on extra data for <b>pcre_exec()</b></a>
|
||||
in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<P>
|
||||
Each time that <b>match()</b> is actually called recursively, it uses memory
|
||||
from the process stack. For certain kinds of pattern and data, very large
|
||||
amounts of stack may be needed, despite the recognition of "tail recursion".
|
||||
You can often reduce the amount of recursion, and therefore the amount of stack
|
||||
used, by modifying the pattern that is being matched. Consider, for example,
|
||||
this pattern:
|
||||
<pre>
|
||||
([^<]|<(?!inet))+
|
||||
</pre>
|
||||
It matches from wherever it starts until it encounters "&lt;inet" or the end of
|
||||
the data, and is the kind of pattern that might be used when processing an XML
|
||||
file. Each iteration of the outer parentheses matches either one character that
|
||||
is not "<" or a "<" that is not followed by "inet". However, each time a
|
||||
parenthesis is processed, a recursion occurs, so this formulation uses a stack
|
||||
frame for each matched character. For a long string, a lot of stack is
|
||||
required. Consider now this rewritten pattern, which matches exactly the same
|
||||
strings:
|
||||
<pre>
|
||||
([^<]++|<(?!inet))+
|
||||
</pre>
|
||||
This uses very much less stack, because runs of characters that do not contain
|
||||
"<" are "swallowed" in one item inside the parentheses. Recursion happens only
|
||||
when a "<" character that is not followed by "inet" is encountered (and we
|
||||
assume this is relatively rare). A possessive quantifier is used to stop any
|
||||
backtracking into the runs of non-"<" characters, but that is not related to
|
||||
stack usage.
|
||||
</P>
|
||||
<P>
|
||||
This example shows that one way of avoiding stack problems when matching long
|
||||
subject strings is to write repeated parenthesized subpatterns to match more
|
||||
than one character whenever possible.
|
||||
</P>
|
||||
<br><b>
|
||||
Compiling PCRE to use heap instead of stack
|
||||
</b><br>
|
||||
<P>
|
||||
In environments where stack memory is constrained, you might want to compile
|
||||
PCRE to use heap memory instead of stack for remembering back-up points. This
|
||||
makes it run a lot more slowly, however. Details of how to do this are given in
|
||||
the
|
||||
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
||||
documentation. When built in this way, instead of using the stack, PCRE obtains
|
||||
and frees memory by calling the functions that are pointed to by the
|
||||
<b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables. By default, these
|
||||
point to <b>malloc()</b> and <b>free()</b>, but you can replace the pointers to
|
||||
cause PCRE to use your own functions. Since the block sizes are always the
|
||||
same, and are always freed in reverse order, it may be possible to implement
|
||||
customized memory handlers that are more efficient than the standard functions.
|
||||
</P>
|
||||
<br><b>
|
||||
Limiting PCRE's stack usage
|
||||
</b><br>
|
||||
<P>
|
||||
PCRE has an internal counter that can be used to limit the depth of recursion,
|
||||
and thus cause <b>pcre_exec()</b> to give an error code before it runs out of
|
||||
stack. By default, the limit is very large, and unlikely ever to operate. It
|
||||
can be changed when PCRE is built, and it can also be set when
|
||||
<b>pcre_exec()</b> is called. For details of these interfaces, see the
|
||||
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
||||
and
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<P>
|
||||
As a very rough rule of thumb, you should reckon on about 500 bytes per
|
||||
recursion. Thus, if you want to limit your stack usage to 8Mb, you
|
||||
should set the limit at 16000 recursions. A 64Mb stack, on the other hand, can
|
||||
support around 128000 recursions. The <b>pcretest</b> test program has a command
|
||||
line option (<b>-S</b>) that can be used to increase the size of its stack.
|
||||
</P>
|
||||
<br><b>
|
||||
Changing stack size in Unix-like systems
|
||||
</b><br>
|
||||
<P>
|
||||
In Unix-like environments, there is not often a problem with the stack unless
|
||||
very long strings are involved, though the default limit on stack size varies
|
||||
from system to system. Values from 8Mb to 64Mb are common. You can find your
|
||||
default limit by running the command:
|
||||
<pre>
|
||||
ulimit -s
|
||||
</pre>
|
||||
Unfortunately, the effect of running out of stack is often SIGSEGV, though
|
||||
sometimes a more explicit error message is given. You can normally increase the
|
||||
limit on stack size by code such as this:
|
||||
<pre>
|
||||
struct rlimit rlim;
|
||||
getrlimit(RLIMIT_STACK, &rlim);
|
||||
rlim.rlim_cur = 100*1024*1024;
|
||||
setrlimit(RLIMIT_STACK, &rlim);
|
||||
</pre>
|
||||
This reads the current limits (soft and hard) using <b>getrlimit()</b>, then
|
||||
attempts to increase the soft limit to 100Mb using <b>setrlimit()</b>. You must
|
||||
do this before calling <b>pcre_exec()</b>.
|
||||
</P>
|
||||
<br><b>
|
||||
Changing stack size in Mac OS X
|
||||
</b><br>
|
||||
<P>
|
||||
Using <b>setrlimit()</b>, as described above, should also work on Mac OS X. It
|
||||
is also possible to set a stack size when linking a program. There is a
|
||||
discussion about stack sizes in Mac OS X at this web site:
|
||||
<a href="http://developer.apple.com/qa/qa2005/qa1419.html">http://developer.apple.com/qa/qa2005/qa1419.html</a>.
|
||||
</P>
|
||||
<br><b>
|
||||
AUTHOR
|
||||
</b><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><b>
|
||||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 09 July 2008
|
||||
<br>
|
||||
Copyright © 1997-2008 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,473 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcresyntax specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcresyntax man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">PCRE REGULAR EXPRESSION SYNTAX SUMMARY</a>
|
||||
<li><a name="TOC2" href="#SEC2">QUOTING</a>
|
||||
<li><a name="TOC3" href="#SEC3">CHARACTERS</a>
|
||||
<li><a name="TOC4" href="#SEC4">CHARACTER TYPES</a>
|
||||
<li><a name="TOC5" href="#SEC5">GENERAL CATEGORY PROPERTY CODES FOR \p and \P</a>
|
||||
<li><a name="TOC6" href="#SEC6">SCRIPT NAMES FOR \p AND \P</a>
|
||||
<li><a name="TOC7" href="#SEC7">CHARACTER CLASSES</a>
|
||||
<li><a name="TOC8" href="#SEC8">QUANTIFIERS</a>
|
||||
<li><a name="TOC9" href="#SEC9">ANCHORS AND SIMPLE ASSERTIONS</a>
|
||||
<li><a name="TOC10" href="#SEC10">MATCH POINT RESET</a>
|
||||
<li><a name="TOC11" href="#SEC11">ALTERNATION</a>
|
||||
<li><a name="TOC12" href="#SEC12">CAPTURING</a>
|
||||
<li><a name="TOC13" href="#SEC13">ATOMIC GROUPS</a>
|
||||
<li><a name="TOC14" href="#SEC14">COMMENT</a>
|
||||
<li><a name="TOC15" href="#SEC15">OPTION SETTING</a>
|
||||
<li><a name="TOC16" href="#SEC16">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
|
||||
<li><a name="TOC17" href="#SEC17">BACKREFERENCES</a>
|
||||
<li><a name="TOC18" href="#SEC18">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
|
||||
<li><a name="TOC19" href="#SEC19">CONDITIONAL PATTERNS</a>
|
||||
<li><a name="TOC20" href="#SEC20">BACKTRACKING CONTROL</a>
|
||||
<li><a name="TOC21" href="#SEC21">NEWLINE CONVENTIONS</a>
|
||||
<li><a name="TOC22" href="#SEC22">WHAT \R MATCHES</a>
|
||||
<li><a name="TOC23" href="#SEC23">CALLOUTS</a>
|
||||
<li><a name="TOC24" href="#SEC24">SEE ALSO</a>
|
||||
<li><a name="TOC25" href="#SEC25">AUTHOR</a>
|
||||
<li><a name="TOC26" href="#SEC26">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">PCRE REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
|
||||
<P>
|
||||
The full syntax and semantics of the regular expressions that are supported by
|
||||
PCRE are described in the
|
||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||
documentation. This document contains just a quick-reference summary of the
|
||||
syntax.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">QUOTING</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
\x where x is non-alphanumeric is a literal x
|
||||
\Q...\E treat enclosed characters as literal
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">CHARACTERS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
\a alarm, that is, the BEL character (hex 07)
|
||||
\cx "control-x", where x is any character
|
||||
\e escape (hex 1B)
|
||||
\f formfeed (hex 0C)
|
||||
\n newline (hex 0A)
|
||||
\r carriage return (hex 0D)
|
||||
\t tab (hex 09)
|
||||
\ddd character with octal code ddd, or backreference
|
||||
\xhh character with hex code hh
|
||||
\x{hhh..} character with hex code hhh..
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
. any character except newline;
|
||||
in dotall mode, any character whatsoever
|
||||
\C one byte, even in UTF-8 mode (best avoided)
|
||||
\d a decimal digit
|
||||
\D a character that is not a decimal digit
|
||||
\h a horizontal whitespace character
|
||||
\H a character that is not a horizontal whitespace character
|
||||
\p{<i>xx</i>} a character with the <i>xx</i> property
|
||||
\P{<i>xx</i>} a character without the <i>xx</i> property
|
||||
\R a newline sequence
|
||||
\s a whitespace character
|
||||
\S a character that is not a whitespace character
|
||||
\v a vertical whitespace character
|
||||
\V a character that is not a vertical whitespace character
|
||||
\w a "word" character
|
||||
\W a "non-word" character
|
||||
\X an extended Unicode sequence
|
||||
</pre>
|
||||
In PCRE, \d, \D, \s, \S, \w, and \W recognize only ASCII characters.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTY CODES FOR \p and \P</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
C Other
|
||||
Cc Control
|
||||
Cf Format
|
||||
Cn Unassigned
|
||||
Co Private use
|
||||
Cs Surrogate
|
||||
|
||||
L Letter
|
||||
Ll Lower case letter
|
||||
Lm Modifier letter
|
||||
Lo Other letter
|
||||
Lt Title case letter
|
||||
Lu Upper case letter
|
||||
L& Ll, Lu, or Lt
|
||||
|
||||
M Mark
|
||||
Mc Spacing mark
|
||||
Me Enclosing mark
|
||||
Mn Non-spacing mark
|
||||
|
||||
N Number
|
||||
Nd Decimal number
|
||||
Nl Letter number
|
||||
No Other number
|
||||
|
||||
P Punctuation
|
||||
Pc Connector punctuation
|
||||
Pd Dash punctuation
|
||||
Pe Close punctuation
|
||||
Pf Final punctuation
|
||||
Pi Initial punctuation
|
||||
Po Other punctuation
|
||||
Ps Open punctuation
|
||||
|
||||
S Symbol
|
||||
Sc Currency symbol
|
||||
Sk Modifier symbol
|
||||
Sm Mathematical symbol
|
||||
So Other symbol
|
||||
|
||||
Z Separator
|
||||
Zl Line separator
|
||||
Zp Paragraph separator
|
||||
Zs Space separator
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
|
||||
<P>
|
||||
Arabic,
|
||||
Armenian,
|
||||
Balinese,
|
||||
Bengali,
|
||||
Bopomofo,
|
||||
Braille,
|
||||
Buginese,
|
||||
Buhid,
|
||||
Canadian_Aboriginal,
|
||||
Carian,
|
||||
Cham,
|
||||
Cherokee,
|
||||
Common,
|
||||
Coptic,
|
||||
Cuneiform,
|
||||
Cypriot,
|
||||
Cyrillic,
|
||||
Deseret,
|
||||
Devanagari,
|
||||
Ethiopic,
|
||||
Georgian,
|
||||
Glagolitic,
|
||||
Gothic,
|
||||
Greek,
|
||||
Gujarati,
|
||||
Gurmukhi,
|
||||
Han,
|
||||
Hangul,
|
||||
Hanunoo,
|
||||
Hebrew,
|
||||
Hiragana,
|
||||
Inherited,
|
||||
Kannada,
|
||||
Katakana,
|
||||
Kayah_Li,
|
||||
Kharoshthi,
|
||||
Khmer,
|
||||
Lao,
|
||||
Latin,
|
||||
Lepcha,
|
||||
Limbu,
|
||||
Linear_B,
|
||||
Lycian,
|
||||
Lydian,
|
||||
Malayalam,
|
||||
Mongolian,
|
||||
Myanmar,
|
||||
New_Tai_Lue,
|
||||
Nko,
|
||||
Ogham,
|
||||
Old_Italic,
|
||||
Old_Persian,
|
||||
Ol_Chiki,
|
||||
Oriya,
|
||||
Osmanya,
|
||||
Phags_Pa,
|
||||
Phoenician,
|
||||
Rejang,
|
||||
Runic,
|
||||
Saurashtra,
|
||||
Shavian,
|
||||
Sinhala,
|
||||
Sundanese,
|
||||
Syloti_Nagri,
|
||||
Syriac,
|
||||
Tagalog,
|
||||
Tagbanwa,
|
||||
Tai_Le,
|
||||
Tamil,
|
||||
Telugu,
|
||||
Thaana,
|
||||
Thai,
|
||||
Tibetan,
|
||||
Tifinagh,
|
||||
Ugaritic,
|
||||
Vai,
|
||||
Yi.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">CHARACTER CLASSES</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
[...] positive character class
|
||||
[^...] negative character class
|
||||
[x-y] range (can be used for hex characters)
|
||||
[[:xxx:]] positive POSIX named set
|
||||
[[:^xxx:]] negative POSIX named set
|
||||
|
||||
alnum alphanumeric
|
||||
alpha alphabetic
|
||||
ascii 0-127
|
||||
blank space or tab
|
||||
cntrl control character
|
||||
digit decimal digit
|
||||
graph printing, excluding space
|
||||
lower lower case letter
|
||||
print printing, including space
|
||||
punct printing, excluding alphanumeric
|
||||
space whitespace
|
||||
upper upper case letter
|
||||
word same as \w
|
||||
xdigit hexadecimal digit
|
||||
</pre>
|
||||
In PCRE, POSIX character set names recognize only ASCII characters. You can use
|
||||
\Q...\E inside a character class.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">QUANTIFIERS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
? 0 or 1, greedy
|
||||
?+ 0 or 1, possessive
|
||||
?? 0 or 1, lazy
|
||||
* 0 or more, greedy
|
||||
*+ 0 or more, possessive
|
||||
*? 0 or more, lazy
|
||||
+ 1 or more, greedy
|
||||
++ 1 or more, possessive
|
||||
+? 1 or more, lazy
|
||||
{n} exactly n
|
||||
{n,m} at least n, no more than m, greedy
|
||||
{n,m}+ at least n, no more than m, possessive
|
||||
{n,m}? at least n, no more than m, lazy
|
||||
{n,} n or more, greedy
|
||||
{n,}+ n or more, possessive
|
||||
{n,}? n or more, lazy
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
\b word boundary (only ASCII letters recognized)
|
||||
\B not a word boundary
|
||||
^ start of subject
|
||||
also after internal newline in multiline mode
|
||||
\A start of subject
|
||||
$ end of subject
|
||||
also before newline at end of subject
|
||||
also before internal newline in multiline mode
|
||||
\Z end of subject
|
||||
also before newline at end of subject
|
||||
\z end of subject
|
||||
\G first matching position in subject
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">MATCH POINT RESET</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
\K reset start of match
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">ALTERNATION</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
expr|expr|expr...
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">CAPTURING</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(...) capturing group
|
||||
(?<name>...) named capturing group (Perl)
|
||||
(?'name'...) named capturing group (Perl)
|
||||
(?P<name>...) named capturing group (Python)
|
||||
(?:...) non-capturing group
|
||||
(?|...) non-capturing group; reset group numbers for
|
||||
capturing groups in each alternative
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">ATOMIC GROUPS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?>...) atomic, non-capturing group
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">COMMENT</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?#....) comment (not nestable)
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">OPTION SETTING</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?i) caseless
|
||||
(?J) allow duplicate names
|
||||
(?m) multiline
|
||||
(?s) single line (dotall)
|
||||
(?U) default ungreedy (lazy)
|
||||
(?x) extended (ignore white space)
|
||||
(?-...) unset option(s)
|
||||
</pre>
|
||||
The following is recognized only at the start of a pattern or after one of the
|
||||
newline-setting options with similar syntax:
|
||||
<pre>
|
||||
(*UTF8) set UTF-8 mode
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC16" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?=...) positive look ahead
|
||||
(?!...) negative look ahead
|
||||
(?<=...) positive look behind
|
||||
(?<!...) negative look behind
|
||||
</pre>
|
||||
Each top-level branch of a look behind must be of a fixed length.
|
||||
</P>
|
||||
<br><a name="SEC17" href="#TOC1">BACKREFERENCES</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
\n reference by number (can be ambiguous)
|
||||
\gn reference by number
|
||||
\g{n} reference by number
|
||||
\g{-n} relative reference by number
|
||||
\k<name> reference by name (Perl)
|
||||
\k'name' reference by name (Perl)
|
||||
\g{name} reference by name (Perl)
|
||||
\k{name} reference by name (.NET)
|
||||
(?P=name) reference by name (Python)
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC18" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?R) recurse whole pattern
|
||||
(?n) call subpattern by absolute number
|
||||
(?+n) call subpattern by relative number
|
||||
(?-n) call subpattern by relative number
|
||||
(?&name) call subpattern by name (Perl)
|
||||
(?P>name) call subpattern by name (Python)
|
||||
\g<name> call subpattern by name (Oniguruma)
|
||||
\g'name' call subpattern by name (Oniguruma)
|
||||
\g<n> call subpattern by absolute number (Oniguruma)
|
||||
\g'n' call subpattern by absolute number (Oniguruma)
|
||||
\g<+n> call subpattern by relative number (PCRE extension)
|
||||
\g'+n' call subpattern by relative number (PCRE extension)
|
||||
\g<-n> call subpattern by relative number (PCRE extension)
|
||||
\g'-n' call subpattern by relative number (PCRE extension)
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC19" href="#TOC1">CONDITIONAL PATTERNS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?(condition)yes-pattern)
|
||||
(?(condition)yes-pattern|no-pattern)
|
||||
|
||||
(?(n)... absolute reference condition
|
||||
(?(+n)... relative reference condition
|
||||
(?(-n)... relative reference condition
|
||||
(?(<name>)... named reference condition (Perl)
|
||||
(?('name')... named reference condition (Perl)
|
||||
(?(name)... named reference condition (PCRE)
|
||||
(?(R)... overall recursion condition
|
||||
(?(Rn)... specific group recursion condition
|
||||
(?(R&name)... specific recursion condition
|
||||
(?(DEFINE)... define subpattern for reference
|
||||
(?(assert)... assertion condition
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC20" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||
<P>
|
||||
The following act immediately when they are reached:
|
||||
<pre>
|
||||
(*ACCEPT) force successful match
|
||||
(*FAIL) force backtrack; synonym (*F)
|
||||
</pre>
|
||||
The following act only when a subsequent match failure causes a backtrack to
|
||||
reach them. They all force a match failure, but they differ in what happens
|
||||
afterwards. Those that advance the start-of-match point do so only if the
|
||||
pattern is not anchored.
|
||||
<pre>
|
||||
(*COMMIT) overall failure, no advance of starting point
|
||||
(*PRUNE) advance to next starting character
|
||||
(*SKIP) advance start to current matching position
|
||||
(*THEN) local failure, backtrack to next alternation
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">NEWLINE CONVENTIONS</a><br>
|
||||
<P>
|
||||
These are recognized only at the very start of the pattern or after a
|
||||
(*BSR_...) or (*UTF8) option.
|
||||
<pre>
|
||||
(*CR) carriage return only
|
||||
(*LF) linefeed only
|
||||
(*CRLF) carriage return followed by linefeed
|
||||
(*ANYCRLF) all three of the above
|
||||
(*ANY) any Unicode newline sequence
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC22" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||
<P>
|
||||
These are recognized only at the very start of the pattern or after a
|
||||
(*...) option that sets the newline convention or UTF-8 mode.
|
||||
<pre>
|
||||
(*BSR_ANYCRLF) CR, LF, or CRLF
|
||||
(*BSR_UNICODE) any Unicode newline sequence
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC23" href="#TOC1">CALLOUTS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?C) callout
|
||||
(?Cn) callout with data n
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC24" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcrepattern</b>(3), <b>pcreapi</b>(3), <b>pcrecallout</b>(3),
|
||||
<b>pcrematching</b>(3), <b>pcre</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC25" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 11 April 2009
|
||||
<br>
|
||||
Copyright © 1997-2009 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,712 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcretest specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcretest man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||
<li><a name="TOC2" href="#SEC2">OPTIONS</a>
|
||||
<li><a name="TOC3" href="#SEC3">DESCRIPTION</a>
|
||||
<li><a name="TOC4" href="#SEC4">PATTERN MODIFIERS</a>
|
||||
<li><a name="TOC5" href="#SEC5">DATA LINES</a>
|
||||
<li><a name="TOC6" href="#SEC6">THE ALTERNATIVE MATCHING FUNCTION</a>
|
||||
<li><a name="TOC7" href="#SEC7">DEFAULT OUTPUT FROM PCRETEST</a>
|
||||
<li><a name="TOC8" href="#SEC8">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a>
|
||||
<li><a name="TOC9" href="#SEC9">RESTARTING AFTER A PARTIAL MATCH</a>
|
||||
<li><a name="TOC10" href="#SEC10">CALLOUTS</a>
|
||||
<li><a name="TOC11" href="#SEC11">NON-PRINTING CHARACTERS</a>
|
||||
<li><a name="TOC12" href="#SEC12">SAVING AND RELOADING COMPILED PATTERNS</a>
|
||||
<li><a name="TOC13" href="#SEC13">SEE ALSO</a>
|
||||
<li><a name="TOC14" href="#SEC14">AUTHOR</a>
|
||||
<li><a name="TOC15" href="#SEC15">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||
<P>
|
||||
<b>pcretest [options] [source] [destination]</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcretest</b> was written as a test program for the PCRE regular expression
|
||||
library itself, but it can also be used for experimenting with regular
|
||||
expressions. This document describes the features of the test program; for
|
||||
details of the regular expressions themselves, see the
|
||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||
documentation. For details of the PCRE library function calls and their
|
||||
options, see the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">OPTIONS</a><br>
|
||||
<P>
|
||||
<b>-b</b>
|
||||
Behave as if each regex has the <b>/B</b> (show bytecode) modifier; the internal
|
||||
form is output after compilation.
|
||||
</P>
|
||||
<P>
|
||||
<b>-C</b>
|
||||
Output the version number of the PCRE library, and all available information
|
||||
about the optional features that are included, and then exit.
|
||||
</P>
|
||||
<P>
|
||||
<b>-d</b>
|
||||
Behave as if each regex has the <b>/D</b> (debug) modifier; the internal
|
||||
form and information about the compiled pattern are output after compilation;
|
||||
<b>-d</b> is equivalent to <b>-b -i</b>.
|
||||
</P>
|
||||
<P>
|
||||
<b>-dfa</b>
|
||||
Behave as if each data line contains the \D escape sequence; this causes the
|
||||
alternative matching function, <b>pcre_dfa_exec()</b>, to be used instead of the
|
||||
standard <b>pcre_exec()</b> function (more detail is given below).
|
||||
</P>
|
||||
<P>
|
||||
<b>-help</b>
|
||||
Output a brief summary of these options and then exit.
|
||||
</P>
|
||||
<P>
|
||||
<b>-i</b>
|
||||
Behave as if each regex has the <b>/I</b> modifier; information about the
|
||||
compiled pattern is given after compilation.
|
||||
</P>
|
||||
<P>
|
||||
<b>-M</b>
|
||||
Behave as if each data line contains the \M escape sequence; this causes
|
||||
PCRE to discover the minimum MATCH_LIMIT and MATCH_LIMIT_RECURSION settings by
|
||||
calling <b>pcre_exec()</b> repeatedly with different limits.
|
||||
</P>
|
||||
<P>
|
||||
<b>-m</b>
|
||||
Output the size of each compiled pattern after it has been compiled. This is
|
||||
equivalent to adding <b>/M</b> to each regular expression. For compatibility
|
||||
with earlier versions of pcretest, <b>-s</b> is a synonym for <b>-m</b>.
|
||||
</P>
|
||||
<P>
|
||||
<b>-o</b> <i>osize</i>
|
||||
Set the number of elements in the output vector that is used when calling
|
||||
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> to be <i>osize</i>. The default value
|
||||
is 45, which is enough for 14 capturing subexpressions for <b>pcre_exec()</b> or
|
||||
22 different matches for <b>pcre_dfa_exec()</b>. The vector size can be
|
||||
changed for individual matching calls by including \O in the data line (see
|
||||
below).
|
||||
</P>
|
||||
<P>
|
||||
<b>-p</b>
|
||||
Behave as if each regex has the <b>/P</b> modifier; the POSIX wrapper API is
|
||||
used to call PCRE. None of the other options has any effect when <b>-p</b> is
|
||||
set.
|
||||
</P>
|
||||
<P>
|
||||
<b>-q</b>
|
||||
Do not output the version number of <b>pcretest</b> at the start of execution.
|
||||
</P>
|
||||
<P>
|
||||
<b>-S</b> <i>size</i>
|
||||
On Unix-like systems, set the size of the runtime stack to <i>size</i>
|
||||
megabytes.
|
||||
</P>
|
||||
<P>
|
||||
<b>-t</b>
|
||||
Run each compile, study, and match many times with a timer, and output
|
||||
resulting time per compile or match (in milliseconds). Do not set <b>-m</b> with
|
||||
<b>-t</b>, because you will then get the size output a zillion times, and the
|
||||
timing will be distorted. You can control the number of iterations that are
|
||||
used for timing by following <b>-t</b> with a number (as a separate item on the
|
||||
command line). For example, "-t 1000" would iterate 1000 times. The default is
|
||||
to iterate 500000 times.
|
||||
</P>
|
||||
<P>
|
||||
<b>-tm</b>
|
||||
This is like <b>-t</b> except that it times only the matching phase, not the
|
||||
compile or study phases.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
If <b>pcretest</b> is given two filename arguments, it reads from the first and
|
||||
writes to the second. If it is given only one filename argument, it reads from
|
||||
that file and writes to stdout. Otherwise, it reads from stdin and writes to
|
||||
stdout, and prompts for each line of input, using "re>" to prompt for regular
|
||||
expressions, and "data>" to prompt for data lines.
|
||||
</P>
|
||||
<P>
|
||||
When <b>pcretest</b> is built, a configuration option can specify that it should
|
||||
be linked with the <b>libreadline</b> library. When this is done, if the input
|
||||
is from a terminal, it is read using the <b>readline()</b> function. This
|
||||
provides line-editing and history facilities. The output from the <b>-help</b>
|
||||
option states whether or not <b>readline()</b> will be used.
|
||||
</P>
|
||||
<P>
|
||||
The program handles any number of sets of input on a single input file. Each
|
||||
set starts with a regular expression, and continues with any number of data
|
||||
lines to be matched against the pattern.
|
||||
</P>
|
||||
<P>
|
||||
Each data line is matched separately and independently. If you want to do
|
||||
multi-line matches, you have to use the \n escape sequence (or \r or \r\n,
|
||||
etc., depending on the newline setting) in a single line of input to encode the
|
||||
newline sequences. There is no limit on the length of data lines; the input
|
||||
buffer is automatically extended if it is too small.
|
||||
</P>
|
||||
<P>
|
||||
An empty line signals the end of the data lines, at which point a new regular
|
||||
expression is read. The regular expressions are given enclosed in any
|
||||
non-alphanumeric delimiters other than backslash, for example:
|
||||
<pre>
|
||||
/(a|bc)x+yz/
|
||||
</pre>
|
||||
White space before the initial delimiter is ignored. A regular expression may
|
||||
be continued over several input lines, in which case the newline characters are
|
||||
included within it. It is possible to include the delimiter within the pattern
|
||||
by escaping it, for example
|
||||
<pre>
|
||||
/abc\/def/
|
||||
</pre>
|
||||
If you do so, the escape and the delimiter form part of the pattern, but since
|
||||
delimiters are always non-alphanumeric, this does not affect its interpretation.
|
||||
If the terminating delimiter is immediately followed by a backslash, for
|
||||
example,
|
||||
<pre>
|
||||
/abc/\
|
||||
</pre>
|
||||
then a backslash is added to the end of the pattern. This is done to provide a
|
||||
way of testing the error condition that arises if a pattern finishes with a
|
||||
backslash, because
|
||||
<pre>
|
||||
/abc\/
|
||||
</pre>
|
||||
is interpreted as the first line of a pattern that starts with "abc/", causing
|
||||
pcretest to read the next line as a continuation of the regular expression.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">PATTERN MODIFIERS</a><br>
|
||||
<P>
|
||||
A pattern may be followed by any number of modifiers, which are mostly single
|
||||
characters. Following Perl usage, these are referred to below as, for example,
|
||||
"the <b>/i</b> modifier", even though the delimiter of the pattern need not
|
||||
always be a slash, and no slash is used when writing modifiers. Whitespace may
|
||||
appear between the final pattern delimiter and the first modifier, and between
|
||||
the modifiers themselves.
|
||||
</P>
|
||||
<P>
|
||||
The <b>/i</b>, <b>/m</b>, <b>/s</b>, and <b>/x</b> modifiers set the PCRE_CASELESS,
|
||||
PCRE_MULTILINE, PCRE_DOTALL, or PCRE_EXTENDED options, respectively, when
|
||||
<b>pcre_compile()</b> is called. These four modifier letters have the same
|
||||
effect as they do in Perl. For example:
|
||||
<pre>
|
||||
/caseless/i
|
||||
</pre>
|
||||
The following table shows additional modifiers for setting PCRE options that do
|
||||
not correspond to anything in Perl:
|
||||
<pre>
|
||||
<b>/A</b> PCRE_ANCHORED
|
||||
<b>/C</b> PCRE_AUTO_CALLOUT
|
||||
<b>/E</b> PCRE_DOLLAR_ENDONLY
|
||||
<b>/f</b> PCRE_FIRSTLINE
|
||||
<b>/J</b> PCRE_DUPNAMES
|
||||
<b>/N</b> PCRE_NO_AUTO_CAPTURE
|
||||
<b>/U</b> PCRE_UNGREEDY
|
||||
<b>/X</b> PCRE_EXTRA
|
||||
<b>/<JS></b> PCRE_JAVASCRIPT_COMPAT
|
||||
<b>/<cr></b> PCRE_NEWLINE_CR
|
||||
<b>/<lf></b> PCRE_NEWLINE_LF
|
||||
<b>/<crlf></b> PCRE_NEWLINE_CRLF
|
||||
<b>/<anycrlf></b> PCRE_NEWLINE_ANYCRLF
|
||||
<b>/<any></b> PCRE_NEWLINE_ANY
|
||||
<b>/<bsr_anycrlf></b> PCRE_BSR_ANYCRLF
|
||||
<b>/<bsr_unicode></b> PCRE_BSR_UNICODE
|
||||
</pre>
|
||||
Those specifying line ending sequences are literal strings as shown, but the
|
||||
letters can be in either case. This example sets multiline matching with CRLF
|
||||
as the line ending sequence:
|
||||
<pre>
|
||||
/^abc/m<crlf>
|
||||
</pre>
|
||||
Details of the meanings of these PCRE options are given in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><b>
|
||||
Finding all matches in a string
|
||||
</b><br>
|
||||
<P>
|
||||
Searching for all possible matches within each subject string can be requested
|
||||
by the <b>/g</b> or <b>/G</b> modifier. After finding a match, PCRE is called
|
||||
again to search the remainder of the subject string. The difference between
|
||||
<b>/g</b> and <b>/G</b> is that the former uses the <i>startoffset</i> argument to
|
||||
<b>pcre_exec()</b> to start searching at a new point within the entire string
|
||||
(which is in effect what Perl does), whereas the latter passes over a shortened
|
||||
substring. This makes a difference to the matching process if the pattern
|
||||
begins with a lookbehind assertion (including \b or \B).
|
||||
</P>
|
||||
<P>
|
||||
If any call to <b>pcre_exec()</b> in a <b>/g</b> or <b>/G</b> sequence matches an
|
||||
empty string, the next call is done with the PCRE_NOTEMPTY and PCRE_ANCHORED
|
||||
flags set in order to search for another, non-empty, match at the same point.
|
||||
If this second match fails, the start offset is advanced by one, and the normal
|
||||
match is retried. This imitates the way Perl handles such cases when using the
|
||||
<b>/g</b> modifier or the <b>split()</b> function.
|
||||
</P>
|
||||
<br><b>
|
||||
Other modifiers
|
||||
</b><br>
|
||||
<P>
|
||||
There are yet more modifiers for controlling the way <b>pcretest</b>
|
||||
operates.
|
||||
</P>
|
||||
<P>
|
||||
The <b>/+</b> modifier requests that as well as outputting the substring that
|
||||
matched the entire pattern, pcretest should in addition output the remainder of
|
||||
the subject string. This is useful for tests where the subject contains
|
||||
multiple copies of the same substring.
|
||||
</P>
|
||||
<P>
|
||||
The <b>/B</b> modifier is a debugging feature. It requests that <b>pcretest</b>
|
||||
output a representation of the compiled byte code after compilation. Normally
|
||||
this information contains length and offset values; however, if <b>/Z</b> is
|
||||
also present, this data is replaced by spaces. This is a special feature for
|
||||
use in the automatic test scripts; it ensures that the same output is generated
|
||||
for different internal link sizes.
|
||||
</P>
|
||||
<P>
|
||||
The <b>/L</b> modifier must be followed directly by the name of a locale, for
|
||||
example,
|
||||
<pre>
|
||||
/pattern/Lfr_FR
|
||||
</pre>
|
||||
For this reason, it must be the last modifier. The given locale is set,
|
||||
<b>pcre_maketables()</b> is called to build a set of character tables for the
|
||||
locale, and this is then passed to <b>pcre_compile()</b> when compiling the
|
||||
regular expression. Without an <b>/L</b> modifier, NULL is passed as the tables
|
||||
pointer; that is, <b>/L</b> applies only to the expression on which it appears.
|
||||
</P>
|
||||
<P>
|
||||
The <b>/I</b> modifier requests that <b>pcretest</b> output information about the
|
||||
compiled pattern (whether it is anchored, has a fixed first character, and
|
||||
so on). It does this by calling <b>pcre_fullinfo()</b> after compiling a
|
||||
pattern. If the pattern is studied, the results of that are also output.
|
||||
</P>
|
||||
<P>
|
||||
The <b>/D</b> modifier is a PCRE debugging feature, and is equivalent to
|
||||
<b>/BI</b>, that is, both the <b>/B</b> and the <b>/I</b> modifiers.
|
||||
</P>
|
||||
<P>
|
||||
The <b>/F</b> modifier causes <b>pcretest</b> to flip the byte order of the
|
||||
fields in the compiled pattern that contain 2-byte and 4-byte numbers. This
|
||||
facility is for testing the feature in PCRE that allows it to execute patterns
|
||||
that were compiled on a host with a different endianness. This feature is not
|
||||
available when the POSIX interface to PCRE is being used, that is, when the
|
||||
<b>/P</b> pattern modifier is specified. See also the section about saving and
|
||||
reloading compiled patterns below.
|
||||
</P>
|
||||
<P>
|
||||
The <b>/S</b> modifier causes <b>pcre_study()</b> to be called after the
|
||||
expression has been compiled, and the results used when the expression is
|
||||
matched.
|
||||
</P>
|
||||
<P>
|
||||
The <b>/M</b> modifier causes the size of memory block used to hold the compiled
|
||||
pattern to be output.
|
||||
</P>
|
||||
<P>
|
||||
The <b>/P</b> modifier causes <b>pcretest</b> to call PCRE via the POSIX wrapper
|
||||
API rather than its native API. When this is done, all other modifiers except
|
||||
<b>/i</b>, <b>/m</b>, and <b>/+</b> are ignored. REG_ICASE is set if <b>/i</b> is
|
||||
present, and REG_NEWLINE is set if <b>/m</b> is present. The wrapper functions
|
||||
force PCRE_DOLLAR_ENDONLY always, and PCRE_DOTALL unless REG_NEWLINE is set.
|
||||
</P>
|
||||
<P>
|
||||
The <b>/8</b> modifier causes <b>pcretest</b> to call PCRE with the PCRE_UTF8
|
||||
option set. This turns on support for UTF-8 character handling in PCRE,
|
||||
provided that it was compiled with this support enabled. This modifier also
|
||||
causes any non-printing characters in output strings to be printed using the
|
||||
\x{hh...} notation if they are valid UTF-8 sequences.
|
||||
</P>
|
||||
<P>
|
||||
If the <b>/?</b> modifier is used with <b>/8</b>, it causes <b>pcretest</b> to
|
||||
call <b>pcre_compile()</b> with the PCRE_NO_UTF8_CHECK option, to suppress the
|
||||
checking of the string for UTF-8 validity.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">DATA LINES</a><br>
|
||||
<P>
|
||||
Before each data line is passed to <b>pcre_exec()</b>, leading and trailing
|
||||
whitespace is removed, and it is then scanned for \ escapes. Some of these are
|
||||
pretty esoteric features, intended for checking out some of the more
|
||||
complicated features of PCRE. If you are just testing "ordinary" regular
|
||||
expressions, you probably don't need any of these. The following escapes are
|
||||
recognized:
|
||||
<pre>
|
||||
\a alarm (BEL, \x07)
|
||||
\b backspace (\x08)
|
||||
\e escape (\x1b)
|
||||
\f formfeed (\x0c)
|
||||
\n newline (\x0a)
|
||||
\qdd set the PCRE_MATCH_LIMIT limit to dd (any number of digits)
|
||||
\r carriage return (\x0d)
|
||||
\t tab (\x09)
|
||||
\v vertical tab (\x0b)
|
||||
\nnn octal character (up to 3 octal digits)
|
||||
\xhh hexadecimal character (up to 2 hex digits)
|
||||
\x{hh...} hexadecimal character, any number of digits in UTF-8 mode
|
||||
\A pass the PCRE_ANCHORED option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
|
||||
\B pass the PCRE_NOTBOL option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
|
||||
\Cdd call pcre_copy_substring() for substring dd after a successful match (number less than 32)
|
||||
\Cname call pcre_copy_named_substring() for substring "name" after a successful match (name termin-
|
||||
ated by next non-alphanumeric character)
|
||||
\C+ show the current captured substrings at callout time
|
||||
\C- do not supply a callout function
|
||||
\C!n return 1 instead of 0 when callout number n is reached
|
||||
\C!n!m return 1 instead of 0 when callout number n is reached for the mth time
|
||||
\C*n pass the number n (may be negative) as callout data; this is used as the callout return value
|
||||
\D use the <b>pcre_dfa_exec()</b> match function
|
||||
\F only shortest match for <b>pcre_dfa_exec()</b>
|
||||
\Gdd call pcre_get_substring() for substring dd after a successful match (number less than 32)
|
||||
\Gname call pcre_get_named_substring() for substring "name" after a successful match (name termin-
|
||||
ated by next non-alphanumeric character)
|
||||
\L call pcre_get_substringlist() after a successful match
|
||||
\M discover the minimum MATCH_LIMIT and MATCH_LIMIT_RECURSION settings
|
||||
\N pass the PCRE_NOTEMPTY option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
|
||||
\Odd set the size of the output vector passed to <b>pcre_exec()</b> to dd (any number of digits)
|
||||
\P pass the PCRE_PARTIAL option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
|
||||
\Qdd set the PCRE_MATCH_LIMIT_RECURSION limit to dd (any number of digits)
|
||||
\R pass the PCRE_DFA_RESTART option to <b>pcre_dfa_exec()</b>
|
||||
\S output details of memory get/free calls during matching
|
||||
\Z pass the PCRE_NOTEOL option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
|
||||
\? pass the PCRE_NO_UTF8_CHECK option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
|
||||
\>dd start the match at offset dd (any number of digits);
|
||||
this sets the <i>startoffset</i> argument for <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
|
||||
\<cr> pass the PCRE_NEWLINE_CR option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
|
||||
\<lf> pass the PCRE_NEWLINE_LF option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
|
||||
\<crlf> pass the PCRE_NEWLINE_CRLF option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
|
||||
\<anycrlf> pass the PCRE_NEWLINE_ANYCRLF option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
|
||||
\<any> pass the PCRE_NEWLINE_ANY option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
|
||||
</pre>
|
||||
The escapes that specify line ending sequences are literal strings, exactly as
|
||||
shown. No more than one newline setting should be present in any data line.
|
||||
</P>
|
||||
<P>
|
||||
A backslash followed by anything else just escapes the anything else. If
|
||||
the very last character is a backslash, it is ignored. This gives a way of
|
||||
passing an empty line as data, since a real empty line terminates the data
|
||||
input.
|
||||
</P>
|
||||
<P>
|
||||
If \M is present, <b>pcretest</b> calls <b>pcre_exec()</b> several times, with
|
||||
different values in the <i>match_limit</i> and <i>match_limit_recursion</i>
|
||||
fields of the <b>pcre_extra</b> data structure, until it finds the minimum
|
||||
numbers for each parameter that allow <b>pcre_exec()</b> to complete. The
|
||||
<i>match_limit</i> number is a measure of the amount of backtracking that takes
|
||||
place, and checking it out can be instructive. For most simple matches, the
|
||||
number is quite small, but for patterns with very large numbers of matching
|
||||
possibilities, it can become large very quickly with increasing length of
|
||||
subject string. The <i>match_limit_recursion</i> number is a measure of how much
|
||||
stack (or, if PCRE is compiled with NO_RECURSE, how much heap) memory is needed
|
||||
to complete the match attempt.
|
||||
</P>
|
||||
<P>
|
||||
When \O is used, the value specified may be higher or lower than the size set
|
||||
by the <b>-O</b> command line option (or defaulted to 45); \O applies only to
|
||||
the call of <b>pcre_exec()</b> for the line in which it appears.
|
||||
</P>
|
||||
<P>
|
||||
If the <b>/P</b> modifier was present on the pattern, causing the POSIX wrapper
|
||||
API to be used, the only option-setting sequences that have any effect are \B
|
||||
and \Z, causing REG_NOTBOL and REG_NOTEOL, respectively, to be passed to
|
||||
<b>regexec()</b>.
|
||||
</P>
|
||||
<P>
|
||||
The use of \x{hh...} to represent UTF-8 characters is not dependent on the use
|
||||
of the <b>/8</b> modifier on the pattern. It is recognized always. There may be
|
||||
any number of hexadecimal digits inside the braces. The result is from one to
|
||||
six bytes, encoded according to the original UTF-8 rules of RFC 2279. This
|
||||
allows for values in the range 0 to 0x7FFFFFFF. Note that not all of those are
|
||||
valid Unicode code points, or indeed valid UTF-8 characters according to the
|
||||
later rules in RFC 3629.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||
<P>
|
||||
By default, <b>pcretest</b> uses the standard PCRE matching function,
|
||||
<b>pcre_exec()</b> to match each data line. From release 6.0, PCRE supports an
|
||||
alternative matching function, <b>pcre_dfa_exec()</b>, which operates in a
|
||||
different way, and has some restrictions. The differences between the two
|
||||
functions are described in the
|
||||
<a href="pcrematching.html"><b>pcrematching</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<P>
|
||||
If a data line contains the \D escape sequence, or if the command line
|
||||
contains the <b>-dfa</b> option, the alternative matching function is called.
|
||||
This function finds all possible matches at a given point. If, however, the \F
|
||||
escape sequence is present in the data line, it stops after the first match is
|
||||
found. This is always the shortest possible match.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">DEFAULT OUTPUT FROM PCRETEST</a><br>
|
||||
<P>
|
||||
This section describes the output when the normal matching function,
|
||||
<b>pcre_exec()</b>, is being used.
|
||||
</P>
|
||||
<P>
|
||||
When a match succeeds, pcretest outputs the list of captured substrings that
|
||||
<b>pcre_exec()</b> returns, starting with number 0 for the string that matched
|
||||
the whole pattern. Otherwise, it outputs "No match" or "Partial match"
|
||||
when <b>pcre_exec()</b> returns PCRE_ERROR_NOMATCH or PCRE_ERROR_PARTIAL,
|
||||
respectively, and otherwise the PCRE negative error number. Here is an example
|
||||
of an interactive <b>pcretest</b> run.
|
||||
<pre>
|
||||
$ pcretest
|
||||
PCRE version 7.0 30-Nov-2006
|
||||
|
||||
re> /^abc(\d+)/
|
||||
data> abc123
|
||||
0: abc123
|
||||
1: 123
|
||||
data> xyz
|
||||
No match
|
||||
</pre>
|
||||
Note that unset capturing substrings that are not followed by one that is set
|
||||
are not returned by <b>pcre_exec()</b>, and are not shown by <b>pcretest</b>. In
|
||||
the following example, there are two capturing substrings, but when the first
|
||||
data line is matched, the second, unset substring is not shown. An "internal"
|
||||
unset substring is shown as "<unset>", as for the second data line.
|
||||
<pre>
|
||||
re> /(a)|(b)/
|
||||
data> a
|
||||
0: a
|
||||
1: a
|
||||
data> b
|
||||
0: b
|
||||
1: <unset>
|
||||
2: b
|
||||
</pre>
|
||||
If the strings contain any non-printing characters, they are output as \x
|
||||
escapes, or as \x{...} escapes if the <b>/8</b> modifier was present on the
|
||||
pattern. See below for the definition of non-printing characters. If the
|
||||
pattern has the <b>/+</b> modifier, the output for substring 0 is followed by
|
||||
the rest of the subject string, identified by "0+" like this:
|
||||
<pre>
|
||||
re> /cat/+
|
||||
data> cataract
|
||||
0: cat
|
||||
0+ aract
|
||||
</pre>
|
||||
If the pattern has the <b>/g</b> or <b>/G</b> modifier, the results of successive
|
||||
matching attempts are output in sequence, like this:
|
||||
<pre>
|
||||
re> /\Bi(\w\w)/g
|
||||
data> Mississippi
|
||||
0: iss
|
||||
1: ss
|
||||
0: iss
|
||||
1: ss
|
||||
0: ipp
|
||||
1: pp
|
||||
</pre>
|
||||
"No match" is output only if the first match attempt fails.
|
||||
</P>
|
||||
<P>
|
||||
If any of the sequences <b>\C</b>, <b>\G</b>, or <b>\L</b> are present in a
|
||||
data line that is successfully matched, the substrings extracted by the
|
||||
convenience functions are output with C, G, or L after the string number
|
||||
instead of a colon. This is in addition to the normal full list. The string
|
||||
length (that is, the return from the extraction function) is given in
|
||||
parentheses after each string for <b>\C</b> and <b>\G</b>.
|
||||
</P>
|
||||
<P>
|
||||
Note that whereas patterns can be continued over several lines (a plain ">"
|
||||
prompt is used for continuations), data lines may not. However newlines can be
|
||||
included in data by means of the \n escape (or \r, \r\n, etc., depending on
|
||||
the newline sequence setting).
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||
<P>
|
||||
When the alternative matching function, <b>pcre_dfa_exec()</b>, is used (by
|
||||
means of the \D escape sequence or the <b>-dfa</b> command line option), the
|
||||
output consists of a list of all the matches that start at the first point in
|
||||
the subject where there is at least one match. For example:
|
||||
<pre>
|
||||
re> /(tang|tangerine|tan)/
|
||||
data> yellow tangerine\D
|
||||
0: tangerine
|
||||
1: tang
|
||||
2: tan
|
||||
</pre>
|
||||
(Using the normal matching function on this data finds only "tang".) The
|
||||
longest matching string is always given first (and numbered zero).
|
||||
</P>
|
||||
<P>
|
||||
If <b>/g</b> is present on the pattern, the search for further matches resumes
|
||||
at the end of the longest match. For example:
|
||||
<pre>
|
||||
re> /(tang|tangerine|tan)/g
|
||||
data> yellow tangerine and tangy sultana\D
|
||||
0: tangerine
|
||||
1: tang
|
||||
2: tan
|
||||
0: tang
|
||||
1: tan
|
||||
0: tan
|
||||
</pre>
|
||||
Since the matching function does not support substring capture, the escape
|
||||
sequences that are concerned with captured substrings are not relevant.
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">RESTARTING AFTER A PARTIAL MATCH</a><br>
|
||||
<P>
|
||||
When the alternative matching function has given the PCRE_ERROR_PARTIAL return,
|
||||
indicating that the subject partially matched the pattern, you can restart the
|
||||
match with additional subject data by means of the \R escape sequence. For
|
||||
example:
|
||||
<pre>
|
||||
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
|
||||
data> 23ja\P\D
|
||||
Partial match: 23ja
|
||||
data> n05\R\D
|
||||
0: n05
|
||||
</pre>
|
||||
For further information about partial matching, see the
|
||||
<a href="pcrepartial.html"><b>pcrepartial</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">CALLOUTS</a><br>
|
||||
<P>
|
||||
If the pattern contains any callout requests, <b>pcretest</b>'s callout function
|
||||
is called during matching. This works with both matching functions. By default,
|
||||
the called function displays the callout number, the start and current
|
||||
positions in the text at the callout time, and the next pattern item to be
|
||||
tested. For example, the output
|
||||
<pre>
|
||||
--->pqrabcdef
|
||||
0 ^ ^ \d
|
||||
</pre>
|
||||
indicates that callout number 0 occurred for a match attempt starting at the
|
||||
fourth character of the subject string, when the pointer was at the seventh
|
||||
character of the data, and when the next pattern item was \d. Just one
|
||||
circumflex is output if the start and current positions are the same.
|
||||
</P>
|
||||
<P>
|
||||
Callouts numbered 255 are assumed to be automatic callouts, inserted as a
|
||||
result of the <b>/C</b> pattern modifier. In this case, instead of showing the
|
||||
callout number, the offset in the pattern, preceded by a plus, is output. For
|
||||
example:
|
||||
<pre>
|
||||
re> /\d?[A-E]\*/C
|
||||
data> E*
|
||||
--->E*
|
||||
+0 ^ \d?
|
||||
+3 ^ [A-E]
|
||||
+8 ^^ \*
|
||||
+10 ^ ^
|
||||
0: E*
|
||||
</pre>
|
||||
The callout function in <b>pcretest</b> returns zero (carry on matching) by
|
||||
default, but you can use a \C item in a data line (as described above) to
|
||||
change this.
|
||||
</P>
|
||||
<P>
|
||||
Inserting callouts can be helpful when using <b>pcretest</b> to check
|
||||
complicated regular expressions. For further information about callouts, see
|
||||
the
|
||||
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">NON-PRINTING CHARACTERS</a><br>
|
||||
<P>
|
||||
When <b>pcretest</b> is outputting text in the compiled version of a pattern,
|
||||
bytes other than 32-126 are always treated as non-printing characters and are
|
||||
therefore shown as hex escapes.
|
||||
</P>
|
||||
<P>
|
||||
When <b>pcretest</b> is outputting text that is a matched part of a subject
|
||||
string, it behaves in the same way, unless a different locale has been set for
|
||||
the pattern (using the <b>/L</b> modifier). In this case, the <b>isprint()</b>
|
||||
function is used to distinguish printing and non-printing characters.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">SAVING AND RELOADING COMPILED PATTERNS</a><br>
|
||||
<P>
|
||||
The facilities described in this section are not available when the POSIX
|
||||
interface to PCRE is being used, that is, when the <b>/P</b> pattern modifier is
|
||||
specified.
|
||||
</P>
|
||||
<P>
|
||||
When the POSIX interface is not in use, you can cause <b>pcretest</b> to write a
|
||||
compiled pattern to a file, by following the modifiers with > and a file name.
|
||||
For example:
|
||||
<pre>
|
||||
/pattern/im >/some/file
|
||||
</pre>
|
||||
See the
|
||||
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
|
||||
documentation for a discussion about saving and re-using compiled patterns.
|
||||
</P>
|
||||
<P>
|
||||
The data that is written is binary. The first eight bytes are the length of the
|
||||
compiled pattern data followed by the length of the optional study data, each
|
||||
written as four bytes in big-endian order (most significant byte first). If
|
||||
there is no study data (either the pattern was not studied, or studying did not
|
||||
return any data), the second length is zero. The lengths are followed by an
|
||||
exact copy of the compiled pattern. If there is additional study data, this
|
||||
follows immediately after the compiled pattern. After writing the file,
|
||||
<b>pcretest</b> expects to read a new pattern.
|
||||
</P>
|
||||
<P>
|
||||
A saved pattern can be reloaded into <b>pcretest</b> by specifying < and a file
|
||||
name instead of a pattern. The name of the file must not contain a < character,
|
||||
as otherwise <b>pcretest</b> will interpret the line as a pattern delimited by <
|
||||
characters.
|
||||
For example:
|
||||
<pre>
|
||||
re> </some/file
|
||||
Compiled regex loaded from /some/file
|
||||
No study data
|
||||
</pre>
|
||||
When the pattern has been loaded, <b>pcretest</b> proceeds to read data lines in
|
||||
the usual way.
|
||||
</P>
|
||||
<P>
|
||||
You can copy a file written by <b>pcretest</b> to a different host and reload it
|
||||
there, even if the new host has opposite endianness to the one on which the
|
||||
pattern was compiled. For example, you can compile on an i86 machine and run on
|
||||
a SPARC machine.
|
||||
</P>
|
||||
<P>
|
||||
File names for saving and reloading can be absolute or relative, but note that
|
||||
the shell facility of expanding a file name that starts with a tilde (~) is not
|
||||
available.
|
||||
</P>
|
||||
<P>
|
||||
The ability to save and reload files in <b>pcretest</b> is intended for testing
|
||||
and experimentation. It is not intended for production use because only a
|
||||
single pattern can be written to a file. Furthermore, there is no facility for
|
||||
supplying custom character tables for use with a reloaded pattern. If the
|
||||
original pattern was compiled with custom tables, an attempt to match a subject
|
||||
string using a reloaded pattern is likely to cause <b>pcretest</b> to crash.
|
||||
Finally, if you attempt to load a file that is not in the correct format, the
|
||||
result is undefined.
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre</b>(3), <b>pcreapi</b>(3), <b>pcrecallout</b>(3), <b>pcrematching</b>(3),
|
||||
<b>pcrepartial</b>(3), <b>pcrepattern</b>(3), <b>pcreprecompile</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 10 March 2009
|
||||
<br>
|
||||
Copyright © 1997-2009 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -1,140 +0,0 @@
|
|||
<html>
|
||||
<!-- This is a manually maintained file that is the root of the HTML version of
|
||||
the PCRE documentation. When the HTML documents are built from the man
|
||||
page versions, the entire doc/html directory is emptied, this file is then
|
||||
copied into doc/html/index.html, and the remaining files therein are
|
||||
created by the 132html script.
|
||||
-->
|
||||
<head>
|
||||
<title>PCRE specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>Perl-compatible Regular Expressions (PCRE)</h1>
|
||||
<p>
|
||||
The HTML documentation for PCRE comprises the following pages:
|
||||
</p>
|
||||
|
||||
<table>
|
||||
<tr><td><a href="pcre.html">pcre</a></td>
|
||||
<td> Introductory page</td></tr>
|
||||
|
||||
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
||||
<td> Information about the installation configuration</td></tr>
|
||||
|
||||
<tr><td><a href="pcreapi.html">pcreapi</a></td>
|
||||
<td> PCRE's native API</td></tr>
|
||||
|
||||
<tr><td><a href="pcrebuild.html">pcrebuild</a></td>
|
||||
<td> Options for building PCRE</td></tr>
|
||||
|
||||
<tr><td><a href="pcrecallout.html">pcrecallout</a></td>
|
||||
<td> The <i>callout</i> facility</td></tr>
|
||||
|
||||
<tr><td><a href="pcrecompat.html">pcrecompat</a></td>
|
||||
<td> Compatibility with Perl</td></tr>
|
||||
|
||||
<tr><td><a href="pcrecpp.html">pcrecpp</a></td>
|
||||
<td> The C++ wrapper for the PCRE library</td></tr>
|
||||
|
||||
<tr><td><a href="pcregrep.html">pcregrep</a></td>
|
||||
<td> The <b>pcregrep</b> command</td></tr>
|
||||
|
||||
<tr><td><a href="pcrematching.html">pcrematching</a></td>
|
||||
<td> Discussion of the two matching algorithms</td></tr>
|
||||
|
||||
<tr><td><a href="pcrepartial.html">pcrepartial</a></td>
|
||||
<td> Using PCRE for partial matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcrepattern.html">pcrepattern</a></td>
|
||||
<td> Specification of the regular expressions supported by PCRE</td></tr>
|
||||
|
||||
<tr><td><a href="pcreperform.html">pcreperform</a></td>
|
||||
<td> Some comments on performance</td></tr>
|
||||
|
||||
<tr><td><a href="pcreposix.html">pcreposix</a></td>
|
||||
<td> The POSIX API to the PCRE library</td></tr>
|
||||
|
||||
<tr><td><a href="pcreprecompile.html">pcreprecompile</a></td>
|
||||
<td> How to save and re-use compiled patterns</td></tr>
|
||||
|
||||
<tr><td><a href="pcresample.html">pcresample</a></td>
|
||||
<td> Description of the sample program</td></tr>
|
||||
|
||||
<tr><td><a href="pcrestack.html">pcrestack</a></td>
|
||||
<td> Discussion of PCRE's stack usage</td></tr>
|
||||
|
||||
<tr><td><a href="pcresyntax.html">pcresyntax</a></td>
|
||||
<td> Syntax quick-reference summary</td></tr>
|
||||
|
||||
<tr><td><a href="pcretest.html">pcretest</a></td>
|
||||
<td> The <b>pcretest</b> command for testing PCRE</td></tr>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
There are also individual pages that summarize the interface for each function
|
||||
in the library:
|
||||
</p>
|
||||
|
||||
<table>
|
||||
|
||||
<tr><td><a href="pcre_compile.html">pcre_compile</a></td>
|
||||
<td> Compile a regular expression</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_compile2.html">pcre_compile2</a></td>
|
||||
<td> Compile a regular expression (alternate interface)</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_config.html">pcre_config</a></td>
|
||||
<td> Show build-time configuration options</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_copy_named_substring.html">pcre_copy_named_substring</a></td>
|
||||
<td> Extract named substring into given buffer</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_copy_substring.html">pcre_copy_substring</a></td>
|
||||
<td> Extract numbered substring into given buffer</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_dfa_exec.html">pcre_dfa_exec</a></td>
|
||||
<td> Match a compiled pattern to a subject string
|
||||
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
|
||||
<td> Match a compiled pattern to a subject string
|
||||
(Perl compatible)</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_free_substring.html">pcre_free_substring</a></td>
|
||||
<td> Free extracted substring</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_free_substring_list.html">pcre_free_substring_list</a></td>
|
||||
<td> Free list of extracted substrings</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_fullinfo.html">pcre_fullinfo</a></td>
|
||||
<td> Extract information about a pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_named_substring.html">pcre_get_named_substring</a></td>
|
||||
<td> Extract named substring into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_stringnumber.html">pcre_get_stringnumber</a></td>
|
||||
<td> Convert captured string name to number</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_substring.html">pcre_get_substring</a></td>
|
||||
<td> Extract numbered substring into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_substring_list.html">pcre_get_substring_list</a></td>
|
||||
<td> Extract all substrings into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_info.html">pcre_info</a></td>
|
||||
<td> Obsolete information extraction function</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_maketables.html">pcre_maketables</a></td>
|
||||
<td> Build character tables in current locale</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_refcount.html">pcre_refcount</a></td>
|
||||
<td> Maintain reference count in compiled pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_study.html">pcre_study</a></td>
|
||||
<td> Study a compiled pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_version.html">pcre_version</a></td>
|
||||
<td> Return PCRE version and release date</td></tr>
|
||||
</table>
|
||||
|
||||
</html>
|
|
@ -1,73 +0,0 @@
|
|||
.TH PCRE-CONFIG 1
|
||||
.SH NAME
|
||||
pcre-config - program to return PCRE configuration
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
||||
.ti +5n
|
||||
.B [--libs-posix] [--cflags] [--cflags-posix]
|
||||
.
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
\fBpcre-config\fP returns the configuration of the installed PCRE
|
||||
libraries and the options required to compile a program to use them.
|
||||
.
|
||||
.
|
||||
.SH OPTIONS
|
||||
.rs
|
||||
.TP 10
|
||||
\fB--prefix\fP
|
||||
Writes the directory prefix used in the PCRE installation for architecture
|
||||
independent files (\fI/usr\fP on many systems, \fI/usr/local\fP on some
|
||||
systems) to the standard output.
|
||||
.TP 10
|
||||
\fB--exec-prefix\fP
|
||||
Writes the directory prefix used in the PCRE installation for architecture
|
||||
dependent files (normally the same as \fB--prefix\fP) to the standard output.
|
||||
.TP 10
|
||||
\fB--version\fP
|
||||
Writes the version number of the installed PCRE libraries to the standard
|
||||
output.
|
||||
.TP 10
|
||||
\fB--libs\fP
|
||||
Writes to the standard output the command line options required to link
|
||||
with PCRE (\fB-lpcre\fP on many systems).
|
||||
.TP 10
|
||||
\fB--libs-posix\fP
|
||||
Writes to the standard output the command line options required to link with
|
||||
the PCRE posix emulation library (\fB-lpcreposix\fP \fB-lpcre\fP on many
|
||||
systems).
|
||||
.TP 10
|
||||
\fB--cflags\fP
|
||||
Writes to the standard output the command line options required to compile
|
||||
files that use PCRE (this may include some \fB-I\fP options, but is blank on
|
||||
many systems).
|
||||
.TP 10
|
||||
\fB--cflags-posix\fP
|
||||
Writes to the standard output the command line options required to compile
|
||||
files that use the PCRE posix emulation library (this may include some \fB-I\fP
|
||||
options, but is blank on many systems).
|
||||
.
|
||||
.
|
||||
.SH "SEE ALSO"
|
||||
.rs
|
||||
.sp
|
||||
\fBpcre(3)\fP
|
||||
.
|
||||
.
|
||||
.SH AUTHOR
|
||||
.rs
|
||||
.sp
|
||||
This manual page was originally written by Mark Baker for the Debian GNU/Linux
|
||||
system. It has been slightly revised as a generic PCRE man page.
|
||||
.
|
||||
.
|
||||
.SH REVISION
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 18 April 2007
|
||||
.fi
|
|
@ -1,67 +0,0 @@
|
|||
PCRE-CONFIG(1) PCRE-CONFIG(1)
|
||||
|
||||
|
||||
|
||||
NAME
|
||||
pcre-config - program to return PCRE configuration
|
||||
|
||||
SYNOPSIS
|
||||
|
||||
pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
||||
[--libs-posix] [--cflags] [--cflags-posix]
|
||||
|
||||
|
||||
DESCRIPTION
|
||||
|
||||
pcre-config returns the configuration of the installed PCRE libraries
|
||||
and the options required to compile a program to use them.
|
||||
|
||||
|
||||
OPTIONS
|
||||
|
||||
--prefix Writes the directory prefix used in the PCRE installation for
|
||||
architecture independent files (/usr on many systems,
|
||||
/usr/local on some systems) to the standard output.
|
||||
|
||||
--exec-prefix
|
||||
Writes the directory prefix used in the PCRE installation for
|
||||
architecture dependent files (normally the same as --prefix)
|
||||
to the standard output.
|
||||
|
||||
--version Writes the version number of the installed PCRE libraries to
|
||||
the standard output.
|
||||
|
||||
--libs Writes to the standard output the command line options
|
||||
required to link with PCRE (-lpcre on many systems).
|
||||
|
||||
--libs-posix
|
||||
Writes to the standard output the command line options
|
||||
required to link with the PCRE posix emulation library
|
||||
(-lpcreposix -lpcre on many systems).
|
||||
|
||||
--cflags Writes to the standard output the command line options
|
||||
required to compile files that use PCRE (this may include
|
||||
some -I options, but is blank on many systems).
|
||||
|
||||
--cflags-posix
|
||||
Writes to the standard output the command line options
|
||||
required to compile files that use the PCRE posix emulation
|
||||
library (this may include some -I options, but is blank on
|
||||
many systems).
|
||||
|
||||
|
||||
SEE ALSO
|
||||
|
||||
pcre(3)
|
||||
|
||||
|
||||
AUTHOR
|
||||
|
||||
This manual page was originally written by Mark Baker for the Debian
|
||||
GNU/Linux system. It has been slightly revised as a generic PCRE man
|
||||
page.
|
||||
|
||||
|
||||
REVISION
|
||||
|
||||
Last updated: 18 April 2007
|
|
@ -1,296 +0,0 @@
|
|||
.TH PCRE 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH INTRODUCTION
|
||||
.rs
|
||||
.sp
|
||||
The PCRE library is a set of functions that implement regular expression
|
||||
pattern matching using the same syntax and semantics as Perl, with just a few
|
||||
differences. Certain features that appeared in Python and PCRE before they
|
||||
appeared in Perl are also available using the Python syntax. There is also some
|
||||
support for certain .NET and Oniguruma syntax items, and there is an option for
|
||||
requesting some minor changes that give better JavaScript compatibility.
|
||||
.P
|
||||
The current implementation of PCRE (release 7.x) corresponds approximately with
|
||||
Perl 5.10, including support for UTF-8 encoded strings and Unicode general
|
||||
category properties. However, UTF-8 and Unicode support has to be explicitly
|
||||
enabled; it is not the default. The Unicode tables correspond to Unicode
|
||||
release 5.1.
|
||||
.P
|
||||
In addition to the Perl-compatible matching function, PCRE contains an
|
||||
alternative matching function that matches the same compiled patterns in a
|
||||
different way. In certain circumstances, the alternative function has some
|
||||
advantages. For a discussion of the two matching algorithms, see the
|
||||
.\" HREF
|
||||
\fBpcrematching\fP
|
||||
.\"
|
||||
page.
|
||||
.P
|
||||
PCRE is written in C and released as a C library. A number of people have
|
||||
written wrappers and interfaces of various kinds. In particular, Google Inc.
|
||||
have provided a comprehensive C++ wrapper. This is now included as part of the
|
||||
PCRE distribution. The
|
||||
.\" HREF
|
||||
\fBpcrecpp\fP
|
||||
.\"
|
||||
page has details of this interface. Other people's contributions can be found
|
||||
in the \fIContrib\fR directory at the primary FTP site, which is:
|
||||
.sp
|
||||
.\" HTML <a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre">
|
||||
.\" </a>
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
|
||||
.P
|
||||
Details of exactly which Perl regular expression features are and are not
|
||||
supported by PCRE are given in separate documents. See the
|
||||
.\" HREF
|
||||
\fBpcrepattern\fR
|
||||
.\"
|
||||
and
|
||||
.\" HREF
|
||||
\fBpcrecompat\fR
|
||||
.\"
|
||||
pages. There is a syntax summary in the
|
||||
.\" HREF
|
||||
\fBpcresyntax\fR
|
||||
.\"
|
||||
page.
|
||||
.P
|
||||
Some features of PCRE can be included, excluded, or changed when the library is
|
||||
built. The
|
||||
.\" HREF
|
||||
\fBpcre_config()\fR
|
||||
.\"
|
||||
function makes it possible for a client to discover which features are
|
||||
available. The features themselves are described in the
|
||||
.\" HREF
|
||||
\fBpcrebuild\fP
|
||||
.\"
|
||||
page. Documentation about building PCRE for various operating systems can be
|
||||
found in the \fBREADME\fP file in the source distribution.
|
||||
.P
|
||||
The library contains a number of undocumented internal functions and data
|
||||
tables that are used by more than one of the exported external functions, but
|
||||
which are not intended for use by external callers. Their names all begin with
|
||||
"_pcre_", which hopefully will not provoke any name clashes. In some
|
||||
environments, it is possible to control which external symbols are exported
|
||||
when a shared library is built, and in these cases the undocumented symbols are
|
||||
not exported.
|
||||
.
|
||||
.
|
||||
.SH "USER DOCUMENTATION"
|
||||
.rs
|
||||
.sp
|
||||
The user documentation for PCRE comprises a number of different sections. In
|
||||
the "man" format, each of these is a separate "man page". In the HTML format,
|
||||
each is a separate page, linked from the index page. In the plain text format,
|
||||
all the sections are concatenated, for ease of searching. The sections are as
|
||||
follows:
|
||||
.sp
|
||||
pcre this document
|
||||
pcre-config show PCRE installation configuration information
|
||||
pcreapi details of PCRE's native C API
|
||||
pcrebuild options for building PCRE
|
||||
pcrecallout details of the callout feature
|
||||
pcrecompat discussion of Perl compatibility
|
||||
pcrecpp details of the C++ wrapper
|
||||
pcregrep description of the \fBpcregrep\fP command
|
||||
pcrematching discussion of the two matching algorithms
|
||||
pcrepartial details of the partial matching facility
|
||||
.\" JOIN
|
||||
pcrepattern syntax and semantics of supported
|
||||
regular expressions
|
||||
pcresyntax quick syntax reference
|
||||
pcreperform discussion of performance issues
|
||||
pcreposix the POSIX-compatible C API
|
||||
pcreprecompile details of saving and re-using precompiled patterns
|
||||
pcresample discussion of the sample program
|
||||
pcrestack discussion of stack usage
|
||||
pcretest description of the \fBpcretest\fP testing command
|
||||
.sp
|
||||
In addition, in the "man" and HTML formats, there is a short page for each
|
||||
C library function, listing its arguments and results.
|
||||
.
|
||||
.
|
||||
.SH LIMITATIONS
|
||||
.rs
|
||||
.sp
|
||||
There are some size limitations in PCRE but it is hoped that they will never in
|
||||
practice be relevant.
|
||||
.P
|
||||
The maximum length of a compiled pattern is 65539 (sic) bytes if PCRE is
|
||||
compiled with the default internal linkage size of 2. If you want to process
|
||||
regular expressions that are truly enormous, you can compile PCRE with an
|
||||
internal linkage size of 3 or 4 (see the \fBREADME\fP file in the source
|
||||
distribution and the
|
||||
.\" HREF
|
||||
\fBpcrebuild\fP
|
||||
.\"
|
||||
documentation for details). In these cases the limit is substantially larger.
|
||||
However, the speed of execution is slower.
|
||||
.P
|
||||
All values in repeating quantifiers must be less than 65536.
|
||||
.P
|
||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||
no more than 65535 capturing subpatterns.
|
||||
.P
|
||||
The maximum length of a name for a named subpattern is 32 characters, and the
|
||||
maximum number of named subpatterns is 10000.
|
||||
.P
|
||||
The maximum length of a subject string is the largest positive number that an
|
||||
integer variable can hold. However, when using the traditional matching
|
||||
function, PCRE uses recursion to handle subpatterns and indefinite repetition.
|
||||
This means that the available stack space may limit the size of a subject
|
||||
string that can be processed by certain patterns. For a discussion of stack
|
||||
issues, see the
|
||||
.\" HREF
|
||||
\fBpcrestack\fP
|
||||
.\"
|
||||
documentation.
|
||||
.
|
||||
.\" HTML <a name="utf8support"></a>
|
||||
.
|
||||
.
|
||||
.SH "UTF-8 AND UNICODE PROPERTY SUPPORT"
|
||||
.rs
|
||||
.sp
|
||||
From release 3.3, PCRE has had some support for character strings encoded in
|
||||
the UTF-8 format. For release 4.0 this was greatly extended to cover most
|
||||
common requirements, and in release 5.0 additional support for Unicode general
|
||||
category properties was added.
|
||||
.P
|
||||
In order to process UTF-8 strings, you must build PCRE to include UTF-8 support in
|
||||
the code, and, in addition, you must call
|
||||
.\" HREF
|
||||
\fBpcre_compile()\fP
|
||||
.\"
|
||||
with the PCRE_UTF8 option flag, or the pattern must start with the sequence
|
||||
(*UTF8). When either of these is the case, both the pattern and any subject
|
||||
strings that are matched against it are treated as UTF-8 strings instead of
|
||||
just strings of bytes.
|
||||
.P
|
||||
If you compile PCRE with UTF-8 support, but do not use it at run time, the
|
||||
library will be a bit bigger, but the additional run time overhead is limited
|
||||
to testing the PCRE_UTF8 flag occasionally, so should not be very big.
|
||||
.P
|
||||
If PCRE is built with Unicode character property support (which implies UTF-8
|
||||
support), the escape sequences \ep{..}, \eP{..}, and \eX are supported.
|
||||
The available properties that can be tested are limited to the general
|
||||
category properties such as Lu for an upper case letter or Nd for a decimal
|
||||
number, the Unicode script names such as Arabic or Han, and the derived
|
||||
properties Any and L&. A full list is given in the
|
||||
.\" HREF
|
||||
\fBpcrepattern\fP
|
||||
.\"
|
||||
documentation. Only the short names for properties are supported. For example,
|
||||
\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
|
||||
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
|
||||
compatibility with Perl 5.6. PCRE does not support this.
|
||||
.
|
||||
.\" HTML <a name="utf8strings"></a>
|
||||
.
|
||||
.SS "Validity of UTF-8 strings"
|
||||
.rs
|
||||
.sp
|
||||
When you set the PCRE_UTF8 flag, the strings passed as patterns and subjects
|
||||
are (by default) checked for validity on entry to the relevant functions. From
|
||||
release 7.3 of PCRE, the check is according to the rules of RFC 3629, which are
|
||||
themselves derived from the Unicode specification. Earlier releases of PCRE
|
||||
followed the rules of RFC 2279, which allows the full range of 31-bit values (0
|
||||
to 0x7FFFFFFF). The current check allows only values in the range U+0 to
|
||||
U+10FFFF, excluding U+D800 to U+DFFF.
|
||||
.P
|
||||
The excluded code points are the "Low Surrogate Area" of Unicode, of which the
|
||||
Unicode Standard says this: "The Low Surrogate Area does not contain any
|
||||
character assignments, consequently no character code charts or namelists are
|
||||
provided for this area. Surrogates are reserved for use with UTF-16 and then
|
||||
must be used in pairs." The code points that are encoded by UTF-16 pairs are
|
||||
available as independent code points in the UTF-8 encoding. (In other words,
|
||||
the whole surrogate thing is a fudge for UTF-16 which unfortunately messes up
|
||||
UTF-8.)
|
||||
.P
|
||||
If an invalid UTF-8 string is passed to PCRE, an error return
|
||||
(PCRE_ERROR_BADUTF8) is given. In some situations, you may already know that
|
||||
your strings are valid, and therefore want to skip these checks in order to
|
||||
improve performance. If you set the PCRE_NO_UTF8_CHECK flag at compile time or
|
||||
at run time, PCRE assumes that the pattern or subject it is given
|
||||
(respectively) contains only valid UTF-8 codes. In this case, it does not
|
||||
diagnose an invalid UTF-8 string.
|
||||
.P
|
||||
If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, what
|
||||
happens depends on why the string is invalid. If the string conforms to the
|
||||
"old" definition of UTF-8 (RFC 2279), it is processed as a string of characters
|
||||
in the range 0 to 0x7FFFFFFF. In other words, apart from the initial validity
|
||||
test, PCRE (when in UTF-8 mode) handles strings according to the more liberal
|
||||
rules of RFC 2279. However, if the string does not even conform to RFC 2279,
|
||||
the result is undefined. Your program may crash.
|
||||
.P
|
||||
If you want to process strings of values in the full range 0 to 0x7FFFFFFF,
|
||||
encoded in a UTF-8-like manner as per the old RFC, you can set
|
||||
PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in this
|
||||
situation, you will have to apply your own validity check.
|
||||
.
|
||||
.SS "General comments about UTF-8 mode"
|
||||
.rs
|
||||
.sp
|
||||
1. An unbraced hexadecimal escape sequence (such as \exb3) matches a two-byte
|
||||
UTF-8 character if the value is greater than 127.
|
||||
.P
|
||||
2. Octal numbers up to \e777 are recognized, and match two-byte UTF-8
|
||||
characters for values greater than \e177.
|
||||
.P
|
||||
3. Repeat quantifiers apply to complete UTF-8 characters, not to individual
|
||||
bytes, for example: \ex{100}{3}.
|
||||
.P
|
||||
4. The dot metacharacter matches one UTF-8 character instead of a single byte.
|
||||
.P
|
||||
5. The escape sequence \eC can be used to match a single byte in UTF-8 mode,
|
||||
but its use can lead to some strange effects. This facility is not available in
|
||||
the alternative matching function, \fBpcre_dfa_exec()\fP.
|
||||
.P
|
||||
6. The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly
|
||||
test characters of any code value, but the characters that PCRE recognizes as
|
||||
digits, spaces, or word characters remain the same set as before, all with
|
||||
values less than 256. This remains true even when PCRE includes Unicode
|
||||
property support, because to do otherwise would slow down PCRE in many common
|
||||
cases. If you really want to test for a wider sense of, say, "digit", you
|
||||
must use Unicode property tests such as \ep{Nd}. Note that this also applies to
|
||||
\eb, because it is defined in terms of \ew and \eW.
|
||||
.P
|
||||
7. Similarly, characters that match the POSIX named character classes are all
|
||||
low-valued characters.
|
||||
.P
|
||||
8. However, the Perl 5.10 horizontal and vertical whitespace matching escapes
|
||||
(\eh, \eH, \ev, and \eV) do match all the appropriate Unicode characters.
|
||||
.P
|
||||
9. Case-insensitive matching applies only to characters whose values are less
|
||||
than 128, unless PCRE is built with Unicode property support. Even when Unicode
|
||||
property support is available, PCRE still uses its own character tables when
|
||||
checking the case of low-valued characters, so as not to degrade performance.
|
||||
The Unicode property information is used only for characters with higher
|
||||
values. Even when Unicode property support is available, PCRE supports
|
||||
case-insensitive matching only when there is a one-to-one mapping between a
|
||||
letter's cases. There are a small number of many-to-one mappings in Unicode;
|
||||
these are not supported by PCRE.
|
||||
.
|
||||
.
|
||||
.SH AUTHOR
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Philip Hazel
|
||||
University Computing Service
|
||||
Cambridge CB2 3QH, England.
|
||||
.fi
|
||||
.P
|
||||
Putting an actual email address here seems to have been a spam magnet, so I've
|
||||
taken it away. If you want to email me, use my two initials, followed by the
|
||||
two digits 10, at the domain cam.ac.uk.
|
||||
.
|
||||
.
|
||||
.SH REVISION
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 11 April 2009
|
||||
Copyright (c) 1997-2009 University of Cambridge.
|
||||
.fi
|
File diff suppressed because it is too large
Load Diff
|
@ -1,77 +0,0 @@
|
|||
.TH PCRE_COMPILE 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function compiles a regular expression into an internal form. It is the
|
||||
same as \fBpcre_compile2()\fP, except for the absence of the \fIerrorcodeptr\fP
|
||||
argument. Its arguments are:
|
||||
.sp
|
||||
\fIpattern\fR A zero-terminated string containing the
|
||||
regular expression to be compiled
|
||||
\fIoptions\fR Zero or more option bits
|
||||
\fIerrptr\fR Where to put an error message
|
||||
\fIerroffset\fR Offset in pattern where error was found
|
||||
\fItableptr\fR Pointer to character tables, or NULL to
|
||||
use the built-in default
|
||||
.sp
|
||||
The option bits are:
|
||||
.sp
|
||||
PCRE_ANCHORED Force pattern anchoring
|
||||
PCRE_AUTO_CALLOUT Compile automatic callouts
|
||||
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
||||
PCRE_BSR_UNICODE \eR matches all Unicode line endings
|
||||
PCRE_CASELESS Do caseless matching
|
||||
PCRE_DOLLAR_ENDONLY $ not to match newline at end
|
||||
PCRE_DOTALL . matches anything including NL
|
||||
PCRE_DUPNAMES Allow duplicate names for subpatterns
|
||||
PCRE_EXTENDED Ignore whitespace and # comments
|
||||
PCRE_EXTRA PCRE extra features
|
||||
(not much use currently)
|
||||
PCRE_FIRSTLINE Force matching to be before newline
|
||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||
PCRE_MULTILINE ^ and $ match newlines within data
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||
sequences
|
||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE_UNGREEDY Invert greediness of quantifiers
|
||||
PCRE_UTF8 Run in UTF-8 mode
|
||||
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
|
||||
validity (only relevant if
|
||||
PCRE_UTF8 is set)
|
||||
.sp
|
||||
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and
|
||||
PCRE_NO_UTF8_CHECK.
|
||||
.P
|
||||
The yield of the function is a pointer to a private data structure that
|
||||
contains the compiled pattern, or NULL if an error was detected. Note that
|
||||
compiling regular expressions with one version of PCRE for use with a different
|
||||
version is not guaranteed to work and may cause crashes.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fR
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fR
|
||||
.\"
|
||||
page.
|
|
@ -1,77 +0,0 @@
|
|||
.TH PCRE_COMPILE2 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function compiles a regular expression into an internal form. It is the
|
||||
same as \fBpcre_compile()\fP, except for the addition of the \fIerrorcodeptr\fP
|
||||
argument. The arguments are:
|
||||
|
||||
.sp
|
||||
\fIpattern\fR A zero-terminated string containing the
|
||||
regular expression to be compiled
|
||||
\fIoptions\fR Zero or more option bits
|
||||
\fIerrorcodeptr\fP Where to put an error code
|
||||
\fIerrptr\fR Where to put an error message
|
||||
\fIerroffset\fR Offset in pattern where error was found
|
||||
\fItableptr\fR Pointer to character tables, or NULL to
|
||||
use the built-in default
|
||||
.sp
|
||||
The option bits are:
|
||||
.sp
|
||||
PCRE_ANCHORED Force pattern anchoring
|
||||
PCRE_AUTO_CALLOUT Compile automatic callouts
|
||||
PCRE_CASELESS Do caseless matching
|
||||
PCRE_DOLLAR_ENDONLY $ not to match newline at end
|
||||
PCRE_DOTALL . matches anything including NL
|
||||
PCRE_DUPNAMES Allow duplicate names for subpatterns
|
||||
PCRE_EXTENDED Ignore whitespace and # comments
|
||||
PCRE_EXTRA PCRE extra features
|
||||
(not much use currently)
|
||||
PCRE_FIRSTLINE Force matching to be before newline
|
||||
PCRE_MULTILINE ^ and $ match newlines within data
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences
|
||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE_UNGREEDY Invert greediness of quantifiers
|
||||
PCRE_UTF8 Run in UTF-8 mode
|
||||
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
|
||||
validity (only relevant if
|
||||
PCRE_UTF8 is set)
|
||||
.sp
|
||||
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and
|
||||
PCRE_NO_UTF8_CHECK.
|
||||
.P
|
||||
The yield of the function is a pointer to a private data structure that
|
||||
contains the compiled pattern, or NULL if an error was detected. Note that
|
||||
compiling regular expressions with one version of PCRE for use with a different
|
||||
version is not guaranteed to work and may cause crashes.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fR
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fR
|
||||
.\"
|
||||
page.
|
|
@ -1,57 +0,0 @@
|
|||
.TH PCRE_CONFIG 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function makes it possible for a client program to find out which optional
|
||||
features are available in the version of the PCRE library it is using. Its
|
||||
arguments are as follows:
|
||||
.sp
|
||||
\fIwhat\fR A code specifying what information is required
|
||||
\fIwhere\fR Points to where to put the data
|
||||
.sp
|
||||
The available codes are:
|
||||
.sp
|
||||
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
||||
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
||||
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
||||
Internal recursion depth limit
|
||||
PCRE_CONFIG_NEWLINE Value of the default newline sequence:
|
||||
13 (0x000d) for CR
|
||||
10 (0x000a) for LF
|
||||
3338 (0x0d0a) for CRLF
|
||||
-2 for ANYCRLF
|
||||
-1 for ANY
|
||||
PCRE_CONFIG_BSR Indicates what \eR matches by default:
|
||||
0 all Unicode line endings
|
||||
1 CR, LF, or CRLF only
|
||||
PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
|
||||
Threshold of return slots, above
|
||||
which \fBmalloc()\fR is used by
|
||||
the POSIX API
|
||||
PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap)
|
||||
PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no)
|
||||
PCRE_CONFIG_UNICODE_PROPERTIES
|
||||
Availability of Unicode property support
|
||||
(1=yes 0=no)
|
||||
.sp
|
||||
The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fR
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fR
|
||||
.\"
|
||||
page.
|
|
@ -1,43 +0,0 @@
|
|||
.TH PCRE_COPY_NAMED_SUBSTRING 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
||||
.ti +5n
|
||||
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This is a convenience function for extracting a captured substring, identified
|
||||
by name, into a given buffer. The arguments are:
|
||||
.sp
|
||||
\fIcode\fP Pattern that was successfully matched
|
||||
\fIsubject\fP Subject that has been successfully matched
|
||||
\fIovector\fP Offset vector that \fBpcre_exec()\fP used
|
||||
\fIstringcount\fP Value returned by \fBpcre_exec()\fP
|
||||
\fIstringname\fP Name of the required substring
|
||||
\fIbuffer\fP Buffer to receive the string
|
||||
\fIbuffersize\fP Size of buffer
|
||||
.sp
|
||||
The yield is the length of the substring, PCRE_ERROR_NOMEMORY if the buffer was
|
||||
too small, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,40 +0,0 @@
|
|||
.TH PCRE_COPY_SUBSTRING 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This is a convenience function for extracting a captured substring into a given
|
||||
buffer. The arguments are:
|
||||
.sp
|
||||
\fIsubject\fP Subject that has been successfully matched
|
||||
\fIovector\fP Offset vector that \fBpcre_exec()\fP used
|
||||
\fIstringcount\fP Value returned by \fBpcre_exec()\fP
|
||||
\fIstringnumber\fP Number of the required substring
|
||||
\fIbuffer\fP Buffer to receive the string
|
||||
\fIbuffersize\fP Size of buffer
|
||||
.sp
|
||||
The yield is the length of the string, PCRE_ERROR_NOMEMORY if the buffer was
|
||||
too small, or PCRE_ERROR_NOSUBSTRING if the string number is invalid.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,89 +0,0 @@
|
|||
.TH PCRE_DFA_EXEC 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function matches a compiled regular expression against a given subject
|
||||
string, using an alternative matching algorithm that scans the subject string
|
||||
just once (\fInot\fP Perl-compatible). Note that the main, Perl-compatible,
|
||||
matching function is \fBpcre_exec()\fP. The arguments for this function are:
|
||||
.sp
|
||||
\fIcode\fP Points to the compiled pattern
|
||||
\fIextra\fP Points to an associated \fBpcre_extra\fP structure,
|
||||
or is NULL
|
||||
\fIsubject\fP Points to the subject string
|
||||
\fIlength\fP Length of the subject string, in bytes
|
||||
\fIstartoffset\fP Offset in bytes in the subject at which to
|
||||
start matching
|
||||
\fIoptions\fP Option bits
|
||||
\fIovector\fP Points to a vector of ints for result offsets
|
||||
\fIovecsize\fP Number of elements in the vector
|
||||
\fIworkspace\fP Points to a vector of ints used as working space
|
||||
\fIwscount\fP Number of elements in the vector
|
||||
.sp
|
||||
The options are:
|
||||
.sp
|
||||
PCRE_ANCHORED Match only at the first position
|
||||
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
||||
PCRE_BSR_UNICODE \eR matches all Unicode line endings
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences
|
||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NOTBOL Subject is not the beginning of a line
|
||||
PCRE_NOTEOL Subject is not the end of a line
|
||||
PCRE_NOTEMPTY An empty string is not a valid match
|
||||
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
|
||||
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
||||
validity (only relevant if PCRE_UTF8
|
||||
was set at compile time)
|
||||
PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match
|
||||
PCRE_DFA_SHORTEST Return only the shortest match
|
||||
PCRE_DFA_RESTART This is a restart after a partial match
|
||||
.sp
|
||||
There are restrictions on what may appear in a pattern when using this matching
|
||||
function. Details are given in the
|
||||
.\" HREF
|
||||
\fBpcrematching\fP
|
||||
.\"
|
||||
documentation.
|
||||
.P
|
||||
A \fBpcre_extra\fP structure contains the following fields:
|
||||
.sp
|
||||
\fIflags\fP Bits indicating which fields are set
|
||||
\fIstudy_data\fP Opaque data from \fBpcre_study()\fP
|
||||
\fImatch_limit\fP Limit on internal resource use
|
||||
\fImatch_limit_recursion\fP Limit on internal recursion depth
|
||||
\fIcallout_data\fP Opaque data passed back to callouts
|
||||
\fItables\fP Points to character tables or is NULL
|
||||
.sp
|
||||
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
|
||||
PCRE_EXTRA_TABLES. For this matching function, the \fImatch_limit\fP and
|
||||
\fImatch_limit_recursion\fP fields are not used, and must not be set.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,81 +0,0 @@
|
|||
.TH PCRE_EXEC 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function matches a compiled regular expression against a given subject
|
||||
string, using a matching algorithm that is similar to Perl's. It returns
|
||||
offsets to captured substrings. Its arguments are:
|
||||
.sp
|
||||
\fIcode\fP Points to the compiled pattern
|
||||
\fIextra\fP Points to an associated \fBpcre_extra\fP structure,
|
||||
or is NULL
|
||||
\fIsubject\fP Points to the subject string
|
||||
\fIlength\fP Length of the subject string, in bytes
|
||||
\fIstartoffset\fP Offset in bytes in the subject at which to
|
||||
start matching
|
||||
\fIoptions\fP Option bits
|
||||
\fIovector\fP Points to a vector of ints for result offsets
|
||||
\fIovecsize\fP Number of elements in the vector (a multiple of 3)
|
||||
.sp
|
||||
The options are:
|
||||
.sp
|
||||
PCRE_ANCHORED Match only at the first position
|
||||
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
||||
PCRE_BSR_UNICODE \eR matches all Unicode line endings
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences
|
||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NOTBOL Subject is not the beginning of a line
|
||||
PCRE_NOTEOL Subject is not the end of a line
|
||||
PCRE_NOTEMPTY An empty string is not a valid match
|
||||
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
|
||||
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
||||
validity (only relevant if PCRE_UTF8
|
||||
was set at compile time)
|
||||
PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match
|
||||
.sp
|
||||
There are restrictions on what may appear in a pattern when partial matching is
|
||||
requested. For details, see the
|
||||
.\" HREF
|
||||
\fBpcrepartial\fP
|
||||
.\"
|
||||
page.
|
||||
.P
|
||||
A \fBpcre_extra\fP structure contains the following fields:
|
||||
.sp
|
||||
\fIflags\fP Bits indicating which fields are set
|
||||
\fIstudy_data\fP Opaque data from \fBpcre_study()\fP
|
||||
\fImatch_limit\fP Limit on internal resource use
|
||||
\fImatch_limit_recursion\fP Limit on internal recursion depth
|
||||
\fIcallout_data\fP Opaque data passed back to callouts
|
||||
\fItables\fP Points to character tables or is NULL
|
||||
.sp
|
||||
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
|
||||
PCRE_EXTRA_TABLES.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,27 +0,0 @@
|
|||
.TH PCRE_FREE_SUBSTRING 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B void pcre_free_substring(const char *\fIstringptr\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This is a convenience function for freeing the store obtained by a previous
|
||||
call to \fBpcre_get_substring()\fP or \fBpcre_get_named_substring()\fP. Its
|
||||
only argument is a pointer to the string.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,27 +0,0 @@
|
|||
.TH PCRE_FREE_SUBSTRING_LIST 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B void pcre_free_substring_list(const char **\fIstringptr\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This is a convenience function for freeing the store obtained by a previous
|
||||
call to \fBpcre_get_substring_list()\fP. Its only argument is a pointer to the
|
||||
list of string pointers.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,59 +0,0 @@
|
|||
.TH PCRE_FULLINFO 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function returns information about a compiled pattern. Its arguments are:
|
||||
.sp
|
||||
\fIcode\fP Compiled regular expression
|
||||
\fIextra\fP Result of \fBpcre_study()\fP or NULL
|
||||
\fIwhat\fP What information is required
|
||||
\fIwhere\fP Where to put the information
|
||||
.sp
|
||||
The following information is available:
|
||||
.sp
|
||||
PCRE_INFO_BACKREFMAX Number of highest back reference
|
||||
PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns
|
||||
PCRE_INFO_DEFAULT_TABLES Pointer to default tables
|
||||
PCRE_INFO_FIRSTBYTE Fixed first byte for a match, or
|
||||
-1 for start of string
|
||||
or after newline, or
|
||||
-2 otherwise
|
||||
PCRE_INFO_FIRSTTABLE Table of first bytes (after studying)
|
||||
PCRE_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
||||
PCRE_INFO_LASTLITERAL Literal last byte required
|
||||
PCRE_INFO_NAMECOUNT Number of named subpatterns
|
||||
PCRE_INFO_NAMEENTRYSIZE Size of name table entry
|
||||
PCRE_INFO_NAMETABLE Pointer to name table
|
||||
PCRE_INFO_OKPARTIAL Return 1 if partial matching can be tried
|
||||
PCRE_INFO_OPTIONS Option bits used for compilation
|
||||
PCRE_INFO_SIZE Size of compiled pattern
|
||||
PCRE_INFO_STUDYSIZE Size of study data
|
||||
.sp
|
||||
The yield of the function is zero on success or:
|
||||
.sp
|
||||
PCRE_ERROR_NULL the argument \fIcode\fP was NULL
|
||||
the argument \fIwhere\fP was NULL
|
||||
PCRE_ERROR_BADMAGIC the "magic number" was not found
|
||||
PCRE_ERROR_BADOPTION the value of \fIwhat\fP was invalid
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,45 +0,0 @@
|
|||
.TH PCRE_GET_NAMED_SUBSTRING 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIstringptr\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This is a convenience function for extracting a captured substring by name. The
|
||||
arguments are:
|
||||
.sp
|
||||
\fIcode\fP Compiled pattern
|
||||
\fIsubject\fP Subject that has been successfully matched
|
||||
\fIovector\fP Offset vector that \fBpcre_exec()\fP used
|
||||
\fIstringcount\fP Value returned by \fBpcre_exec()\fP
|
||||
\fIstringname\fP Name of the required substring
|
||||
\fIstringptr\fP Where to put the string pointer
|
||||
.sp
|
||||
The memory in which the substring is placed is obtained by calling
|
||||
\fBpcre_malloc()\fP. The convenience function \fBpcre_free_substring()\fP can
|
||||
be used to free it when it is no longer needed. The yield of the function is
|
||||
the length of the extracted substring, PCRE_ERROR_NOMEMORY if sufficient memory
|
||||
could not be obtained, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,37 +0,0 @@
|
|||
.TH PCRE_GET_STRINGNUMBER 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIname\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This convenience function finds the number of a named substring capturing
|
||||
parenthesis in a compiled pattern. Its arguments are:
|
||||
.sp
|
||||
\fIcode\fP Compiled regular expression
|
||||
\fIname\fP Name whose number is required
|
||||
.sp
|
||||
The yield of the function is the number of the parenthesis if the name is
|
||||
found, or PCRE_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed
|
||||
(PCRE_DUPNAMES is set), it is not defined which of the numbers is returned by
|
||||
\fBpcre_get_stringnumber()\fP. You can obtain the complete list by calling
|
||||
\fBpcre_get_stringtable_entries()\fP.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,40 +0,0 @@
|
|||
.TH PCRE_GET_STRINGTABLE_ENTRIES 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This convenience function finds, for a compiled pattern, the first and last
|
||||
entries for a given name in the table that translates capturing parenthesis
|
||||
names into numbers. When names are required to be unique (PCRE_DUPNAMES is
|
||||
\fInot\fP set), it is usually easier to use \fBpcre_get_stringnumber()\fP
|
||||
instead.
|
||||
.sp
|
||||
\fIcode\fP Compiled regular expression
|
||||
\fIname\fP Name whose entries are required
|
||||
\fIfirst\fP Where to return a pointer to the first entry
|
||||
\fIlast\fP Where to return a pointer to the last entry
|
||||
.sp
|
||||
The yield of the function is the length of each entry, or
|
||||
PCRE_ERROR_NOSUBSTRING if none are found.
|
||||
.P
|
||||
There is a complete description of the PCRE native API, including the format of
|
||||
the table entries, in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page, and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,42 +0,0 @@
|
|||
.TH PCRE_GET_SUBSTRING 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIstringptr\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This is a convenience function for extracting a captured substring. The
|
||||
arguments are:
|
||||
.sp
|
||||
\fIsubject\fP Subject that has been successfully matched
|
||||
\fIovector\fP Offset vector that \fBpcre_exec()\fP used
|
||||
\fIstringcount\fP Value returned by \fBpcre_exec()\fP
|
||||
\fIstringnumber\fP Number of the required substring
|
||||
\fIstringptr\fP Where to put the string pointer
|
||||
.sp
|
||||
The memory in which the substring is placed is obtained by calling
|
||||
\fBpcre_malloc()\fP. The convenience function \fBpcre_free_substring()\fP can
|
||||
be used to free it when it is no longer needed. The yield of the function is
|
||||
the length of the substring, PCRE_ERROR_NOMEMORY if sufficient memory could not
|
||||
be obtained, or PCRE_ERROR_NOSUBSTRING if the string number is invalid.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,41 +0,0 @@
|
|||
.TH PCRE_GET_SUBSTRING_LIST 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This is a convenience function for extracting a list of all the captured
|
||||
substrings. The arguments are:
|
||||
.sp
|
||||
\fIsubject\fP Subject that has been successfully matched
|
||||
\fIovector\fP Offset vector that \fBpcre_exec()\fP used
|
||||
\fIstringcount\fP Value returned by \fBpcre_exec()\fP
|
||||
\fIlistptr\fP Where to put a pointer to the list
|
||||
.sp
|
||||
The memory in which the substrings and the list are placed is obtained by
|
||||
calling \fBpcre_malloc()\fP. The convenience function
|
||||
\fBpcre_free_substring_list()\fP can be used to free it when it is no longer
|
||||
needed. A pointer to a list of pointers is put in the variable whose address is
|
||||
in \fIlistptr\fP. The list is terminated by a NULL pointer. The yield of the
|
||||
function is zero on success or PCRE_ERROR_NOMEMORY if sufficient memory could
|
||||
not be obtained.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,26 +0,0 @@
|
|||
.TH PCRE_INFO 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_info(const pcre *\fIcode\fP, int *\fIoptptr\fP, int
|
||||
.B *\fIfirstcharptr\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function is obsolete. You should be using \fBpcre_fullinfo()\fP instead.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,29 +0,0 @@
|
|||
.TH PCRE_MAKETABLES 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B const unsigned char *pcre_maketables(void);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function builds a set of character tables for character values less than
|
||||
256. These can be passed to \fBpcre_compile()\fP to override PCRE's internal,
|
||||
built-in tables (which were made by \fBpcre_maketables()\fP when PCRE was
|
||||
compiled). You might want to do this if you are using a non-standard locale.
|
||||
The function yields a pointer to the tables.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,32 +0,0 @@
|
|||
.TH PCRE_REFCOUNT 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function is used to maintain a reference count inside a data block that
|
||||
contains a compiled pattern. Its arguments are:
|
||||
.sp
|
||||
\fIcode\fP Compiled regular expression
|
||||
\fIadjust\fP Adjustment to reference value
|
||||
.sp
|
||||
The yield of the function is the adjusted reference value, which is constrained
|
||||
to lie between 0 and 65535.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,42 +0,0 @@
|
|||
.TH PCRE_STUDY 3
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function studies a compiled pattern, to see if additional information can
|
||||
be extracted that might speed up matching. Its arguments are:
|
||||
.sp
|
||||
\fIcode\fP A compiled regular expression
|
||||
\fIoptions\fP Options for \fBpcre_study()\fP
|
||||
\fIerrptr\fP Where to put an error message
|
||||
.sp
|
||||
If the function succeeds, it returns a value that can be passed to
|
||||
\fBpcre_exec()\fP via its \fIextra\fP argument.
|
||||
.P
|
||||
If the function returns NULL, either it could not find any additional
|
||||
information, or there was an error. You can tell the difference by looking at
|
||||
the error value. It is NULL in the first case.
|
||||
.P
|
||||
There are currently no options defined; the value of the second argument should
|
||||
always be zero.
|
||||
.P
|
||||
There is a complete description of the PCRE native API in the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
page.
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue