mirror of
https://github.com/libgit2/libgit2.git
synced 2026-06-21 22:16:24 +00:00
deps: include pcre2
Ye olde PCRE (8.45) was end-of-lifed several years ago. For our bundled regular expression implementation, we want to include the new, still-maintained PCRE2 framework. Include PCRE2 v10.47.
This commit is contained in:
110
deps/pcre2/CMakeLists.txt
vendored
Normal file
110
deps/pcre2/CMakeLists.txt
vendored
Normal file
@@ -0,0 +1,110 @@
|
||||
#
# Static configuration
#
# Fixed settings for the bundled PCRE2 build. The values are picked up by
# config.h.in (via #cmakedefine / @VAR@) in the configure_file() call below.
#

set(SUPPORT_PCRE2_8 1)
set(SUPPORT_UNICODE 1)
set(PCRE2_NEWLINE "LF")
# NOTE(review): presumably the numeric PCRE2 code that corresponds to
# PCRE2_NEWLINE above; this is the value that actually reaches config.h.
set(NEWLINE_DEFAULT "2")
set(PCRE2_LINK_SIZE "2")
set(PCRE2_MAX_VARLOOKBEHIND "255")
set(PCRE2_PARENS_NEST_LIMIT "250")
set(PCRE2_HEAP_LIMIT "20000000")
set(PCRE2_MATCH_LIMIT "10000000")
# Deliberately the macro *name*: config.h ends up with
# "#define MATCH_LIMIT_DEPTH MATCH_LIMIT", so the depth limit follows the
# match limit.
set(PCRE2_MATCH_LIMIT_DEPTH "MATCH_LIMIT")


#
# Dynamic configuration
#
# Probe the toolchain for headers, functions and compiler builtins; each
# result variable maps to a "#cmakedefine" of the same name in config.h.in.
#

include(CheckCSourceCompiles)
include(CheckFunctionExists)
include(CheckSymbolExists)
include(CheckIncludeFile)

check_include_file(assert.h HAVE_ASSERT_H)
check_include_file(dirent.h HAVE_DIRENT_H)
check_include_file(sys/stat.h HAVE_SYS_STAT_H)
check_include_file(sys/types.h HAVE_SYS_TYPES_H)
check_include_file(unistd.h HAVE_UNISTD_H)
check_include_file(windows.h HAVE_WINDOWS_H)

# NOTE(review): both symbols are GNU extensions. glibc usually declares them
# only when _GNU_SOURCE is defined, so without it in
# CMAKE_REQUIRED_DEFINITIONS these checks may always report "not found".
# Confirm whether that is intended before relying on the results.
check_symbol_exists(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE)   # glibc 2.27
check_symbol_exists(secure_getenv "stdlib.h" HAVE_SECURE_GETENV)   # glibc 2.17

check_c_source_compiles(
  "int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
  HAVE_ATTRIBUTE_UNINITIALIZED
)

check_c_source_compiles(
  "int main(void) { __assume(1); return 0; }"
  HAVE_BUILTIN_ASSUME
)

check_c_source_compiles(
  [=[
    #include <stddef.h>
    int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }
  ]=]
  HAVE_BUILTIN_MUL_OVERFLOW
)

check_c_source_compiles(
  "int main(int c, char *v[]) { if (c) __builtin_unreachable(); return (int)(*v[0]); }"
  HAVE_BUILTIN_UNREACHABLE
)


# Output files
#
# config.h is generated into the binary directory so the source tree stays
# untouched; @ONLY keeps any stray "${...}" in the template from being expanded.

configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/config.h.in"
  "${CMAKE_CURRENT_BINARY_DIR}/config.h"
  @ONLY
)


# Source code

set(PCRE2_HEADERS pcre2.h)
set(PCRE2_SOURCES
  pcre2_auto_possess.c
  pcre2_chartables.c
  pcre2_chkdint.c
  pcre2_compile.c
  pcre2_compile_cgroup.c
  pcre2_compile_class.c
  pcre2_config.c
  pcre2_context.c
  pcre2_convert.c
  pcre2_dfa_match.c
  pcre2_error.c
  pcre2_extuni.c
  pcre2_find_bracket.c
  pcre2_maketables.c
  pcre2_match.c
  pcre2_match_data.c
  pcre2_match_next.c
  pcre2_newline.c
  pcre2_ord2utf.c
  pcre2_pattern_info.c
  pcre2_script_run.c
  pcre2_serialize.c
  pcre2_string_utils.c
  pcre2_study.c
  pcre2_substitute.c
  pcre2_substring.c
  pcre2_tables.c
  pcre2_ucd.c
  pcre2_valid_utf.c
  pcre2_xclass.c
)


# Build setup
#
# All compile settings are attached to the pcre2 target (PRIVATE) instead of
# the directory, so they cannot leak into any other target defined here later.

add_library(pcre2 OBJECT ${PCRE2_HEADERS} ${PCRE2_SOURCES})

target_compile_definitions(pcre2 PRIVATE
  HAVE_CONFIG_H
  PCRE2_CODE_UNIT_WIDTH=8
)

if(MSVC)
  target_compile_definitions(pcre2 PRIVATE
    _CRT_SECURE_NO_DEPRECATE
    _CRT_SECURE_NO_WARNINGS
  )
endif()

# The generated config.h lives in the binary directory.
target_include_directories(pcre2 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
104
deps/pcre2/LICENCE.md
vendored
Normal file
104
deps/pcre2/LICENCE.md
vendored
Normal file
@@ -0,0 +1,104 @@
|
||||
PCRE2 Licence
|
||||
=============
|
||||
|
||||
| SPDX-License-Identifier: | BSD-3-Clause WITH PCRE2-exception |
|
||||
|---------|-------|
|
||||
|
||||
PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD"
|
||||
licence, as specified below, with one exemption for certain binary
|
||||
redistributions. The documentation for PCRE2, supplied in the "doc" directory,
|
||||
is distributed under the same terms as the software itself. The data in the
|
||||
testdata directory is not copyrighted and is in the public domain.
|
||||
|
||||
The basic library functions are written in C and are freestanding. Also
|
||||
included in the distribution is a just-in-time compiler that can be used to
|
||||
optimize pattern matching. This is an optional feature that can be omitted when
|
||||
the library is built. The just-in-time compiler is separately licensed under the
|
||||
"2-clause BSD" licence.
|
||||
|
||||
|
||||
COPYRIGHT
|
||||
---------
|
||||
|
||||
### The basic library functions
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
|
||||
Retired from University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 2007-2024 Philip Hazel
|
||||
All rights reserved.
|
||||
|
||||
### PCRE2 Just-In-Time compilation support
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright (c) 2010-2024 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
### Stack-less Just-In-Time compiler
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright (c) 2009-2024 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
The code in the `deps/sljit` directory has its own LICENSE file.
|
||||
|
||||
### All other contributions
|
||||
|
||||
Many other contributors have participated in the authorship of PCRE2. As PCRE2
|
||||
has never required a Contributor Licensing Agreement, or other copyright
|
||||
assignment agreement, all contributions have copyright retained by each
|
||||
original contributor or their employer.
|
||||
|
||||
|
||||
THE "BSD" LICENCE
|
||||
-----------------
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notices,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notices, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of any
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
EXEMPTION FOR BINARY LIBRARY-LIKE PACKAGES
|
||||
------------------------------------------
|
||||
|
||||
The second condition in the BSD licence (covering binary redistributions) does
|
||||
not apply all the way down a chain of software. If binary package A includes
|
||||
PCRE2, it must respect the condition, but if package B is software that
|
||||
includes package A, the condition is not imposed on package B unless it uses
|
||||
PCRE2 independently.
|
||||
55
deps/pcre2/config.h.in
vendored
Normal file
55
deps/pcre2/config.h.in
vendored
Normal file
@@ -0,0 +1,55 @@
|
||||
/* config.h for CMake builds */
|
||||
|
||||
#cmakedefine HAVE_ASSERT_H 1
|
||||
#cmakedefine HAVE_BUILTIN_ASSUME 1
|
||||
#cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1
|
||||
#cmakedefine HAVE_BUILTIN_UNREACHABLE 1
|
||||
#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
|
||||
#cmakedefine HAVE_DIRENT_H 1
|
||||
#cmakedefine HAVE_SYS_STAT_H 1
|
||||
#cmakedefine HAVE_SYS_TYPES_H 1
|
||||
#cmakedefine HAVE_UNISTD_H 1
|
||||
#cmakedefine HAVE_WINDOWS_H 1
|
||||
|
||||
#cmakedefine HAVE_MEMFD_CREATE 1
|
||||
#cmakedefine HAVE_SECURE_GETENV 1
|
||||
|
||||
#cmakedefine SUPPORT_PCRE2_8 1
|
||||
#cmakedefine SUPPORT_PCRE2_16 1
|
||||
#cmakedefine SUPPORT_PCRE2_32 1
|
||||
#cmakedefine DISABLE_PERCENT_ZT 1
|
||||
|
||||
#cmakedefine SUPPORT_LIBBZ2 1
|
||||
#cmakedefine SUPPORT_LIBEDIT 1
|
||||
#cmakedefine SUPPORT_LIBREADLINE 1
|
||||
#cmakedefine SUPPORT_LIBZ 1
|
||||
|
||||
#cmakedefine SUPPORT_JIT 1
|
||||
#cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1
|
||||
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
|
||||
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT 1
|
||||
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT_FORK 1
|
||||
#cmakedefine SUPPORT_UNICODE 1
|
||||
#cmakedefine SUPPORT_VALGRIND 1
|
||||
|
||||
#cmakedefine BSR_ANYCRLF 1
|
||||
#cmakedefine EBCDIC 1
|
||||
#cmakedefine EBCDIC_NL25 1
|
||||
#cmakedefine EBCDIC_IGNORING_COMPILER 1
|
||||
#cmakedefine NEVER_BACKSLASH_C 1
|
||||
|
||||
#define PCRE2_EXPORT @PCRE2_EXPORT@
|
||||
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
|
||||
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
||||
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
|
||||
#define MAX_VARLOOKBEHIND @PCRE2_MAX_VARLOOKBEHIND@
|
||||
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
||||
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
|
||||
#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@
|
||||
#define PCRE2GREP_MAX_BUFSIZE @PCRE2GREP_MAX_BUFSIZE@
|
||||
|
||||
#define MAX_NAME_SIZE 128
|
||||
#define MAX_NAME_COUNT 10000
|
||||
|
||||
/* end config.h for CMake builds */
|
||||
1079
deps/pcre2/pcre2.h
vendored
Normal file
1079
deps/pcre2/pcre2.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1416
deps/pcre2/pcre2_auto_possess.c
vendored
Normal file
1416
deps/pcre2/pcre2_auto_possess.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
192
deps/pcre2/pcre2_chartables.c
vendored
Normal file
192
deps/pcre2/pcre2_chartables.c
vendored
Normal file
@@ -0,0 +1,192 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file was automatically written by the pcre2_dftables auxiliary
|
||||
program. It contains character tables that are used when no external
|
||||
tables are passed to PCRE2 by the application that calls it. The tables
|
||||
are used only for characters whose code values are less than 256, and
|
||||
only relevant if not in UCP mode. */
|
||||
|
||||
/* This set of tables was written in the C locale. */
|
||||
|
||||
/* The pcre2_dftables program (which is distributed with PCRE2) can be used
|
||||
to build alternative versions of this file. This is necessary if you are
|
||||
running in an EBCDIC environment, or if you want to default to a different
|
||||
encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
|
||||
these tables in the "C" locale by default. This happens automatically if
|
||||
PCRE2 is configured with --enable-rebuild-chartables. However, you can run
|
||||
pcre2_dftables manually with the -L option to build tables using the LC_ALL
|
||||
locale. */
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
const uint8_t PRIV(default_tables)[] = {
|
||||
|
||||
/* This table is a lower casing table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table is a case flipping table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 65, 66, 67, 68, 69, 70, 71,
|
||||
72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87,
|
||||
88, 89, 90,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table contains bit maps for various character classes. Each map is 32
|
||||
bytes long and the bits run from the least significant end of each byte. The
|
||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
|
||||
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
|
||||
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
|
||||
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
|
||||
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
/* This table identifies various classes of character by individual bits:
|
||||
0x01 white space character
|
||||
0x02 letter
|
||||
0x04 lower case letter
|
||||
0x08 decimal digit
|
||||
0x10 word (alphanumeric or '_')
|
||||
*/
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
|
||||
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, /* 0 - 7 */
|
||||
0x18,0x18,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
|
||||
0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* @ - G */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||
0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */
|
||||
0x00,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* ` - g */
|
||||
0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* h - o */
|
||||
0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* p - w */
|
||||
0x16,0x16,0x16,0x00,0x00,0x00,0x00,0x00, /* x -127 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of pcre2_chartables.c */
|
||||
94
deps/pcre2/pcre2_chkdint.c
vendored
Normal file
94
deps/pcre2/pcre2_chkdint.c
vendored
Normal file
@@ -0,0 +1,94 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 2023 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This file contains functions to implement checked integer operations */
|
||||
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
#include "pcre2_internal.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Checked Integer Multiplication *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
r A pointer to PCRE2_SIZE to store the answer
|
||||
a, b Two integers
|
||||
|
||||
Returns: Bool indicating if the operation overflows
|
||||
|
||||
It is modeled after C23's <stdckdint.h> interface
|
||||
The INT64_OR_DOUBLE type is a 64-bit integer type when available,
|
||||
otherwise double. */
|
||||
|
||||
BOOL
|
||||
PRIV(ckd_smul)(PCRE2_SIZE *r, int a, int b)
|
||||
{
|
||||
#ifdef HAVE_BUILTIN_MUL_OVERFLOW
|
||||
PCRE2_SIZE m;
|
||||
|
||||
if (__builtin_mul_overflow(a, b, &m)) return TRUE;
|
||||
|
||||
*r = m;
|
||||
#else
|
||||
INT64_OR_DOUBLE m;
|
||||
|
||||
PCRE2_ASSERT(a >= 0 && b >= 0);
|
||||
|
||||
m = (INT64_OR_DOUBLE)a * (INT64_OR_DOUBLE)b;
|
||||
|
||||
#if defined INT64_MAX || defined int64_t
|
||||
if (sizeof(m) > sizeof(*r) && m > (INT64_OR_DOUBLE)PCRE2_SIZE_MAX) return TRUE;
|
||||
*r = (PCRE2_SIZE)m;
|
||||
#else
|
||||
if (m > PCRE2_SIZE_MAX) return TRUE;
|
||||
*r = m;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* End of pcre2_chkdint.c */
|
||||
11343
deps/pcre2/pcre2_compile.c
vendored
Normal file
11343
deps/pcre2/pcre2_compile.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
356
deps/pcre2/pcre2_compile.h
vendored
Normal file
356
deps/pcre2/pcre2_compile.h
vendored
Normal file
@@ -0,0 +1,356 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_COMPILE_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_COMPILE_H_IDEMPOTENT_GUARD
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* Compile time error code numbers. They are given names so that they can more
|
||||
easily be tracked. When a new number is added, the tables called eint1 and
|
||||
eint2 in pcre2posix.c may need to be updated, and a new error text must be
|
||||
added to compile_error_texts in pcre2_error.c. Also, the error codes in
|
||||
pcre2.h.in must be updated - their values are exactly 100 greater than these
|
||||
values. */
|
||||
|
||||
enum { ERR0 = COMPILE_ERROR_BASE,
|
||||
ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
|
||||
ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
|
||||
ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
|
||||
ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
|
||||
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
|
||||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
||||
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
|
||||
ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100,
|
||||
ERR101, ERR102, ERR103, ERR104, ERR105, ERR106, ERR107, ERR108, ERR109, ERR110,
|
||||
ERR111, ERR112, ERR113, ERR114, ERR115, ERR116, ERR117, ERR118, ERR119, ERR120 };
|
||||
|
||||
/* Code values for parsed patterns, which are stored in a vector of 32-bit
|
||||
unsigned ints. Values less than META_END are literal data values. The coding
|
||||
for identifying the item is in the top 16-bits, leaving 16 bits for the
|
||||
additional data that some of them need. The META_CODE, META_DATA, and META_DIFF
|
||||
macros are used to manipulate parsed pattern elements.
|
||||
|
||||
NOTE: When these definitions are changed, the table of extra lengths for each
|
||||
code (meta_extra_lengths) must be updated to remain in step. */
|
||||
|
||||
#define META_END 0x80000000u /* End of pattern */
|
||||
|
||||
#define META_ALT 0x80010000u /* alternation */
|
||||
#define META_ATOMIC 0x80020000u /* atomic group */
|
||||
#define META_BACKREF 0x80030000u /* Back ref */
|
||||
#define META_BACKREF_BYNAME 0x80040000u /* \k'name' */
|
||||
#define META_BIGVALUE 0x80050000u /* Next is a literal > META_END */
|
||||
#define META_CALLOUT_NUMBER 0x80060000u /* (?C with numerical argument */
|
||||
#define META_CALLOUT_STRING 0x80070000u /* (?C with string argument */
|
||||
#define META_CAPTURE 0x80080000u /* Capturing parenthesis */
|
||||
#define META_CIRCUMFLEX 0x80090000u /* ^ metacharacter */
|
||||
#define META_CLASS 0x800a0000u /* start non-empty class */
|
||||
#define META_CLASS_EMPTY 0x800b0000u /* empty class */
|
||||
#define META_CLASS_EMPTY_NOT 0x800c0000u /* negative empty class */
|
||||
#define META_CLASS_END 0x800d0000u /* end of non-empty class */
|
||||
#define META_CLASS_NOT 0x800e0000u /* start non-empty negative class */
|
||||
#define META_COND_ASSERT 0x800f0000u /* (?(?assertion)... */
|
||||
#define META_COND_DEFINE 0x80100000u /* (?(DEFINE)... */
|
||||
#define META_COND_NAME 0x80110000u /* (?(<name>)... */
|
||||
#define META_COND_NUMBER 0x80120000u /* (?(digits)... */
|
||||
#define META_COND_RNAME 0x80130000u /* (?(R&name)... */
|
||||
#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */
|
||||
#define META_COND_VERSION 0x80150000u /* (?(VERSION<op>x.y)... */
|
||||
#define META_OFFSET 0x80160000u /* Setting offset for various META
|
||||
codes (e.g. META_CAPTURE_NAME) */
|
||||
#define META_SCS 0x80170000u /* (*scan_substring:... */
|
||||
#define META_CAPTURE_NAME 0x80180000u /* Next <name> in capture lists */
|
||||
#define META_CAPTURE_NUMBER 0x80190000u /* Next digits in capture lists */
|
||||
#define META_DOLLAR 0x801a0000u /* $ metacharacter */
|
||||
#define META_DOT 0x801b0000u /* . metacharacter */
|
||||
#define META_ESCAPE 0x801c0000u /* \d and friends */
|
||||
#define META_KET 0x801d0000u /* closing parenthesis */
|
||||
#define META_NOCAPTURE 0x801e0000u /* no capture parens */
|
||||
#define META_OPTIONS 0x801f0000u /* (?i) and friends */
|
||||
#define META_POSIX 0x80200000u /* POSIX class item */
|
||||
#define META_POSIX_NEG 0x80210000u /* negative POSIX class item */
|
||||
#define META_RANGE_ESCAPED 0x80220000u /* range with at least one escape */
|
||||
#define META_RANGE_LITERAL 0x80230000u /* range defined literally */
|
||||
#define META_RECURSE 0x80240000u /* Recursion */
|
||||
#define META_RECURSE_BYNAME 0x80250000u /* (?&name) */
|
||||
#define META_SCRIPT_RUN 0x80260000u /* (*script_run:...) */
|
||||
|
||||
/* These must be kept together to make it easy to check that an assertion
|
||||
is present where expected in a conditional group. */
|
||||
|
||||
#define META_LOOKAHEAD 0x80270000u /* (?= */
|
||||
#define META_LOOKAHEADNOT 0x80280000u /* (?! */
|
||||
#define META_LOOKBEHIND 0x80290000u /* (?<= */
|
||||
#define META_LOOKBEHINDNOT 0x802a0000u /* (?<! */
|
||||
|
||||
/* These cannot be conditions */
|
||||
|
||||
#define META_LOOKAHEAD_NA 0x802b0000u /* (*napla: */
|
||||
#define META_LOOKBEHIND_NA 0x802c0000u /* (*naplb: */
|
||||
|
||||
/* These must be kept in this order, with consecutive values, and the _ARG
|
||||
versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
|
||||
versions. */
|
||||
|
||||
#define META_MARK 0x802d0000u /* (*MARK) */
|
||||
#define META_ACCEPT 0x802e0000u /* (*ACCEPT) */
|
||||
#define META_FAIL 0x802f0000u /* (*FAIL) */
|
||||
#define META_COMMIT 0x80300000u /* These */
|
||||
#define META_COMMIT_ARG 0x80310000u /* pairs */
|
||||
#define META_PRUNE 0x80320000u /* must */
|
||||
#define META_PRUNE_ARG 0x80330000u /* be */
|
||||
#define META_SKIP 0x80340000u /* kept */
|
||||
#define META_SKIP_ARG 0x80350000u /* in */
|
||||
#define META_THEN 0x80360000u /* this */
|
||||
#define META_THEN_ARG 0x80370000u /* order */
|
||||
|
||||
/* These must be kept in groups of adjacent 3 values, and all together. */
|
||||
|
||||
#define META_ASTERISK 0x80380000u /* * */
|
||||
#define META_ASTERISK_PLUS 0x80390000u /* *+ */
|
||||
#define META_ASTERISK_QUERY 0x803a0000u /* *? */
|
||||
#define META_PLUS 0x803b0000u /* + */
|
||||
#define META_PLUS_PLUS 0x803c0000u /* ++ */
|
||||
#define META_PLUS_QUERY 0x803d0000u /* +? */
|
||||
#define META_QUERY 0x803e0000u /* ? */
|
||||
#define META_QUERY_PLUS 0x803f0000u /* ?+ */
|
||||
#define META_QUERY_QUERY 0x80400000u /* ?? */
|
||||
#define META_MINMAX 0x80410000u /* {n,m} repeat */
|
||||
#define META_MINMAX_PLUS 0x80420000u /* {n,m}+ repeat */
|
||||
#define META_MINMAX_QUERY 0x80430000u /* {n,m}? repeat */
|
||||
|
||||
/* These meta codes must be kept in a group, with the OR/SUB/XOR in
|
||||
this order, and AND/NOT at the start/end. */
|
||||
|
||||
#define META_ECLASS_AND 0x80440000u /* && (or &) in a class */
|
||||
#define META_ECLASS_OR 0x80450000u /* || (or |, +) in a class */
|
||||
#define META_ECLASS_SUB 0x80460000u /* -- (or -) in a class */
|
||||
#define META_ECLASS_XOR 0x80470000u /* ~~ (or ^) in a class */
|
||||
#define META_ECLASS_NOT 0x80480000u /* ! in a class */
|
||||
|
||||
/* Convenience aliases. */
|
||||
|
||||
#define META_FIRST_QUANTIFIER META_ASTERISK
|
||||
#define META_LAST_QUANTIFIER META_MINMAX_QUERY
|
||||
|
||||
/* This is a special "meta code" that is used only to distinguish (*asr: from
|
||||
(*sr: in the table of alphabetic assertions. It is never stored in the parsed
|
||||
pattern because (*asr: is turned into (*sr:(*atomic: at that stage. There is
|
||||
therefore no need for it to have a length entry, so use a high value. */
|
||||
|
||||
#define META_ATOMIC_SCRIPT_RUN 0x8fff0000u
|
||||
|
||||
/* Macros for manipulating elements of the parsed pattern vector. Each macro
argument is parenthesized so that an expression argument (for example a|b or
a+b) is evaluated as a unit before the mask, subtraction, or shift is applied.

  META_CODE(x)    keeps the top 16 bits of x
  META_DATA(x)    keeps the bottom 16 bits of x
  META_DIFF(x,y)  is (x - y) shifted right by 16 bits */

#define META_CODE(x)   ((x) & 0xffff0000u)
#define META_DATA(x)   ((x) & 0x0000ffffu)
#define META_DIFF(x,y) (((x)-(y))>>16)
|
||||
|
||||
/* Macros to store and retrieve a PCRE2_SIZE value in the parsed pattern, which
|
||||
consists of uint32_t elements. Assume that if uint32_t can't hold it, two of
|
||||
them will be able to (i.e. assume a 64-bit world). */
|
||||
|
||||
#if PCRE2_SIZE_MAX <= UINT32_MAX
|
||||
#define PUTOFFSET(s,p) *p++ = s
|
||||
#define GETOFFSET(s,p) s = *p++
|
||||
#define GETPLUSOFFSET(s,p) s = *(++p)
|
||||
#define READPLUSOFFSET(s,p) s = p[1]
|
||||
#define SKIPOFFSET(p) p++
|
||||
#define SIZEOFFSET 1
|
||||
#else
|
||||
#define PUTOFFSET(s,p) \
|
||||
{ *p++ = (uint32_t)(s >> 32); *p++ = (uint32_t)(s & 0xffffffff); }
|
||||
#define GETOFFSET(s,p) \
|
||||
{ s = ((PCRE2_SIZE)p[0] << 32) | (PCRE2_SIZE)p[1]; p += 2; }
|
||||
#define GETPLUSOFFSET(s,p) \
|
||||
{ s = ((PCRE2_SIZE)p[1] << 32) | (PCRE2_SIZE)p[2]; p += 2; }
|
||||
#define READPLUSOFFSET(s,p) \
|
||||
{ s = ((PCRE2_SIZE)p[1] << 32) | (PCRE2_SIZE)p[2]; }
|
||||
#define SKIPOFFSET(p) p += 2
|
||||
#define SIZEOFFSET 2
|
||||
#endif
|
||||
|
||||
#ifdef PCRE2_DEBUG
|
||||
/* Compile data types. */
|
||||
#define CDATA_RECURSE_ARGS 0 /* Argument list for recurse */
|
||||
#define CDATA_CRANGE 1 /* Character range list */
|
||||
#endif
|
||||
|
||||
/* Extended class management flags. */
|
||||
|
||||
#define CLASS_IS_ECLASS 0x1
|
||||
|
||||
/* Macro for the highest character value. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_UCHAR_VALUE 0xffu
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#define MAX_UCHAR_VALUE 0xffffu
|
||||
#else
|
||||
#define MAX_UCHAR_VALUE 0xffffffffu
|
||||
#endif
|
||||
|
||||
#define GET_MAX_CHAR_VALUE(utf) \
|
||||
((utf) ? MAX_UTF_CODE_POINT : MAX_UCHAR_VALUE)
|
||||
|
||||
/* Macro for setting individual bits in class bitmaps: sets bit number b in
the byte array a (byte index b >> 3, bit index b & 7). Both arguments are
parenthesized so that pointer expressions such as (map + n) can be passed. The
expansion is an assignment expression, so use it as a statement. */

#define SETBIT(a,b) (a)[(b) >> 3] |= (uint8_t)(1u << ((b) & 0x7))
|
||||
|
||||
/* Macro for 8 bit specific checks. */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define SELECT_VALUE8(value8, value) (value8)
|
||||
#else
|
||||
#define SELECT_VALUE8(value8, value) (value)
|
||||
#endif
|
||||
|
||||
/* Macro for aligning data: rounds base up to the next multiple of align. The
mask arithmetic only gives a multiple of align when align is a power of two.
The base argument is parenthesized so that an expression argument is added as
a unit. */

#define CLIST_ALIGN_TO(base, align) \
  (((base) + ((size_t)(align) - 1)) & ~((size_t)(align) - 1))
|
||||
|
||||
/* Structure for holding information about an OP_ECLASS internal operand.
|
||||
An "operand" here could be just a single OP_[X]CLASS, or it could be some
|
||||
complex expression; but it's some sequence of ECL_* codes which pushes one
|
||||
value to the stack. */
|
||||
typedef struct {
|
||||
/* The position of the operand - or NULL if (lengthptr != NULL). */
|
||||
PCRE2_UCHAR *code_start;
|
||||
PCRE2_SIZE length;
|
||||
/* The operand's type if it is a single code (ECL_XCLASS, ECL_ANY, ECL_NONE);
|
||||
otherwise zero if the operand is not atomic. */
|
||||
uint8_t op_single_type;
|
||||
/* Regardless of whether it's a single code or not, we fully constant-fold
|
||||
the bitmap for code points < 256. */
|
||||
class_bits_storage bits;
|
||||
} eclass_op_info;
|
||||
|
||||
/* Macros for the definitions below, to prevent name collisions. */
|
||||
|
||||
#define _pcre2_posix_class_maps PCRE2_SUFFIX(_pcre2_posix_class_maps)
|
||||
#define _pcre2_update_classbits PCRE2_SUFFIX(_pcre2_update_classbits_)
|
||||
#define _pcre2_compile_class_nested PCRE2_SUFFIX(_pcre2_compile_class_nested_)
|
||||
#define _pcre2_compile_class_not_nested PCRE2_SUFFIX(_pcre2_compile_class_not_nested_)
|
||||
#define _pcre2_compile_get_hash_from_name PCRE2_SUFFIX(_pcre2_compile_get_hash_from_name)
|
||||
#define _pcre2_compile_find_named_group PCRE2_SUFFIX(_pcre2_compile_find_named_group)
|
||||
#define _pcre2_compile_find_dupname_details PCRE2_SUFFIX(_pcre2_compile_find_dupname_details)
|
||||
#define _pcre2_compile_add_name_to_table PCRE2_SUFFIX(_pcre2_compile_add_name_to_table)
|
||||
#define _pcre2_compile_parse_scan_substr_args PCRE2_SUFFIX(_pcre2_compile_parse_scan_substr_args)
|
||||
#define _pcre2_compile_parse_recurse_args PCRE2_SUFFIX(_pcre2_compile_parse_recurse_args)
|
||||
|
||||
|
||||
/* Indices of the POSIX classes in posix_names, posix_name_lengths,
|
||||
posix_class_maps, and posix_substitutes. They must be kept in sync. */
|
||||
|
||||
#define PC_DIGIT 7
|
||||
#define PC_GRAPH 8
|
||||
#define PC_PRINT 9
|
||||
#define PC_PUNCT 10
|
||||
#define PC_XDIGIT 13
|
||||
|
||||
extern const int PRIV(posix_class_maps)[];
|
||||
|
||||
/* Defines for hash_dup member in named_group structure. */
|
||||
|
||||
#define NAMED_GROUP_HASH_MASK ((uint16_t)0x7fff)
|
||||
#define NAMED_GROUP_IS_DUPNAME ((uint16_t)0x8000)
|
||||
|
||||
#define NAMED_GROUP_GET_HASH(ng) ((ng)->hash_dup & NAMED_GROUP_HASH_MASK)
|
||||
|
||||
/* Exported functions from pcre2_compile_class.c file: */
|
||||
|
||||
/* Set bits in classbits according to the property type */
|
||||
|
||||
void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
|
||||
uint8_t *classbits);
|
||||
|
||||
/* Compile the META codes from start_ptr...end_ptr, writing a single
|
||||
OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY into pcode. */
|
||||
|
||||
uint32_t *PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
|
||||
uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
|
||||
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
|
||||
|
||||
/* Compile the META codes in pptr into opcodes written to pcode. The pptr must
|
||||
start at a META_CLASS or META_CLASS_NOT.
|
||||
|
||||
The pptr will be left pointing at the matching META_CLASS_END. */
|
||||
|
||||
BOOL PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
|
||||
compile_block *cb, PCRE2_SIZE *lengthptr);
|
||||
|
||||
/* Exported functions from pcre2_compile_cgroup.c file: */
|
||||
|
||||
/* Compute hash from a capture name. */
|
||||
|
||||
uint16_t PRIV(compile_get_hash_from_name)(PCRE2_SPTR name, uint32_t length);
|
||||
|
||||
/* Get the descriptor of a known named capture. */
|
||||
|
||||
named_group *PRIV(compile_find_named_group)(PCRE2_SPTR name,
|
||||
uint32_t length, compile_block *cb);
|
||||
|
||||
/* Add entries to name table in alphabetical order. */
|
||||
|
||||
uint32_t PRIV(compile_add_name_to_table)(compile_block *cb,
|
||||
named_group *ng, uint32_t tablecount);
|
||||
|
||||
/* Searches the properties of duplicated names, and returns them
|
||||
in indexptr and countptr. */
|
||||
|
||||
BOOL PRIV(compile_find_dupname_details)(PCRE2_SPTR name, uint32_t length,
|
||||
int *indexptr, int *countptr, int *errorcodeptr, compile_block *cb);
|
||||
|
||||
/* Parse the arguments of scan substring operations. */
|
||||
|
||||
uint32_t * PRIV(compile_parse_scan_substr_args)(uint32_t *pptr,
|
||||
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
|
||||
|
||||
/* Parse the arguments of recurse operations. */
|
||||
|
||||
BOOL PRIV(compile_parse_recurse_args)(uint32_t *pptr_start,
|
||||
PCRE2_SIZE offset, int *errorcodeptr, compile_block *cb);
|
||||
|
||||
#endif /* PCRE2_COMPILE_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2_compile.h */
|
||||
632
deps/pcre2/pcre2_compile_cgroup.c
vendored
Normal file
632
deps/pcre2/pcre2_compile_cgroup.c
vendored
Normal file
@@ -0,0 +1,632 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#include "pcre2_compile.h"
|
||||
|
||||
/*************************************************
|
||||
* Compute the hash code from a capture name *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns with a simple hash code
|
||||
computed from the name of a capture group.
|
||||
|
||||
Arguments:
|
||||
name name of the capture group
|
||||
length the length of the name
|
||||
|
||||
Returns: hash code
|
||||
*/
|
||||
|
||||
uint16_t
|
||||
PRIV(compile_get_hash_from_name)(PCRE2_SPTR name, uint32_t length)
|
||||
{
|
||||
uint16_t hash;
|
||||
|
||||
PCRE2_ASSERT(length > 0);
|
||||
|
||||
hash = (uint16_t)((name[0] & 0x7f) | ((name[length - 1] & 0xff) << 7));
|
||||
PCRE2_ASSERT(hash <= NAMED_GROUP_HASH_MASK);
|
||||
return hash;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get the descriptor of a known named capture *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns the descriptor in the
|
||||
named group list of a known capture group.
|
||||
|
||||
Arguments:
|
||||
name name of the capture group
|
||||
length the length of the name
|
||||
|
||||
Returns: pointer to the descriptor when found,
|
||||
NULL otherwise
|
||||
*/
|
||||
|
||||
named_group *
|
||||
PRIV(compile_find_named_group)(PCRE2_SPTR name,
|
||||
uint32_t length, compile_block *cb)
|
||||
{
|
||||
uint16_t hash = PRIV(compile_get_hash_from_name)(name, length);
|
||||
named_group *ng;
|
||||
named_group *end = cb->named_groups + cb->names_found;
|
||||
|
||||
for (ng = cb->named_groups; ng < end; ng++)
|
||||
if (length == ng->length && hash == NAMED_GROUP_GET_HASH(ng) &&
|
||||
PRIV(strncmp)(name, ng->name, length) == 0) return ng;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Add an entry to the name/number table *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called between compiling passes to add an entry to the
|
||||
name/number table, maintaining alphabetical order. Checking for permitted
|
||||
and forbidden duplicates has already been done.
|
||||
|
||||
Arguments:
|
||||
cb the compile data block
|
||||
nb named group entry
|
||||
tablecount the count of names in the table so far
|
||||
|
||||
Returns: new tablecount
|
||||
*/
|
||||
|
||||
uint32_t
|
||||
PRIV(compile_add_name_to_table)(compile_block *cb,
|
||||
named_group *ng, uint32_t tablecount)
|
||||
{
|
||||
uint32_t i;
|
||||
PCRE2_SPTR name = ng->name;
|
||||
int length = ng->length;
|
||||
uint32_t duplicate_count = 1;
|
||||
|
||||
PCRE2_UCHAR *slot = cb->name_table;
|
||||
|
||||
PCRE2_ASSERT(length > 0);
|
||||
|
||||
if ((ng->hash_dup & NAMED_GROUP_IS_DUPNAME) != 0)
|
||||
{
|
||||
named_group *ng_it;
|
||||
named_group *end = cb->named_groups + cb->names_found;
|
||||
|
||||
for (ng_it = ng + 1; ng_it < end; ng_it++)
|
||||
if (ng_it->name == name) duplicate_count++;
|
||||
}
|
||||
|
||||
for (i = 0; i < tablecount; i++)
|
||||
{
|
||||
int crc = memcmp(name, slot + IMM2_SIZE, CU2BYTES(length));
|
||||
if (crc == 0 && slot[IMM2_SIZE + length] != 0)
|
||||
crc = -1; /* Current name is a substring */
|
||||
|
||||
/* Make space in the table and break the loop for an earlier name. For a
|
||||
duplicate or later name, carry on. We do this for duplicates so that in the
|
||||
simple case (when ?(| is not used) they are in order of their numbers. In all
|
||||
cases they are in the order in which they appear in the pattern. */
|
||||
|
||||
if (crc < 0)
|
||||
{
|
||||
(void)memmove(slot + cb->name_entry_size * duplicate_count, slot,
|
||||
CU2BYTES((tablecount - i) * cb->name_entry_size));
|
||||
break;
|
||||
}
|
||||
|
||||
/* Continue the loop for a later or duplicate name */
|
||||
|
||||
slot += cb->name_entry_size;
|
||||
}
|
||||
|
||||
tablecount += duplicate_count;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
PUT2(slot, 0, ng->number);
|
||||
memcpy(slot + IMM2_SIZE, name, CU2BYTES(length));
|
||||
|
||||
/* Add a terminating zero and fill the rest of the slot with zeroes so that
|
||||
the memory is all initialized. Otherwise valgrind moans about uninitialized
|
||||
memory when saving serialized compiled patterns. */
|
||||
|
||||
memset(slot + IMM2_SIZE + length, 0,
|
||||
CU2BYTES(cb->name_entry_size - length - IMM2_SIZE));
|
||||
|
||||
if (--duplicate_count == 0) break;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
++ng;
|
||||
if (ng->name == name) break;
|
||||
}
|
||||
|
||||
slot += cb->name_entry_size;
|
||||
}
|
||||
|
||||
return tablecount;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find details of duplicate group names *
|
||||
*************************************************/
|
||||
|
||||
/* This is called from compile_branch() when it needs to know the index and
|
||||
count of duplicates in the names table when processing named backreferences,
|
||||
either directly, or as conditions.
|
||||
|
||||
Arguments:
|
||||
name points to the name
|
||||
length the length of the name
|
||||
indexptr where to put the index
|
||||
countptr where to put the count of duplicates
|
||||
errorcodeptr where to put an error code
|
||||
cb the compile block
|
||||
|
||||
Returns: TRUE if OK, FALSE if not, error code set
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(compile_find_dupname_details)(PCRE2_SPTR name, uint32_t length,
|
||||
int *indexptr, int *countptr, int *errorcodeptr, compile_block *cb)
|
||||
{
|
||||
uint32_t i, groupnumber;
|
||||
int count;
|
||||
PCRE2_UCHAR *slot = cb->name_table;
|
||||
|
||||
/* Find the first entry in the table */
|
||||
|
||||
for (i = 0; i < cb->names_found; i++)
|
||||
{
|
||||
if (PRIV(strncmp)(name, slot + IMM2_SIZE, length) == 0 &&
|
||||
slot[IMM2_SIZE + length] == 0) break;
|
||||
slot += cb->name_entry_size;
|
||||
}
|
||||
|
||||
/* This should not occur, because this function is called only when we know we
|
||||
have duplicate names. Give an internal error. */
|
||||
|
||||
/* LCOV_EXCL_START */
|
||||
if (i >= cb->names_found)
|
||||
{
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
*errorcodeptr = ERR53;
|
||||
cb->erroroffset = name - cb->start_pattern;
|
||||
return FALSE;
|
||||
}
|
||||
/* LCOV_EXCL_STOP */
|
||||
|
||||
/* Record the index and then see how many duplicates there are, updating the
|
||||
backref map and maximum back reference as we do. */
|
||||
|
||||
*indexptr = i;
|
||||
count = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
count++;
|
||||
groupnumber = GET2(slot, 0);
|
||||
cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1;
|
||||
if (groupnumber > cb->top_backref) cb->top_backref = groupnumber;
|
||||
if (++i >= cb->names_found) break;
|
||||
slot += cb->name_entry_size;
|
||||
if (PRIV(strncmp)(name, slot + IMM2_SIZE, length) != 0 ||
|
||||
(slot + IMM2_SIZE)[length] != 0) break;
|
||||
}
|
||||
|
||||
*countptr = count;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/* Process the capture list of scan substring and recurse
|
||||
operations. Since at least one argument must be present,
|
||||
a 0 return value represents error. */
|
||||
|
||||
static size_t
|
||||
PRIV(compile_process_capture_list)(uint32_t *pptr, PCRE2_SIZE offset,
|
||||
int *errorcodeptr, compile_block *cb)
|
||||
{
|
||||
size_t i, size = 0;
|
||||
named_group *ng;
|
||||
PCRE2_SPTR name;
|
||||
uint32_t length;
|
||||
named_group *end = cb->named_groups + cb->names_found;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
++pptr;
|
||||
|
||||
switch (META_CODE(*pptr))
|
||||
{
|
||||
case META_OFFSET:
|
||||
GETPLUSOFFSET(offset, pptr);
|
||||
continue;
|
||||
|
||||
case META_CAPTURE_NAME:
|
||||
offset += META_DATA(*pptr);
|
||||
length = *(++pptr);
|
||||
name = cb->start_pattern + offset;
|
||||
|
||||
ng = PRIV(compile_find_named_group)(name, length, cb);
|
||||
|
||||
if (ng == NULL)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
cb->erroroffset = offset;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((ng->hash_dup & NAMED_GROUP_IS_DUPNAME) == 0)
|
||||
{
|
||||
pptr[-1] = META_CAPTURE_NUMBER;
|
||||
pptr[0] = ng->number;
|
||||
size++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Remains only for duplicated names. */
|
||||
pptr[-1] = META_CAPTURE_NAME;
|
||||
pptr[0] = (uint32_t)(ng - cb->named_groups);
|
||||
size++;
|
||||
name = ng->name;
|
||||
|
||||
while (++ng < end)
|
||||
if (ng->name == name) size++;
|
||||
continue;
|
||||
|
||||
case META_CAPTURE_NUMBER:
|
||||
offset += META_DATA(*pptr);
|
||||
|
||||
i = *(++pptr);
|
||||
if (i > cb->bracount)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
cb->erroroffset = offset;
|
||||
return 0;
|
||||
}
|
||||
if (i > cb->top_backref) cb->top_backref = (uint16_t)i;
|
||||
size++;
|
||||
continue;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(size > 0);
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*******************************************************
|
||||
* Parse the arguments of scan substring operations *
|
||||
********************************************************/
|
||||
|
||||
/* This function parses the arguments of scan substring operations.
|
||||
|
||||
Arguments:
|
||||
pptr_start points to the current parsed pattern pointer
|
||||
offset argument starting offset in the pattern
|
||||
errorcodeptr where to put an error code
|
||||
cb the compile block
|
||||
lengthptr NULL during the real compile phase
|
||||
points to length accumulator during pre-compile phase
|
||||
|
||||
Returns: TRUE if OK, FALSE if not, error code set
|
||||
*/
|
||||
|
||||
uint32_t *
|
||||
PRIV(compile_parse_scan_substr_args)(uint32_t *pptr,
|
||||
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint8_t *captures;
|
||||
uint8_t *capture_ptr;
|
||||
uint8_t bit;
|
||||
PCRE2_SPTR name;
|
||||
named_group *ng;
|
||||
named_group *end = cb->named_groups + cb->names_found;
|
||||
BOOL all_found;
|
||||
size_t size;
|
||||
|
||||
PCRE2_ASSERT(*pptr == META_OFFSET);
|
||||
if (PRIV(compile_process_capture_list)(pptr - 1, 0, errorcodeptr, cb) == 0)
|
||||
return NULL;
|
||||
|
||||
/* Align to bytes. Since the highest capture can
|
||||
be equal to bracount, +1 is added before the aligning. */
|
||||
size = (cb->bracount + 1 + 7) >> 3;
|
||||
captures = (uint8_t*)cb->cx->memctl.malloc(size, cb->cx->memctl.memory_data);
|
||||
if (captures == NULL)
|
||||
{
|
||||
*errorcodeptr = ERR21;
|
||||
READPLUSOFFSET(cb->erroroffset, pptr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
memset(captures, 0, size);
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
switch (META_CODE(*pptr))
|
||||
{
|
||||
case META_OFFSET:
|
||||
pptr++;
|
||||
SKIPOFFSET(pptr);
|
||||
continue;
|
||||
|
||||
case META_CAPTURE_NAME:
|
||||
ng = cb->named_groups + pptr[1];
|
||||
PCRE2_ASSERT((ng->hash_dup & NAMED_GROUP_IS_DUPNAME) != 0);
|
||||
pptr += 2;
|
||||
name = ng->name;
|
||||
|
||||
all_found = TRUE;
|
||||
do
|
||||
{
|
||||
if (ng->name != name) continue;
|
||||
|
||||
capture_ptr = captures + (ng->number >> 3);
|
||||
PCRE2_ASSERT(capture_ptr < captures + size);
|
||||
bit = (uint8_t)(1 << (ng->number & 0x7));
|
||||
|
||||
if ((*capture_ptr & bit) == 0)
|
||||
{
|
||||
*capture_ptr |= bit;
|
||||
all_found = FALSE;
|
||||
}
|
||||
}
|
||||
while (++ng < end);
|
||||
|
||||
if (!all_found)
|
||||
{
|
||||
*lengthptr += 1 + 2 * IMM2_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
pptr[-2] = META_CAPTURE_NUMBER;
|
||||
pptr[-1] = 0;
|
||||
continue;
|
||||
|
||||
case META_CAPTURE_NUMBER:
|
||||
pptr += 2;
|
||||
|
||||
capture_ptr = captures + (pptr[-1] >> 3);
|
||||
PCRE2_ASSERT(capture_ptr < captures + size);
|
||||
bit = (uint8_t)(1 << (pptr[-1] & 0x7));
|
||||
|
||||
if ((*capture_ptr & bit) != 0)
|
||||
{
|
||||
pptr[-1] = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
*capture_ptr |= bit;
|
||||
*lengthptr += 1 + IMM2_SIZE;
|
||||
continue;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
cb->cx->memctl.free(captures, cb->cx->memctl.memory_data);
|
||||
return pptr - 1;
|
||||
}
|
||||
|
||||
|
||||
/* Implement heapsort heapify algorithm: sift the element at index i down
through a max-heap stored in captures[0..size-1], where the children of index
k are at 2k+1 and 2k+2. The element is swapped with its larger child until
neither child is larger, or it has no children.

Arguments:
  captures    the array holding the heap
  size        the number of elements that belong to the heap
  i           the index of the element to sift down
*/

static void do_heapify_u16(uint16_t *captures, size_t size, size_t i)
{
size_t max;
size_t left;
size_t right;
uint16_t tmp;

for (;;)
  {
  max = i;
  left = (i << 1) + 1;
  right = left + 1;

  if (left < size && captures[left] > captures[max]) max = left;
  if (right < size && captures[right] > captures[max]) max = right;

  /* Neither child is larger: the heap property holds from here down. */
  if (i == max) return;

  tmp = captures[i];
  captures[i] = captures[max];
  captures[max] = tmp;
  i = max;
  }
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Parse the arguments of recurse operations *
|
||||
*************************************************/
|
||||
|
||||
/* This function parses the arguments of recurse operations.
|
||||
|
||||
Arguments:
|
||||
pptr_start the current parsed pattern pointer
|
||||
offset argument starting offset in the pattern
|
||||
errorcodeptr where to put an error code
|
||||
cb the compile block
|
||||
lengthptr NULL during the real compile phase
|
||||
points to length accumulator during pre-compile phase
|
||||
|
||||
Returns: TRUE if OK, FALSE if not, error code set
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(compile_parse_recurse_args)(uint32_t *pptr_start,
|
||||
PCRE2_SIZE offset, int *errorcodeptr, compile_block *cb)
|
||||
{
|
||||
uint32_t *pptr = pptr_start;
|
||||
size_t i, size;
|
||||
PCRE2_SPTR name;
|
||||
named_group *ng;
|
||||
named_group *end = cb->named_groups + cb->names_found;
|
||||
recurse_arguments *args;
|
||||
uint16_t *captures;
|
||||
uint16_t *current;
|
||||
uint16_t *captures_end;
|
||||
uint16_t tmp;
|
||||
|
||||
/* Process all arguments, compute the required size. */
|
||||
|
||||
size = PRIV(compile_process_capture_list)(pptr, offset, errorcodeptr, cb);
|
||||
if (size == 0) return FALSE;
|
||||
|
||||
args = cb->cx->memctl.malloc(
|
||||
sizeof(recurse_arguments) + size * sizeof(uint16_t), cb->cx->memctl.memory_data);
|
||||
|
||||
if (args == NULL)
|
||||
{
|
||||
*errorcodeptr = ERR21;
|
||||
cb->erroroffset = offset;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
args->header.next = NULL;
|
||||
#ifdef PCRE2_DEBUG
|
||||
args->header.type = CDATA_RECURSE_ARGS;
|
||||
#endif
|
||||
args->size = size;
|
||||
|
||||
/* Caching the pre-processed capture list. */
|
||||
if (cb->last_data != NULL)
|
||||
cb->last_data->next = &args->header;
|
||||
else
|
||||
cb->first_data = &args->header;
|
||||
|
||||
cb->last_data = &args->header;
|
||||
|
||||
/* Create the capture list size. */
|
||||
|
||||
captures = (uint16_t*)(args + 1);
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
++pptr;
|
||||
|
||||
switch (META_CODE(*pptr))
|
||||
{
|
||||
case META_OFFSET:
|
||||
SKIPOFFSET(pptr);
|
||||
continue;
|
||||
|
||||
case META_CAPTURE_NAME:
|
||||
ng = cb->named_groups + *(++pptr);
|
||||
PCRE2_ASSERT((ng->hash_dup & NAMED_GROUP_IS_DUPNAME) != 0);
|
||||
*captures++ = (uint16_t)(ng->number);
|
||||
|
||||
name = ng->name;
|
||||
|
||||
while (++ng < end)
|
||||
if (ng->name == name) *captures++ = (uint16_t)(ng->number);
|
||||
continue;
|
||||
|
||||
case META_CAPTURE_NUMBER:
|
||||
*captures++ = *(++pptr);
|
||||
continue;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(size == (size_t)(captures - (uint16_t*)(args + 1)));
|
||||
args->skip_size = (size_t)(pptr - pptr_start) - 1;
|
||||
|
||||
if (size == 1) return TRUE;
|
||||
|
||||
/* Sort captures. */
|
||||
|
||||
captures = (uint16_t*)(args + 1);
|
||||
i = (size >> 1) - 1;
|
||||
while (TRUE)
|
||||
{
|
||||
do_heapify_u16(captures, size, i);
|
||||
if (i == 0) break;
|
||||
i--;
|
||||
}
|
||||
|
||||
for (i = size - 1; i > 0; i--)
|
||||
{
|
||||
tmp = captures[0];
|
||||
captures[0] = captures[i];
|
||||
captures[i] = tmp;
|
||||
|
||||
do_heapify_u16(captures, i, 0);
|
||||
}
|
||||
|
||||
/* Remove duplicates. */
|
||||
|
||||
captures_end = captures + size;
|
||||
tmp = *captures++;
|
||||
current = captures;
|
||||
|
||||
while (current < captures_end)
|
||||
{
|
||||
if (*current != tmp)
|
||||
{
|
||||
tmp = *current;
|
||||
*captures++ = tmp;
|
||||
}
|
||||
|
||||
current++;
|
||||
}
|
||||
|
||||
args->size = (size_t)(captures - (uint16_t*)(args + 1));
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* End of pcre2_compile_cgroup.c */
|
||||
2769
deps/pcre2/pcre2_compile_class.c
vendored
Normal file
2769
deps/pcre2/pcre2_compile_class.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
250
deps/pcre2/pcre2_config.c
vendored
Normal file
250
deps/pcre2/pcre2_config.c
vendored
Normal file
@@ -0,0 +1,250 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/* These macros are the standard way of turning unquoted text into C strings.
|
||||
They allow macros like PCRE2_MAJOR to be defined without quotes, which is
|
||||
convenient for user programs that want to test their values. */
|
||||
|
||||
/* STRING() stringifies its argument exactly as written; XSTRING() adds one
level of indirection so that macro arguments are expanded before being turned
into a string literal. */
#define STRING(a) #a
#define XSTRING(s) STRING(s)
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about what features are configured *
|
||||
*************************************************/
|
||||
|
||||
/* If where is NULL, the length of memory required is returned.
|
||||
|
||||
Arguments:
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if a numerical value is returned
|
||||
>= 0 if a string value
|
||||
PCRE2_ERROR_BADOPTION if "what" not recognized
|
||||
or JIT target requested when JIT not enabled
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_config(uint32_t what, void *where)
|
||||
{
|
||||
if (where == NULL) /* Requests a length */
|
||||
{
|
||||
switch (what)
|
||||
{
|
||||
default:
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
case PCRE2_CONFIG_COMPILED_WIDTHS:
|
||||
case PCRE2_CONFIG_DEPTHLIMIT:
|
||||
case PCRE2_CONFIG_EFFECTIVE_LINKSIZE:
|
||||
case PCRE2_CONFIG_HEAPLIMIT:
|
||||
case PCRE2_CONFIG_JIT:
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
case PCRE2_CONFIG_NEVER_BACKSLASH_C:
|
||||
case PCRE2_CONFIG_NEWLINE:
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
|
||||
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
return sizeof(uint32_t);
|
||||
|
||||
/* These are handled below */
|
||||
|
||||
case PCRE2_CONFIG_JITTARGET:
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
case PCRE2_CONFIG_VERSION:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch (what)
|
||||
{
|
||||
default:
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
#ifdef BSR_ANYCRLF
|
||||
*((uint32_t *)where) = PCRE2_BSR_ANYCRLF;
|
||||
#else
|
||||
*((uint32_t *)where) = PCRE2_BSR_UNICODE;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_COMPILED_WIDTHS:
|
||||
*((uint32_t *)where) = 0
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
+ (1 << 0)
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
+ (1 << 1)
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
+ (1 << 2)
|
||||
#endif
|
||||
;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_DEPTHLIMIT:
|
||||
*((uint32_t *)where) = MATCH_LIMIT_DEPTH;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_EFFECTIVE_LINKSIZE:
|
||||
*((uint32_t *)where) = LINK_SIZE * sizeof(PCRE2_UCHAR);
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_HEAPLIMIT:
|
||||
*((uint32_t *)where) = HEAP_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_JIT:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_JITTARGET:
|
||||
#ifdef SUPPORT_JIT
|
||||
{
|
||||
const char *v = PRIV(jit_get_target)();
|
||||
return (int)(1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||
}
|
||||
#else
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
#endif
|
||||
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
*((uint32_t *)where) = (uint32_t)CONFIGURED_LINK_SIZE;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
*((uint32_t *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_NEWLINE:
|
||||
*((uint32_t *)where) = NEWLINE_DEFAULT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_NEVER_BACKSLASH_C:
|
||||
#ifdef NEVER_BACKSLASH_C
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
*((uint32_t *)where) = PARENS_NEST_LIMIT;
|
||||
break;
|
||||
|
||||
/* This is now obsolete. The stack is no longer used via recursion for
|
||||
handling backtracking in pcre2_match(). */
|
||||
|
||||
case PCRE2_CONFIG_STACKRECURSE:
|
||||
*((uint32_t *)where) = 0;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||
*((uint32_t *)where) = TABLES_LENGTH;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
{
|
||||
#if defined SUPPORT_UNICODE
|
||||
const char *v = PRIV(unicode_version);
|
||||
#else
|
||||
const char *v = "Unicode not supported";
|
||||
#endif
|
||||
return (int)(1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||
}
|
||||
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
#if defined SUPPORT_UNICODE
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* The hackery in setting "v" below is to cope with the case when
|
||||
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
|
||||
If the second alternative is used in this case, it does not leave a space
|
||||
before the date. On the other hand, if all four macros are put into a single
|
||||
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
|
||||
There are problems using an "obvious" approach like this:
|
||||
|
||||
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE2_MINOR)
|
||||
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE)
|
||||
|
||||
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
|
||||
of STRING(). The C standard states: "If (before argument substitution) any
|
||||
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||
turns out the gcc treats this case as a single empty string - which is what
|
||||
we really want - but Visual C grumbles about the lack of an argument for the
|
||||
macro. Unfortunately, both are within their rights. As there seems to be no
|
||||
way to test for a macro's value being empty at compile time, we have to
|
||||
resort to a runtime test. */
|
||||
|
||||
case PCRE2_CONFIG_VERSION:
|
||||
{
|
||||
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
|
||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
|
||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
|
||||
return (int)(1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_config.c */
|
||||
557
deps/pcre2/pcre2_context.c
vendored
Normal file
557
deps/pcre2/pcre2_context.c
vendored
Normal file
@@ -0,0 +1,557 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Default malloc/free functions *
|
||||
*************************************************/
|
||||
|
||||
/* Ignore the "user data" argument in each case. */
|
||||
|
||||
/* Default allocator: hands the request to malloc(). The user data pointer is
accepted only so that the signature matches the memory handler type. */

static void *default_malloc(size_t size, void *data)
{
void *block = malloc(size);
(void)data;
return block;
}
|
||||
|
||||
|
||||
/* Default deallocator: hands the block to free(). The user data pointer is
not used. */

static void default_free(void *block, void *data)
{
free(block);
(void)data;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get a block and save memory control *
|
||||
*************************************************/
|
||||
|
||||
/* This internal function is called to get a block of memory in which the
|
||||
memory control data is to be stored at the start for future use.
|
||||
|
||||
Arguments:
|
||||
size amount of memory required
|
||||
memctl pointer to a memctl block or NULL
|
||||
|
||||
Returns: pointer to memory or NULL on failure
|
||||
*/
|
||||
|
||||
extern void *
|
||||
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
|
||||
{
|
||||
pcre2_memctl *newmemctl;
|
||||
void *yield = (memctl == NULL)? malloc(size) :
|
||||
memctl->malloc(size, memctl->memory_data);
|
||||
if (yield == NULL) return NULL;
|
||||
newmemctl = (pcre2_memctl *)yield;
|
||||
if (memctl == NULL)
|
||||
{
|
||||
newmemctl->malloc = default_malloc;
|
||||
newmemctl->free = default_free;
|
||||
newmemctl->memory_data = NULL;
|
||||
}
|
||||
else *newmemctl = *memctl;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create and initialize contexts *
|
||||
*************************************************/
|
||||
|
||||
/* Initializing for compile and match contexts is done in separate, private
|
||||
functions so that these can be called from functions such as pcre2_compile()
|
||||
when an external context is not supplied. The initializing functions have an
|
||||
option to set up default memory management. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
  void (*private_free)(void *, void *), void *memory_data)
{
pcre2_general_context *gcontext;

/* A NULL handler selects the corresponding default. */

private_malloc = (private_malloc != NULL)? private_malloc : default_malloc;
private_free = (private_free != NULL)? private_free : default_free;

/* The context itself is obtained from the chosen allocator, and remembers
the handlers and user data it was created with. */

gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data);
if (gcontext != NULL)
  {
  gcontext->memctl.malloc = private_malloc;
  gcontext->memctl.free = private_free;
  gcontext->memctl.memory_data = memory_data;
  }

return gcontext;
}
|
||||
|
||||
|
||||
/* A default compile context is set up to save having to initialize at run time
|
||||
when no context is supplied to the compile function. */
|
||||
|
||||
pcre2_compile_context PRIV(default_compile_context) = {
|
||||
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||
NULL, /* Stack guard */
|
||||
NULL, /* Stack guard data */
|
||||
PRIV(default_tables), /* Character tables */
|
||||
PCRE2_UNSET, /* Max pattern length */
|
||||
PCRE2_UNSET, /* Max pattern compiled length */
|
||||
BSR_DEFAULT, /* Backslash R default */
|
||||
NEWLINE_DEFAULT, /* Newline convention */
|
||||
PARENS_NEST_LIMIT, /* As it says */
|
||||
0, /* Extra options */
|
||||
MAX_VARLOOKBEHIND, /* As it says */
|
||||
PCRE2_OPTIMIZATION_ALL /* All optimizations enabled */
|
||||
};
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_create(pcre2_general_context *gcontext)
{
pcre2_memctl *source_memctl = (pcre2_memctl *)gcontext;
pcre2_compile_context *ccontext =
  PRIV(memctl_malloc)(sizeof(pcre2_real_compile_context), source_memctl);

if (ccontext != NULL)
  {
  /* Start from the defaults; this overwrites the memory control data, so put
  the general context's handlers back when one was supplied. */
  *ccontext = PRIV(default_compile_context);
  if (source_memctl != NULL)
    *((pcre2_memctl *)ccontext) = *source_memctl;
  }

return ccontext;
}
|
||||
|
||||
|
||||
/* A default match context is set up to save having to initialize at run time
|
||||
when no context is supplied to a match function. */
|
||||
|
||||
pcre2_match_context PRIV(default_match_context) = {
|
||||
{ default_malloc, default_free, NULL },
|
||||
#ifdef SUPPORT_JIT
|
||||
NULL, /* JIT callback */
|
||||
NULL, /* JIT callback data */
|
||||
#endif
|
||||
NULL, /* Callout function */
|
||||
NULL, /* Callout data */
|
||||
NULL, /* Substitute callout function */
|
||||
NULL, /* Substitute callout data */
|
||||
NULL, /* Substitute case callout function */
|
||||
NULL, /* Substitute case callout data */
|
||||
PCRE2_UNSET, /* Offset limit */
|
||||
HEAP_LIMIT,
|
||||
MATCH_LIMIT,
|
||||
MATCH_LIMIT_DEPTH };
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_create(pcre2_general_context *gcontext)
{
pcre2_memctl *source_memctl = (pcre2_memctl *)gcontext;
pcre2_match_context *mcontext =
  PRIV(memctl_malloc)(sizeof(pcre2_real_match_context), source_memctl);

if (mcontext != NULL)
  {
  /* Start from the defaults; this overwrites the memory control data, so put
  the general context's handlers back when one was supplied. */
  *mcontext = PRIV(default_match_context);
  if (source_memctl != NULL)
    *((pcre2_memctl *)mcontext) = *source_memctl;
  }

return mcontext;
}
|
||||
|
||||
|
||||
/* A default convert context is set up to save having to initialize at run time
|
||||
when no context is supplied to the convert function. */
|
||||
|
||||
pcre2_convert_context PRIV(default_convert_context) = {
|
||||
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||
#ifdef _WIN32
|
||||
CHAR_BACKSLASH, /* Default path separator */
|
||||
CHAR_GRAVE_ACCENT /* Default escape character */
|
||||
#else /* Not Windows */
|
||||
CHAR_SLASH, /* Default path separator */
|
||||
CHAR_BACKSLASH /* Default escape character */
|
||||
#endif
|
||||
};
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_create(pcre2_general_context *gcontext)
{
pcre2_memctl *source_memctl = (pcre2_memctl *)gcontext;
pcre2_convert_context *ccontext =
  PRIV(memctl_malloc)(sizeof(pcre2_real_convert_context), source_memctl);

if (ccontext != NULL)
  {
  /* Start from the defaults; this overwrites the memory control data, so put
  the general context's handlers back when one was supplied. */
  *ccontext = PRIV(default_convert_context);
  if (source_memctl != NULL)
    *((pcre2_memctl *)ccontext) = *source_memctl;
  }

return ccontext;
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Context copy functions *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_copy(pcre2_general_context *gcontext)
{
/* The duplicate is allocated with the original context's own allocator and
is a byte-for-byte copy of it. */
const size_t nbytes = sizeof(pcre2_real_general_context);
pcre2_general_context *duplicate =
  gcontext->memctl.malloc(nbytes, gcontext->memctl.memory_data);

if (duplicate != NULL) memcpy(duplicate, gcontext, nbytes);
return duplicate;
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
{
/* The duplicate is allocated with the original context's own allocator and
is a byte-for-byte copy of it. */
const size_t nbytes = sizeof(pcre2_real_compile_context);
pcre2_compile_context *duplicate =
  ccontext->memctl.malloc(nbytes, ccontext->memctl.memory_data);

if (duplicate != NULL) memcpy(duplicate, ccontext, nbytes);
return duplicate;
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_copy(pcre2_match_context *mcontext)
{
/* The duplicate is allocated with the original context's own allocator and
is a byte-for-byte copy of it. */
const size_t nbytes = sizeof(pcre2_real_match_context);
pcre2_match_context *duplicate =
  mcontext->memctl.malloc(nbytes, mcontext->memctl.memory_data);

if (duplicate != NULL) memcpy(duplicate, mcontext, nbytes);
return duplicate;
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_copy(pcre2_convert_context *ccontext)
{
/* The duplicate is allocated with the original context's own allocator and
is a byte-for-byte copy of it. */
const size_t nbytes = sizeof(pcre2_real_convert_context);
pcre2_convert_context *duplicate =
  ccontext->memctl.malloc(nbytes, ccontext->memctl.memory_data);

if (duplicate != NULL) memcpy(duplicate, ccontext, nbytes);
return duplicate;
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Context free functions *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_general_context_free(pcre2_general_context *gcontext)
|
||||
{
|
||||
if (gcontext != NULL)
|
||||
gcontext->memctl.free(gcontext, gcontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_compile_context_free(pcre2_compile_context *ccontext)
|
||||
{
|
||||
if (ccontext != NULL)
|
||||
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_match_context_free(pcre2_match_context *mcontext)
|
||||
{
|
||||
if (mcontext != NULL)
|
||||
mcontext->memctl.free(mcontext, mcontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_convert_context_free(pcre2_convert_context *ccontext)
|
||||
{
|
||||
if (ccontext != NULL)
|
||||
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set values in contexts *
|
||||
*************************************************/
|
||||
|
||||
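/* Most of these functions return 0 for success or PCRE2_ERROR_BADDATA if
invalid data is given; pcre2_set_optimize() instead reports PCRE2_ERROR_NULL
for a NULL context and PCRE2_ERROR_BADOPTION for an unknown directive. Only
some of the functions are able to test the validity of the data. */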
|
||||
|
||||
|
||||
/* ------------ Compile context ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||
const uint8_t *tables)
|
||||
{
|
||||
ccontext->tables = tables;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
|
||||
{
|
||||
switch(value)
|
||||
{
|
||||
case PCRE2_BSR_ANYCRLF:
|
||||
case PCRE2_BSR_UNICODE:
|
||||
ccontext->bsr_convention = value;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||
{
|
||||
ccontext->max_pattern_length = length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_pattern_compiled_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||
{
|
||||
ccontext->max_pattern_compiled_length = length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
|
||||
{
|
||||
switch(newline)
|
||||
{
|
||||
case PCRE2_NEWLINE_CR:
|
||||
case PCRE2_NEWLINE_LF:
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
ccontext->newline_convention = newline;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, uint32_t limit)
|
||||
{
|
||||
ccontext->max_varlookbehind = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
|
||||
{
|
||||
ccontext->parens_nest_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
|
||||
{
|
||||
ccontext->extra_options = options;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||
int (*guard)(uint32_t, void *), void *user_data)
|
||||
{
|
||||
ccontext->stack_guard = guard;
|
||||
ccontext->stack_guard_data = user_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_optimize(pcre2_compile_context *ccontext, uint32_t directive)
|
||||
{
|
||||
if (ccontext == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
|
||||
switch (directive)
|
||||
{
|
||||
case PCRE2_OPTIMIZATION_NONE:
|
||||
ccontext->optimization_flags = 0;
|
||||
break;
|
||||
|
||||
case PCRE2_OPTIMIZATION_FULL:
|
||||
ccontext->optimization_flags = PCRE2_OPTIMIZATION_ALL;
|
||||
break;
|
||||
|
||||
default:
|
||||
if (directive >= PCRE2_AUTO_POSSESS && directive <= PCRE2_START_OPTIMIZE_OFF)
|
||||
{
|
||||
/* Even directive numbers starting from 64 switch a bit on;
|
||||
* Odd directive numbers starting from 65 switch a bit off */
|
||||
if ((directive & 1) != 0)
|
||||
ccontext->optimization_flags &= ~(1u << ((directive >> 1) - 32));
|
||||
else
|
||||
ccontext->optimization_flags |= 1u << ((directive >> 1) - 32);
|
||||
return 0;
|
||||
}
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------ Match context ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_callout(pcre2_match_context *mcontext,
|
||||
int (*callout)(pcre2_callout_block *, void *), void *callout_data)
|
||||
{
|
||||
mcontext->callout = callout;
|
||||
mcontext->callout_data = callout_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_substitute_callout(pcre2_match_context *mcontext,
|
||||
int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
|
||||
void *substitute_callout_data)
|
||||
{
|
||||
mcontext->substitute_callout = substitute_callout;
|
||||
mcontext->substitute_callout_data = substitute_callout_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_substitute_case_callout(pcre2_match_context *mcontext,
|
||||
PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
|
||||
PCRE2_SIZE, int, void *),
|
||||
void *substitute_case_callout_data)
|
||||
{
|
||||
mcontext->substitute_case_callout = substitute_case_callout;
|
||||
mcontext->substitute_case_callout_data = substitute_case_callout_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->heap_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->match_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_depth_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->depth_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
|
||||
{
|
||||
mcontext->offset_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* These functions became obsolete at release 10.30. The first is kept as a
|
||||
synonym for backwards compatibility. The second now does nothing. */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
return pcre2_set_depth_limit(mcontext, limit);
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
|
||||
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
|
||||
void *mydata)
|
||||
{
|
||||
(void)mcontext;
|
||||
(void)mymalloc;
|
||||
(void)myfree;
|
||||
(void)mydata;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ------------ Convert context ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_glob_separator(pcre2_convert_context *ccontext, uint32_t separator)
|
||||
{
|
||||
if (separator != CHAR_SLASH && separator != CHAR_BACKSLASH &&
|
||||
separator != CHAR_DOT) return PCRE2_ERROR_BADDATA;
|
||||
ccontext->glob_separator = separator;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char *globpunct =
|
||||
STR_EXCLAMATION_MARK STR_QUOTATION_MARK STR_NUMBER_SIGN STR_DOLLAR_SIGN
|
||||
STR_PERCENT_SIGN STR_AMPERSAND STR_APOSTROPHE STR_LEFT_PARENTHESIS
|
||||
STR_RIGHT_PARENTHESIS STR_ASTERISK STR_PLUS STR_COMMA STR_MINUS STR_DOT
|
||||
STR_SLASH STR_COLON STR_SEMICOLON STR_LESS_THAN_SIGN STR_EQUALS_SIGN
|
||||
STR_GREATER_THAN_SIGN STR_QUESTION_MARK STR_COMMERCIAL_AT
|
||||
STR_LEFT_SQUARE_BRACKET STR_BACKSLASH STR_RIGHT_SQUARE_BRACKET
|
||||
STR_CIRCUMFLEX_ACCENT STR_UNDERSCORE STR_GRAVE_ACCENT STR_LEFT_CURLY_BRACKET
|
||||
STR_VERTICAL_LINE STR_RIGHT_CURLY_BRACKET STR_TILDE;
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
|
||||
{
|
||||
if (escape > 255 || (escape != 0 && strchr(globpunct, escape) == NULL))
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
ccontext->glob_escape = escape;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_context.c */
|
||||
|
||||
1261
deps/pcre2/pcre2_convert.c
vendored
Normal file
1261
deps/pcre2/pcre2_convert.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
4134
deps/pcre2/pcre2_dfa_match.c
vendored
Normal file
4134
deps/pcre2/pcre2_dfa_match.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
384
deps/pcre2/pcre2_error.c
vendored
Normal file
384
deps/pcre2/pcre2_error.c
vendored
Normal file
@@ -0,0 +1,384 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/* STRING() stringifies its argument exactly as written; XSTRING() adds one
level of indirection so that macro arguments are expanded before being turned
into a string literal. */
#define STRING(a) #a
#define XSTRING(s) STRING(s)
|
||||
|
||||
/* The texts of compile-time error messages. Compile-time error numbers start
|
||||
at COMPILE_ERROR_BASE (100).
|
||||
|
||||
This used to be a table of strings, but in order to reduce the number of
|
||||
relocations needed when a shared library is loaded dynamically, it is now one
|
||||
long string. We cannot use a table of offsets, because the lengths of inserts
|
||||
such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
|
||||
pcre2_get_error_message() counts through to the one it wants - this isn't a
|
||||
performance issue because these strings are used only when there is an error.
|
||||
|
||||
Each substring ends with \0 to insert a null character. This includes the final
|
||||
substring, so that the whole string ends with \0\0, which can be detected when
|
||||
counting through. */
|
||||
|
||||
static const unsigned char compile_error_texts[] =
|
||||
"no error\0"
|
||||
"\\ at end of pattern\0"
|
||||
"\\c at end of pattern\0"
|
||||
"unrecognized character follows \\\0"
|
||||
"numbers out of order in {} quantifier\0"
|
||||
/* 5 */
|
||||
"number too big in {} quantifier\0"
|
||||
"missing terminating ] for character class\0"
|
||||
"escape sequence is invalid in character class\0"
|
||||
"range out of order in character class\0"
|
||||
"quantifier does not follow a repeatable item\0"
|
||||
/* 10 */
|
||||
"internal error: unexpected repeat\0"
|
||||
"unrecognized character after (? or (?-\0"
|
||||
"POSIX named classes are supported only within a class\0"
|
||||
"POSIX collating elements are not supported\0"
|
||||
"missing closing parenthesis\0"
|
||||
/* 15 */
|
||||
"reference to non-existent subpattern\0"
|
||||
"pattern passed as NULL with non-zero length\0"
|
||||
"unrecognised compile-time option bit(s)\0"
|
||||
"missing ) after (?# comment\0"
|
||||
"parentheses are too deeply nested\0"
|
||||
/* 20 */
|
||||
"regular expression is too large\0"
|
||||
"failed to allocate heap memory\0"
|
||||
"unmatched closing parenthesis\0"
|
||||
"internal error: code overflow\0"
|
||||
"missing closing parenthesis for condition\0"
|
||||
/* 25 */
|
||||
"length of lookbehind assertion is not limited\0"
|
||||
"a relative value of zero is not allowed\0"
|
||||
"conditional subpattern contains more than two branches\0"
|
||||
"atomic assertion expected after (?( or (?(?C)\0"
|
||||
"digit expected after (?+\0"
|
||||
/* 30 */
|
||||
"unknown POSIX class name\0"
|
||||
"internal error in pcre2_study(): should not occur\0"
|
||||
"this version of PCRE2 does not have Unicode support\0"
|
||||
"parentheses are too deeply nested (stack check)\0"
|
||||
"character code point value in \\x{} or \\o{} is too large\0"
|
||||
/* 35 */
|
||||
"lookbehind is too complicated\0"
|
||||
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
|
||||
"PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0"
|
||||
"number after (?C is greater than 255\0"
|
||||
"closing parenthesis for (?C expected\0"
|
||||
/* 40 */
|
||||
"invalid escape sequence in (*VERB) name\0"
|
||||
"unrecognized character after (?P\0"
|
||||
"syntax error in subpattern name (missing terminator?)\0"
|
||||
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
|
||||
"subpattern name must start with a non-digit\0"
|
||||
/* 45 */
|
||||
"this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
|
||||
"malformed \\P or \\p sequence\0"
|
||||
"unknown property after \\P or \\p\0"
|
||||
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0"
|
||||
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
|
||||
/* 50 */
|
||||
"invalid range in character class\0"
|
||||
"octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
|
||||
"internal error: overran compiling workspace\0"
|
||||
"internal error: previously-checked referenced subpattern not found\0"
|
||||
"DEFINE subpattern contains more than one branch\0"
|
||||
/* 55 */
|
||||
"missing opening brace after \\o\0"
|
||||
"internal error: unknown newline setting\0"
|
||||
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
|
||||
"(?R (recursive pattern call) must be followed by a closing parenthesis\0"
|
||||
/* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */
|
||||
"obsolete error (should not occur)\0" /* Was the above */
|
||||
/* 60 */
|
||||
"(*VERB) not recognized or malformed\0"
|
||||
"subpattern number is too big\0"
|
||||
"subpattern name expected\0"
|
||||
"internal error: parsed pattern overflow\0"
|
||||
"non-octal character in \\o{} (closing brace missing?)\0"
|
||||
/* 65 */
|
||||
"different names for subpatterns of the same number are not allowed\0"
|
||||
"(*MARK) must have an argument\0"
|
||||
"non-hex character in \\x{} (closing brace missing?)\0"
|
||||
#ifndef EBCDIC
|
||||
"\\c must be followed by a printable ASCII character\0"
|
||||
#else
|
||||
"\\c must be followed by a letter or one of @[\\]^_?\0"
|
||||
#endif
|
||||
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
|
||||
/* 70 */
|
||||
"internal error: unknown meta code in check_lookbehinds()\0"
|
||||
"\\N is not supported in a class\0"
|
||||
"callout string is too long\0"
|
||||
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
|
||||
"using UTF is disabled by the application\0"
|
||||
/* 75 */
|
||||
"using UCP is disabled by the application\0"
|
||||
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||
"character code point value in \\u.... sequence is too large\0"
|
||||
"digits missing after \\x or in \\x{} or \\o{} or \\N{U+}\0"
|
||||
"syntax error or number too big in (?(VERSION condition\0"
|
||||
/* 80 */
|
||||
"internal error: unknown opcode in auto_possessify()\0"
|
||||
"missing terminating delimiter for callout with string argument\0"
|
||||
"unrecognized string delimiter follows (?C\0"
|
||||
"using \\C is disabled by the application\0"
|
||||
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
||||
/* 85 */
|
||||
"using \\C is disabled in this PCRE2 library\0"
|
||||
"regular expression is too complicated\0"
|
||||
"lookbehind assertion is too long\0"
|
||||
"pattern string is longer than the limit set by the application\0"
|
||||
"internal error: unknown code in parsed pattern\0"
|
||||
/* 90 */
|
||||
"internal error: bad code value in parsed_skip()\0"
|
||||
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
|
||||
"invalid option bits with PCRE2_LITERAL\0"
|
||||
"\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
|
||||
"invalid hyphen in option setting\0"
|
||||
/* 95 */
|
||||
"(*alpha_assertion) not recognized\0"
|
||||
"script runs require Unicode support, which this version of PCRE2 does not have\0"
|
||||
"too many capturing groups (maximum 65535)\0"
|
||||
"octal digit missing after \\0 (PCRE2_EXTRA_NO_BS0 is set)\0"
|
||||
"\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0"
|
||||
/* 100 */
|
||||
"branch too long in variable-length lookbehind assertion\0"
|
||||
"compiled pattern would be longer than the limit set by the application\0"
|
||||
"octal value given by \\ddd is greater than \\377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)\0"
|
||||
"using callouts is disabled by the application\0"
|
||||
"PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode\0"
|
||||
/* 105 */
|
||||
"PCRE2_EXTRA_TURKISH_CASING requires UTF in 8-bit mode\0"
|
||||
"PCRE2_EXTRA_TURKISH_CASING and PCRE2_EXTRA_CASELESS_RESTRICT are not compatible\0"
|
||||
"extended character class nesting is too deep\0"
|
||||
"invalid operator in extended character class\0"
|
||||
"unexpected operator in extended character class (no preceding operand)\0"
|
||||
/* 110 */
|
||||
"expected operand after operator in extended character class\0"
|
||||
"square brackets needed to clarify operator precedence in extended character class\0"
|
||||
"missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS)\0"
|
||||
"unexpected expression in extended character class (no preceding operator)\0"
|
||||
"empty expression in extended character class\0"
|
||||
/* 115 */
|
||||
"terminating ] with no following closing parenthesis in (?[...]\0"
|
||||
"unexpected character in (?[...]) extended character class\0"
|
||||
"expected capture group number or name\0"
|
||||
"missing opening parenthesis\0"
|
||||
"syntax error in subpattern number (missing terminator?)\0"
|
||||
/* 120 */
|
||||
"erroroffset passed as NULL\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
||||
static const unsigned char match_error_texts[] =
|
||||
"no error\0"
|
||||
"no match\0"
|
||||
"partial match\0"
|
||||
"UTF-8 error: 1 byte missing at end\0"
|
||||
"UTF-8 error: 2 bytes missing at end\0"
|
||||
/* 5 */
|
||||
"UTF-8 error: 3 bytes missing at end\0"
|
||||
"UTF-8 error: 4 bytes missing at end\0"
|
||||
"UTF-8 error: 5 bytes missing at end\0"
|
||||
"UTF-8 error: byte 2 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 3 top bits not 0x80\0"
|
||||
/* 10 */
|
||||
"UTF-8 error: byte 4 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 5 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 6 top bits not 0x80\0"
|
||||
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
|
||||
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
|
||||
/* 15 */
|
||||
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
|
||||
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
|
||||
"UTF-8 error: overlong 2-byte sequence\0"
|
||||
"UTF-8 error: overlong 3-byte sequence\0"
|
||||
"UTF-8 error: overlong 4-byte sequence\0"
|
||||
/* 20 */
|
||||
"UTF-8 error: overlong 5-byte sequence\0"
|
||||
"UTF-8 error: overlong 6-byte sequence\0"
|
||||
"UTF-8 error: isolated byte with 0x80 bit set\0"
|
||||
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
|
||||
"UTF-16 error: missing low surrogate at end\0"
|
||||
/* 25 */
|
||||
"UTF-16 error: invalid low surrogate\0"
|
||||
"UTF-16 error: isolated low surrogate\0"
|
||||
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
|
||||
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
|
||||
"bad data value\0"
|
||||
/* 30 */
|
||||
"patterns do not all use the same character tables\0"
|
||||
"magic number missing\0"
|
||||
"pattern compiled in wrong mode: 8/16/32-bit error\0"
|
||||
"bad offset value\0"
|
||||
"bad option value\0"
|
||||
/* 35 */
|
||||
"invalid replacement string\0"
|
||||
"bad offset into UTF string\0"
|
||||
"callout error code\0" /* Never returned by PCRE2 itself */
|
||||
"invalid data in workspace for DFA restart\0"
|
||||
"too much recursion for DFA matching\0"
|
||||
/* 40 */
|
||||
"backreference condition or recursion test is not supported for DFA matching\0"
|
||||
"function is not supported for DFA matching\0"
|
||||
"pattern contains an item that is not supported for DFA matching\0"
|
||||
"workspace size exceeded in DFA matching\0"
|
||||
"internal error - pattern overwritten?\0"
|
||||
/* 45 */
|
||||
"bad JIT option\0"
|
||||
"JIT stack limit reached\0"
|
||||
"match limit exceeded\0"
|
||||
"no more memory\0"
|
||||
"unknown substring\0"
|
||||
/* 50 */
|
||||
"non-unique substring name\0"
|
||||
"NULL argument passed with non-zero length\0"
|
||||
"nested recursion at the same subject position\0"
|
||||
"matching depth limit exceeded\0"
|
||||
"requested value is not available\0"
|
||||
/* 55 */
|
||||
"requested value is not set\0"
|
||||
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
|
||||
"bad escape sequence in replacement string\0"
|
||||
"expected closing curly bracket in replacement string\0"
|
||||
"bad substitution in replacement string\0"
|
||||
/* 60 */
|
||||
"match with end before start or start moved backwards is not supported\0"
|
||||
"too many replacements (more than INT_MAX)\0"
|
||||
"bad serialized data\0"
|
||||
"heap limit exceeded\0"
|
||||
"invalid syntax\0"
|
||||
/* 65 */
|
||||
"internal error: duplicate substitution match\0"
|
||||
"PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
|
||||
"internal error: invalid substring offset\0"
|
||||
"feature is not supported by the JIT compiler\0"
|
||||
"error performing replacement case transformation\0"
|
||||
/* 70 */
|
||||
"replacement too large (longer than PCRE2_SIZE)\0"
|
||||
"substitute pattern differs from prior match call\0"
|
||||
"substitute subject differs from prior match call\0"
|
||||
"substitute start offset differs from prior match call\0"
|
||||
"substitute options differ from prior match call\0"
|
||||
"disallowed use of \\K in lookaround\0"
|
||||
;
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return error message *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies an error message into a buffer whose units are of an
|
||||
appropriate width. Error numbers are positive for compile-time errors, and
|
||||
negative for match-time errors (except for UTF errors), but the numbers are all
|
||||
distinct.
|
||||
|
||||
Arguments:
|
||||
enumber error number
|
||||
buffer where to put the message (zero terminated)
|
||||
size size of the buffer in code units
|
||||
|
||||
Returns: length of message if all is well
|
||||
negative on error
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size)
|
||||
{
|
||||
const unsigned char *message;
|
||||
PCRE2_SIZE i;
|
||||
int n, rc = 0;
|
||||
|
||||
if (size == 0) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
if (enumber >= COMPILE_ERROR_BASE) /* Compile error */
|
||||
{
|
||||
message = compile_error_texts;
|
||||
n = enumber - COMPILE_ERROR_BASE;
|
||||
}
|
||||
else if (enumber < 0) /* Match or UTF error */
|
||||
{
|
||||
message = match_error_texts;
|
||||
n = -enumber;
|
||||
}
|
||||
else /* Invalid error number */
|
||||
{
|
||||
message = (const unsigned char *)"\0"; /* Empty message list */
|
||||
n = 1;
|
||||
}
|
||||
|
||||
for (; n > 0; n--)
|
||||
{
|
||||
while (*message++ != CHAR_NUL) {}
|
||||
if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
|
||||
for (i = 0; *message != 0; i++)
|
||||
{
|
||||
if (i >= size - 1)
|
||||
{
|
||||
rc = PCRE2_ERROR_NOMEMORY;
|
||||
break;
|
||||
}
|
||||
buffer[i] = *message++;
|
||||
}
|
||||
|
||||
#if defined EBCDIC && 'a' != 0x81
|
||||
/* If compiling for EBCDIC, but the compiler's string literals are not EBCDIC,
|
||||
then we are in the "force EBCDIC 1047" mode. I have chosen to add a few lines
|
||||
here to translate the error strings on the fly, rather than require the string
|
||||
literals above to be written out arduously using the "STR_XYZ" macros. */
|
||||
for (PCRE2_SIZE j = 0; j < i; ++j)
|
||||
buffer[j] = PRIV(ascii_to_ebcdic_1047)[buffer[j]];
|
||||
#endif
|
||||
|
||||
buffer[i] = 0; /* Terminate message, even if truncated. */
|
||||
return rc? rc : (int)i;
|
||||
}
|
||||
|
||||
/* End of pcre2_error.c */
|
||||
159
deps/pcre2/pcre2_extuni.c
vendored
Normal file
159
deps/pcre2/pcre2_extuni.c
vendored
Normal file
@@ -0,0 +1,159 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that is used to match a Unicode
|
||||
extended grapheme sequence. It is used by both pcre2_match() and
|
||||
pcre2_dfa_match(). However, it is called only when Unicode support is being
|
||||
compiled. Nevertheless, we provide a dummy function when there is no Unicode
|
||||
support, because some compilers do not like functionless source files. */
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/* Dummy function */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
PCRE2_SPTR
|
||||
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
|
||||
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
|
||||
{
|
||||
(void)c;
|
||||
(void)eptr;
|
||||
(void)start_subject;
|
||||
(void)end_subject;
|
||||
(void)utf;
|
||||
(void)xcount;
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match an extended grapheme sequence *
|
||||
*************************************************/
|
||||
|
||||
/* NOTE: The logic contained in this function is replicated in three special-
|
||||
purpose functions in the pcre2_jit_compile.c module. If the logic below is
|
||||
changed, they must be kept in step so that the interpreter and the JIT have the
|
||||
same behaviour.
|
||||
|
||||
Arguments:
|
||||
c the first character
|
||||
eptr pointer to next character
|
||||
start_subject pointer to start of subject
|
||||
end_subject pointer to end of subject
|
||||
utf TRUE if in UTF mode
|
||||
xcount pointer to count of additional characters,
|
||||
or NULL if count not needed
|
||||
|
||||
Returns: pointer after the end of the sequence
|
||||
*/
|
||||
|
||||
PCRE2_SPTR
|
||||
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
|
||||
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
|
||||
{
|
||||
BOOL was_ep_ZWJ = FALSE;
|
||||
int lgb = UCD_GRAPHBREAK(c);
|
||||
|
||||
while (eptr < end_subject)
|
||||
{
|
||||
int rgb;
|
||||
int len = 1;
|
||||
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
|
||||
rgb = UCD_GRAPHBREAK(c);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
||||
|
||||
/* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
|
||||
preceded by Extended Pictographic. */
|
||||
|
||||
if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
|
||||
break;
|
||||
|
||||
/* Not breaking between Regional Indicators is allowed only if there
|
||||
are an even number of preceding RIs. */
|
||||
|
||||
if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
|
||||
{
|
||||
int ricount = 0;
|
||||
PCRE2_SPTR bptr = eptr - 1;
|
||||
if (utf) BACKCHAR(bptr);
|
||||
|
||||
/* bptr is pointing to the left-hand character */
|
||||
|
||||
while (bptr > start_subject)
|
||||
{
|
||||
bptr--;
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(bptr);
|
||||
GETCHAR(c, bptr);
|
||||
}
|
||||
else
|
||||
c = *bptr;
|
||||
if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
|
||||
ricount++;
|
||||
}
|
||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
|
||||
between; see next statement). */
|
||||
|
||||
was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
|
||||
|
||||
/* If Extend follows Extended_Pictographic, do not update lgb; this allows
|
||||
any number of them before a following ZWJ. */
|
||||
|
||||
if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb;
|
||||
|
||||
eptr += len;
|
||||
if (xcount != NULL) *xcount += 1;
|
||||
}
|
||||
|
||||
return eptr;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_extuni.c */
|
||||
217
deps/pcre2/pcre2_find_bracket.c
vendored
Normal file
217
deps/pcre2/pcre2_find_bracket.c
vendored
Normal file
@@ -0,0 +1,217 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a single function that scans through a compiled pattern
|
||||
until it finds a capturing bracket with the given number, or, if the number is
|
||||
negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The
|
||||
function is called from pcre2_compile.c and also from pcre2_study.c when
|
||||
finding the minimum matching length. */
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan compiled regex for specific bracket *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to start of expression
|
||||
utf TRUE in UTF mode
|
||||
number the required bracket number or negative to find a lookbehind
|
||||
|
||||
Returns: pointer to the opcode for the bracket, or NULL if not found
|
||||
*/
|
||||
|
||||
PCRE2_SPTR
|
||||
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
PCRE2_UCHAR c = *code;
|
||||
|
||||
if (c == OP_END) return NULL;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit map.
|
||||
This includes negated single high-valued characters. ECLASS is used for
|
||||
classes that use set operations internally. CALLOUT_STR is used for
|
||||
callouts with string arguments. In each case the length in the table is
|
||||
zero; the actual length is stored in the compiled code. */
|
||||
|
||||
if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1);
|
||||
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
|
||||
|
||||
/* Handle lookbehind */
|
||||
|
||||
else if (c == OP_REVERSE || c == OP_VREVERSE)
|
||||
{
|
||||
if (number < 0) return code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Handle capturing bracket */
|
||||
|
||||
else if (c == OP_CBRA || c == OP_SCBRA ||
|
||||
c == OP_CBRAPOS || c == OP_SCBRAPOS)
|
||||
{
|
||||
int n = (int)GET2(code, 1+LINK_SIZE);
|
||||
if (n == number) return code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Otherwise, we can get the item's length from the table, except that for
|
||||
repeated character types, we have to test for \p and \P, which have an extra
|
||||
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
|
||||
must add in its length. */
|
||||
|
||||
else
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
code += 2;
|
||||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
code += code[1];
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||
followed by a multi-byte character. The length in the table is a minimum, so
|
||||
we have to arrange to skip the extra bytes. */
|
||||
|
||||
#ifdef MAYBE_UTF_MULTI
|
||||
if (utf) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_EXACT:
|
||||
case OP_EXACTI:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_UPTO:
|
||||
case OP_UPTOI:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_MINUPTO:
|
||||
case OP_MINUPTOI:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_POSUPTO:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTPOSUPTOI:
|
||||
case OP_STAR:
|
||||
case OP_STARI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTSTARI:
|
||||
case OP_MINSTAR:
|
||||
case OP_MINSTARI:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSSTARI:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_PLUS:
|
||||
case OP_PLUSI:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_MINPLUS:
|
||||
case OP_MINPLUSI:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSPLUSI:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_QUERY:
|
||||
case OP_QUERYI:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_MINQUERY:
|
||||
case OP_MINQUERYI:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSQUERYI:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSQUERYI:
|
||||
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void)(utf); /* Keep compiler happy by referencing function argument */
|
||||
#endif /* MAYBE_UTF_MULTI */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_find_bracket.c */
|
||||
844
deps/pcre2/pcre2_fuzzsupport.c
vendored
Normal file
844
deps/pcre2/pcre2_fuzzsupport.c
vendored
Normal file
@@ -0,0 +1,844 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2023 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
|
||||
tries to compile and match it, deriving options from the string itself. If
|
||||
STANDALONE is defined, a main program that calls the driver with the contents
|
||||
of specified files is compiled, and commentary on what is happening is output.
|
||||
If an argument starts with '=' the rest of it is taken as a literal string
|
||||
rather than a file name. This allows easy testing of short strings.
|
||||
|
||||
Written by Philip Hazel, October 2016
|
||||
Updated February 2024 (Addison Crump added 16-bit/32-bit and JIT support)
|
||||
Further updates March/April/May 2024 by PH
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/* stack size adjustment */
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
#define STACK_SIZE_MB 256
|
||||
#define JIT_SIZE_LIMIT (200 * 1024)
|
||||
|
||||
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define MAX_MATCH_SIZE 1000
|
||||
|
||||
#define DFA_WORKSPACE_COUNT 100
|
||||
|
||||
/* When adding new compile or match options, remember to update the functions
|
||||
below that output them. */
|
||||
|
||||
#define ALLOWED_COMPILE_OPTIONS \
|
||||
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
|
||||
PCRE2_ALT_EXTENDED_CLASS|PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT| \
|
||||
PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
|
||||
PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \
|
||||
PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \
|
||||
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
|
||||
PCRE2_NO_AUTO_CAPTURE| \
|
||||
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
|
||||
PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
|
||||
PCRE2_UTF)
|
||||
|
||||
#define ALLOWED_MATCH_OPTIONS \
|
||||
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
|
||||
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
|
||||
PCRE2_PARTIAL_SOFT)
|
||||
|
||||
#define BASE_MATCH_OPTIONS \
|
||||
(PCRE2_NO_JIT|PCRE2_DISABLE_RECURSELOOP_CHECK)
|
||||
|
||||
|
||||
#if defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE)
|
||||
static void print_compile_options(FILE *stream, uint32_t compile_options)
|
||||
{
|
||||
fprintf(stream, "Compile options %s%.8x =",
|
||||
(compile_options == PCRE2_NEVER_BACKSLASH_C)? "(base) " : "",
|
||||
compile_options);
|
||||
|
||||
fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
((compile_options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
|
||||
((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
|
||||
((compile_options & PCRE2_ALT_EXTENDED_CLASS) != 0)? "alt_extended_class" : "",
|
||||
((compile_options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
|
||||
((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
|
||||
((compile_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||
((compile_options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
|
||||
((compile_options & PCRE2_CASELESS) != 0)? " caseless" : "",
|
||||
((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
|
||||
((compile_options & PCRE2_DOTALL) != 0)? " dotall" : "",
|
||||
((compile_options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
|
||||
((compile_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
|
||||
((compile_options & PCRE2_EXTENDED) != 0)? " extended" : "",
|
||||
((compile_options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
|
||||
((compile_options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
|
||||
((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
|
||||
((compile_options & PCRE2_MULTILINE) != 0)? " multiline" : "",
|
||||
((compile_options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
|
||||
((compile_options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
|
||||
((compile_options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
|
||||
((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
|
||||
((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
|
||||
((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
|
||||
((compile_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||
((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
|
||||
((compile_options & PCRE2_UCP) != 0)? " ucp" : "",
|
||||
((compile_options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
|
||||
((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
|
||||
((compile_options & PCRE2_UTF) != 0)? " utf" : "");
|
||||
}
|
||||
|
||||
static void print_match_options(FILE *stream, uint32_t match_options)
|
||||
{
|
||||
fprintf(stream, "Match options %s%.8x =",
|
||||
(match_options == BASE_MATCH_OPTIONS)? "(base) " : "", match_options);
|
||||
|
||||
fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||
((match_options & PCRE2_DISABLE_RECURSELOOP_CHECK) != 0)? " disable_recurseloop_check" : "",
|
||||
((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
|
||||
((match_options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
|
||||
((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||
((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
|
||||
((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
|
||||
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
|
||||
((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
|
||||
((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
|
||||
((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
|
||||
}
|
||||
|
||||
|
||||
/* This function can print an error message at all code unit widths. */
|
||||
|
||||
static void print_error(FILE *f, int errorcode, const char *text, ...)
|
||||
{
|
||||
PCRE2_UCHAR buffer[256];
|
||||
PCRE2_UCHAR *p = buffer;
|
||||
va_list ap;
|
||||
va_start(ap, text);
|
||||
vfprintf(f, text, ap);
|
||||
va_end(ap);
|
||||
pcre2_get_error_message(errorcode, buffer, 256);
|
||||
while (*p != 0) fprintf(f, "%c", *p++);
|
||||
printf("\n");
|
||||
}
|
||||
#endif /* defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE) */
|
||||
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
#ifdef SUPPORT_DIFF_FUZZ
|
||||
/* Write the first count captured substrings of a match data block to stream,
each as a line of hex-encoded code units. A substring that cannot be extracted
is reported with the corresponding PCRE2 error message instead.

Arguments:
  stream       where to write
  count        number of substrings (starting from 0) to show
  match_data   the match data block to read from
*/

static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data)
{
int errorcode;

for (int index = 0; index < count; index++)
  {
  PCRE2_UCHAR *bufferptr = NULL;
  PCRE2_SIZE bufflen = 0;

  errorcode = pcre2_substring_get_bynumber(match_data, index, &bufferptr,
    &bufflen);

  if (errorcode >= 0)
    {
    fprintf(stream, "Match %d (hex encoded): ", index);
    for (PCRE2_SIZE i = 0; i < bufflen; i++)
      {
      fprintf(stream, "%02x", bufferptr[i]);
      }
    fprintf(stream, "\n");
    }
  else
    {
    print_error(stream, errorcode, "Match %d failed: ", index);
    }

  /* Release the extracted copy. The pointer is still NULL when extraction
  failed; the comparison code elsewhere in this file frees its substrings
  unconditionally in the same way. */

  pcre2_substring_free(bufferptr);
  }
}
|
||||
|
||||
/* This function describes the current test case being evaluated, then aborts */
|
||||
|
||||
static void describe_failure(
|
||||
const char *task,
|
||||
const PCRE2_UCHAR *data,
|
||||
PCRE2_SIZE size,
|
||||
uint32_t compile_options,
|
||||
uint32_t match_options,
|
||||
int errorcode,
|
||||
int errorcode_jit,
|
||||
int matches,
|
||||
int matches_jit,
|
||||
pcre2_match_data *match_data,
|
||||
pcre2_match_data *match_data_jit
|
||||
) {
|
||||
|
||||
fprintf(stderr, "Encountered failure while performing %s; context:\n", task);
|
||||
|
||||
fprintf(stderr, "Pattern/sample string (hex encoded): ");
|
||||
for (size_t i = 0; i < size; i++)
|
||||
{
|
||||
fprintf(stderr, "%02x", data[i]);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
print_compile_options(stderr, compile_options);
|
||||
print_match_options(stderr, match_options);
|
||||
|
||||
if (errorcode < 0)
|
||||
{
|
||||
print_error(stderr, errorcode, "Non-JIT'd operation emitted an error: ");
|
||||
}
|
||||
|
||||
if (matches >= 0)
|
||||
{
|
||||
fprintf(stderr, "Non-JIT'd operation did not emit an error.\n");
|
||||
if (match_data != NULL)
|
||||
{
|
||||
fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", matches);
|
||||
dump_matches(stderr, matches, match_data);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (errorcode_jit < 0)
|
||||
{
|
||||
print_error(stderr, errorcode_jit, "JIT'd operation emitted error %d:",
|
||||
errorcode_jit);
|
||||
}
|
||||
|
||||
if (matches_jit >= 0)
|
||||
{
|
||||
fprintf(stderr, "JIT'd operation did not emit an error.\n");
|
||||
if (match_data_jit != NULL)
|
||||
{
|
||||
fprintf(stderr, "%d matches discovered by JIT'd regex:\n", matches_jit);
|
||||
dump_matches(stderr, matches_jit, match_data_jit);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
abort();
|
||||
}
|
||||
#endif /* SUPPORT_DIFF_FUZZ */
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* This is the callout function. Its only purpose is to halt matching if there
|
||||
are more than 100 callouts, as one way of stopping too much time being spent on
|
||||
fruitless matches. The callout data is a pointer to the counter. */
|
||||
|
||||
/* Callout handler: count this callout in the uint32_t that callout_data points
to, and ask for the match to be abandoned once the count has gone above 100.

Arguments:
  cb             the callout block (not used)
  callout_data   pointer to the uint32_t callout counter

Returns:         PCRE2_ERROR_CALLOUT when the count exceeds 100, otherwise 0
*/

static int callout_function(pcre2_callout_block *cb, void *callout_data)
{
uint32_t *counter = (uint32_t *)callout_data;

(void)cb;  /* Avoid unused parameter warning */

*counter += 1;
if (*counter > 100) return PCRE2_ERROR_CALLOUT;
return 0;
}
|
||||
|
||||
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
|
||||
"no previous prototype" warning when compiling at high warning level. */
|
||||
|
||||
int LLVMFuzzerInitialize(int *, char ***);
|
||||
|
||||
int LLVMFuzzerTestOneInput(unsigned char *, size_t);
|
||||
|
||||
int LLVMFuzzerInitialize(int *argc, char ***argv)
|
||||
{
|
||||
int rc;
|
||||
struct rlimit rlim;
|
||||
getrlimit(RLIMIT_STACK, &rlim);
|
||||
rlim.rlim_cur = STACK_SIZE_MB * 1024 * 1024;
|
||||
if (rlim.rlim_cur > rlim.rlim_max)
|
||||
{
|
||||
fprintf(stderr, "Hard stack size limit is too small\n");
|
||||
_exit(1);
|
||||
}
|
||||
rc = setrlimit(RLIMIT_STACK, &rlim);
|
||||
if (rc != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to expand stack size\n");
|
||||
_exit(1);
|
||||
}
|
||||
|
||||
(void)argc; /* Avoid "unused parameter" warnings */
|
||||
(void)argv;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Here's the driving function. */
|
||||
|
||||
int LLVMFuzzerTestOneInput(unsigned char *data, size_t size)
|
||||
{
|
||||
PCRE2_UCHAR *wdata;
|
||||
PCRE2_UCHAR *newwdata = NULL;
|
||||
uint32_t compile_options;
|
||||
uint32_t match_options;
|
||||
uint64_t random_options;
|
||||
pcre2_match_data *match_data = NULL;
|
||||
#ifdef SUPPORT_JIT
|
||||
pcre2_match_data *match_data_jit = NULL;
|
||||
#endif
|
||||
pcre2_compile_context *compile_context = NULL;
|
||||
pcre2_match_context *match_context = NULL;
|
||||
size_t match_size;
|
||||
int dfa_workspace[DFA_WORKSPACE_COUNT];
|
||||
|
||||
if (size < sizeof(random_options)) return -1;
|
||||
|
||||
random_options = *(uint64_t *)(data);
|
||||
data += sizeof(random_options);
|
||||
wdata = (PCRE2_UCHAR *)data;
|
||||
size -= sizeof(random_options);
|
||||
size /= PCRE2_CODE_UNIT_WIDTH / 8;
|
||||
|
||||
/* PCRE2 compiles quantified groups by replicating them. In certain cases of
|
||||
very large quantifiers this can lead to unacceptably long JIT compile times. To
|
||||
get around this, we scan the data string for large quantifiers that follow a
|
||||
closing parenthesis, and reduce the value of the quantifier to 10, assuming
|
||||
that this will make minimal difference to the detection of bugs.
|
||||
|
||||
Do the same for quantifiers that follow a closing square bracket, because
|
||||
classes that contain a number of non-ascii characters can take a lot of time
|
||||
when matching.
|
||||
|
||||
We have to make a copy of the input because oss-fuzz complains if we overwrite
|
||||
the original. Start the scan at the second character so there can be a
|
||||
lookbehind for a backslash, and end it before the end so that the next
|
||||
character can be checked for an opening brace. */
|
||||
|
||||
if (size > 3)
|
||||
{
|
||||
newwdata = malloc(size * sizeof(PCRE2_UCHAR));
|
||||
memcpy(newwdata, wdata, size * sizeof(PCRE2_UCHAR));
|
||||
wdata = newwdata;
|
||||
|
||||
for (size_t i = 1; i < size - 2; i++)
|
||||
{
|
||||
size_t j;
|
||||
|
||||
if ((wdata[i] != ')' && wdata[i] != ']') || wdata[i-1] == '\\' ||
|
||||
wdata[i+1] != '{')
|
||||
continue;
|
||||
i++; /* Points to '{' */
|
||||
|
||||
/* Loop for two values in a quantifier. Offset i points to brace or comma
|
||||
at the start of the loop. */
|
||||
|
||||
for (int ii = 0; ii < 2; ii++)
|
||||
{
|
||||
int q = 0;
|
||||
|
||||
if (i >= size - 1) goto END_QSCAN; /* Can happen for , */
|
||||
|
||||
/* Ignore leading spaces. */
|
||||
|
||||
while (wdata[i+1] == ' ' || wdata[i+1] == '\t')
|
||||
{
|
||||
i++;
|
||||
if (i >= size - 1) goto END_QSCAN;
|
||||
}
|
||||
|
||||
/* Ignore non-significant leading zeros. */
|
||||
|
||||
while (wdata[i+1] == '0' && i+2 < size && wdata[i+2] >= '0' &&
|
||||
wdata[i+2] <= '9')
|
||||
{
|
||||
i++;
|
||||
if (i >= size - 1) goto END_QSCAN;
|
||||
}
|
||||
|
||||
/* Scan for a number ending in brace, or comma in the first iteration,
|
||||
optionally preceded by space. */
|
||||
|
||||
for (j = i + 1; j < size && j < i + 7; j++)
|
||||
{
|
||||
if (wdata[j] == ' ' || wdata[j] == '\t')
|
||||
{
|
||||
j++;
|
||||
while (j < size && (wdata[j] == ' ' || wdata[j] == '\t')) j++;
|
||||
if (j >= size) goto OUTERLOOP;
|
||||
if (wdata[j] != '}' && wdata[j] != ',') goto OUTERLOOP;
|
||||
}
|
||||
if (wdata[j] == '}' || (ii == 0 && wdata[j] == ',')) break;
|
||||
|
||||
if (wdata[j] < '0' || wdata[j] > '9')
|
||||
{
|
||||
j--; /* Ensure this character is checked next. The */
|
||||
goto OUTERLOOP; /* string might be (e.g.) "){9){234}" */
|
||||
}
|
||||
q = q * 10 + (wdata[j] - '0');
|
||||
}
|
||||
|
||||
if (j >= size) goto END_QSCAN; /* End of data */
|
||||
|
||||
/* Hit ',' or '}' or read 6 digits. Six digits is a number > 65536 which
|
||||
is the maximum quantifier. Leave such numbers alone. */
|
||||
|
||||
if (j >= i + 7 || q > 65535) goto OUTERLOOP;
|
||||
|
||||
/* Limit the quantifier size to 10 */
|
||||
|
||||
if (q > 10)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Reduced quantifier value %d to 10.\n", q);
|
||||
#endif
|
||||
for (size_t k = i + 1; k < j; k++) wdata[k] = '0';
|
||||
wdata[j - 2] = '1';
|
||||
}
|
||||
|
||||
/* Advance to end of number and break if reached closing brace (continue
|
||||
after comma, which is only valid in the first time round this loop). */
|
||||
|
||||
i = j;
|
||||
if (wdata[i] == '}') break;
|
||||
}
|
||||
|
||||
/* Continue along the data string */
|
||||
|
||||
OUTERLOOP:
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
END_QSCAN:
|
||||
|
||||
/* Limiting the length of the subject for matching stops fruitless searches
|
||||
in large trees taking too much time. */
|
||||
|
||||
match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
|
||||
|
||||
/* Create a compile context, and set a limit on the size of the compiled
|
||||
pattern. This stops the fuzzer using vast amounts of memory. */
|
||||
|
||||
compile_context = pcre2_compile_context_create(NULL);
|
||||
if (compile_context == NULL)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
fprintf(stderr, "** Failed to create compile context block\n");
|
||||
#endif
|
||||
abort();
|
||||
}
|
||||
pcre2_set_max_pattern_compiled_length(compile_context, 10*1024*1024);
|
||||
|
||||
/* Ensure that all undefined option bits are zero (waste of time trying them)
|
||||
and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
|
||||
input is valid UTF. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is
|
||||
no reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set
|
||||
because \C in random patterns is highly likely to cause a crash. */
|
||||
|
||||
compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) |
|
||||
PCRE2_NEVER_BACKSLASH_C;
|
||||
match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) |
|
||||
BASE_MATCH_OPTIONS;
|
||||
|
||||
/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
|
||||
allowed together and just give an immediate error return. */
|
||||
|
||||
if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
|
||||
match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
|
||||
|
||||
/* Do the compile with and without the options, and after a successful compile,
|
||||
likewise do the match with and without the options. */
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
uint32_t callout_count;
|
||||
int errorcode;
|
||||
#ifdef SUPPORT_JIT
|
||||
int errorcode_jit;
|
||||
#ifdef SUPPORT_DIFF_FUZZ
|
||||
int matches = 0;
|
||||
int matches_jit = 0;
|
||||
#endif
|
||||
#endif
|
||||
PCRE2_SIZE erroroffset;
|
||||
pcre2_code *code;
|
||||
|
||||
#ifdef STANDALONE
|
||||
printf("\n");
|
||||
print_compile_options(stdout, compile_options);
|
||||
#endif
|
||||
|
||||
code = pcre2_compile((PCRE2_SPTR)wdata, (PCRE2_SIZE)size, compile_options,
|
||||
&errorcode, &erroroffset, compile_context);
|
||||
|
||||
/* Compilation succeeded */
|
||||
|
||||
if (code != NULL)
|
||||
{
|
||||
int j;
|
||||
uint32_t save_match_options = match_options;
|
||||
|
||||
/* Call JIT compile only if the compiled pattern is not too big. */
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
int jit_ret = -1;
|
||||
if (((struct pcre2_real_code *)code)->blocksize <= JIT_SIZE_LIMIT)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Compile succeeded; calling JIT compile\n");
|
||||
#endif
|
||||
jit_ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
|
||||
#ifdef STANDALONE
|
||||
if (jit_ret < 0) printf("JIT compile error %d\n", jit_ret);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Not calling JIT: compiled pattern is too long "
|
||||
"(%ld bytes; limit=%d)\n",
|
||||
((struct pcre2_real_code *)code)->blocksize, JIT_SIZE_LIMIT);
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* Create match data and context blocks only when we first need them. Set
|
||||
low match and depth limits to avoid wasting too much searching large
|
||||
pattern trees. Almost all matches are going to fail. */
|
||||
|
||||
if (match_data == NULL)
|
||||
{
|
||||
match_data = pcre2_match_data_create(32, NULL);
|
||||
#ifdef SUPPORT_JIT
|
||||
match_data_jit = pcre2_match_data_create(32, NULL);
|
||||
if (match_data == NULL || match_data_jit == NULL)
|
||||
#else
|
||||
if (match_data == NULL)
|
||||
#endif
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
fprintf(stderr, "** Failed to create match data block\n");
|
||||
#endif
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
if (match_context == NULL)
|
||||
{
|
||||
match_context = pcre2_match_context_create(NULL);
|
||||
if (match_context == NULL)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
fprintf(stderr, "** Failed to create match context block\n");
|
||||
#endif
|
||||
abort();
|
||||
}
|
||||
(void)pcre2_set_match_limit(match_context, 100);
|
||||
(void)pcre2_set_depth_limit(match_context, 100);
|
||||
(void)pcre2_set_callout(match_context, callout_function, &callout_count);
|
||||
}
|
||||
|
||||
/* Match twice, with and without options. */
|
||||
|
||||
#ifdef STANDALONE
|
||||
printf("\n");
|
||||
#endif
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
print_match_options(stdout, match_options);
|
||||
#endif
|
||||
|
||||
callout_count = 0;
|
||||
errorcode = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
|
||||
match_options, match_data, match_context);
|
||||
|
||||
#ifdef STANDALONE
|
||||
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
|
||||
print_error(stdout, errorcode, "Match failed: error %d: ", errorcode);
|
||||
#endif
|
||||
|
||||
/* If JIT is enabled, do a JIT match and, if appropriately compiled, compare
|
||||
with the interpreter. */
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
if (jit_ret >= 0)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Matching with JIT\n");
|
||||
#endif
|
||||
callout_count = 0;
|
||||
errorcode_jit = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
|
||||
match_options & ~PCRE2_NO_JIT, match_data_jit, match_context);
|
||||
|
||||
#ifdef STANDALONE
|
||||
if (errorcode_jit >= 0)
|
||||
printf("Match returned %d\n", errorcode_jit);
|
||||
else
|
||||
print_error(stdout, errorcode_jit, "JIT match failed: error %d: ",
|
||||
errorcode_jit);
|
||||
#else
|
||||
(void)errorcode_jit; /* Avoid compiler warning */
|
||||
#endif /* STANDALONE */
|
||||
|
||||
/* With differential matching enabled, compare with interpreter. */
|
||||
|
||||
#ifdef SUPPORT_DIFF_FUZZ
|
||||
matches = errorcode;
|
||||
matches_jit = errorcode_jit;
|
||||
|
||||
if (errorcode_jit != errorcode)
|
||||
{
|
||||
if (!(errorcode < 0 && errorcode_jit < 0) &&
|
||||
errorcode != PCRE2_ERROR_MATCHLIMIT && errorcode != PCRE2_ERROR_CALLOUT &&
|
||||
errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT && errorcode_jit != PCRE2_ERROR_CALLOUT)
|
||||
{
|
||||
describe_failure("match errorcode comparison", wdata, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int index = 0; index < errorcode; index++)
|
||||
{
|
||||
PCRE2_UCHAR *bufferptr, *bufferptr_jit;
|
||||
PCRE2_SIZE bufflen, bufflen_jit;
|
||||
|
||||
bufferptr = bufferptr_jit = NULL;
|
||||
bufflen = bufflen_jit = 0;
|
||||
|
||||
errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen);
|
||||
errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit);
|
||||
|
||||
if (errorcode != errorcode_jit)
|
||||
{
|
||||
describe_failure("match entry errorcode comparison", wdata, size,
|
||||
compile_options, match_options, errorcode, errorcode_jit,
|
||||
matches, matches_jit, match_data, match_data_jit);
|
||||
}
|
||||
|
||||
if (errorcode >= 0)
|
||||
{
|
||||
if (bufflen != bufflen_jit)
|
||||
{
|
||||
describe_failure("match entry length comparison", wdata, size,
|
||||
compile_options, match_options, errorcode, errorcode_jit,
|
||||
matches, matches_jit, match_data, match_data_jit);
|
||||
}
|
||||
|
||||
if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0)
|
||||
{
|
||||
describe_failure("match entry content comparison", wdata, size,
|
||||
compile_options, match_options, errorcode, errorcode_jit,
|
||||
matches, matches_jit, match_data, match_data_jit);
|
||||
}
|
||||
}
|
||||
|
||||
pcre2_substring_free(bufferptr);
|
||||
pcre2_substring_free(bufferptr_jit);
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_DIFF_FUZZ */
|
||||
}
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
if (match_options == BASE_MATCH_OPTIONS) break; /* Don't do same twice */
|
||||
match_options = BASE_MATCH_OPTIONS; /* For second time */
|
||||
}
|
||||
|
||||
/* Match with DFA twice, with and without options, but remove options that
|
||||
are not allowed with DFA. */
|
||||
|
||||
match_options = save_match_options & ~BASE_MATCH_OPTIONS;
|
||||
|
||||
#ifdef STANDALONE
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("DFA match options %.8x =", match_options);
|
||||
printf("%s%s%s%s%s%s%s%s%s\n",
|
||||
((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||
((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
|
||||
((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||
((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
|
||||
((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
|
||||
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
|
||||
((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
|
||||
((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
|
||||
((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
|
||||
#endif
|
||||
|
||||
callout_count = 0;
|
||||
errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)wdata,
|
||||
(PCRE2_SIZE)match_size, 0, match_options, match_data,
|
||||
match_context, dfa_workspace, DFA_WORKSPACE_COUNT);
|
||||
|
||||
#ifdef STANDALONE
|
||||
if (errorcode >= 0)
|
||||
printf("Match returned %d\n", errorcode);
|
||||
else
|
||||
print_error(stdout, errorcode, "DFA match failed: error %d: ", errorcode);
|
||||
#endif
|
||||
|
||||
if (match_options == 0) break; /* No point doing same twice */
|
||||
match_options = 0; /* For second time */
|
||||
}
|
||||
|
||||
match_options = save_match_options; /* Reset for the second compile */
|
||||
pcre2_code_free(code);
|
||||
}
|
||||
|
||||
/* Compilation failed */
|
||||
|
||||
else
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
print_error(stdout, errorcode, "Error %d at offset %lu: ", errorcode,
|
||||
erroroffset);
|
||||
#else
|
||||
if (errorcode == PCRE2_ERROR_INTERNAL) abort();
|
||||
#endif
|
||||
}
|
||||
|
||||
if (compile_options == PCRE2_NEVER_BACKSLASH_C) break; /* Avoid same twice */
|
||||
compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
|
||||
}
|
||||
|
||||
/* Tidy up before exiting */
|
||||
|
||||
if (match_data != NULL) pcre2_match_data_free(match_data);
|
||||
#ifdef SUPPORT_JIT
|
||||
if (match_data_jit != NULL) pcre2_match_data_free(match_data_jit);
|
||||
#endif
|
||||
free(newwdata);
|
||||
if (match_context != NULL) pcre2_match_context_free(match_context);
|
||||
if (compile_context != NULL) pcre2_compile_context_free(compile_context);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Optional main program. */
|
||||
|
||||
#ifdef STANDALONE
|
||||
/* Stand-alone driver. Each argument is either a literal string, introduced by
'=', or the name of a file; the literal text or the file's contents are passed
to LLVMFuzzerTestOneInput(). Problems with an argument are reported on stdout
and processing moves on to the next one.

Arguments:
  argc   argument count
  argv   argument vector

Returns: 0
*/

int main(int argc, char **argv)
{
LLVMFuzzerInitialize(&argc, &argv);

if (argc < 2)
  {
  printf("** No arguments given\n");
  return 0;
  }

for (int i = 1; i < argc; i++)
  {
  size_t filelen;
  size_t readsize;
  long filepos;
  unsigned char *buffer;
  FILE *f;

  /* Handle a literal string. Copy to an exact size buffer so that checks for
  overrunning work. Sizes are cast for printing so that the arguments agree
  with %lu. At least one byte is requested because malloc(0) is allowed to
  return NULL, which would be misreported as an allocation failure. */

  if (argv[i][0] == '=')
    {
    readsize = strlen(argv[i]) - 1;
    printf("------ <Literal> ------\n");
    printf("Length = %lu\n", (unsigned long)readsize);
    printf("%.*s\n", (int)readsize, argv[i]+1);
    buffer = (unsigned char *)malloc((readsize > 0)? readsize : 1);
    if (buffer == NULL)
      printf("** Failed to allocate %lu bytes of memory\n",
        (unsigned long)readsize);
    else
      {
      memcpy(buffer, argv[i]+1, readsize);
      LLVMFuzzerTestOneInput(buffer, readsize);
      free(buffer);
      }
    continue;
    }

  /* Handle a string given in a file */

  f = fopen(argv[i], "rb");
  if (f == NULL)
    {
    printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
    continue;
    }

  printf("------ %s ------\n", argv[i]);

  /* Find the file's length by seeking to its end. A failure here must not be
  turned into a length: ftell() reports errors as -1. */

  filepos = -1;
  if (fseek(f, 0, SEEK_END) == 0) filepos = ftell(f);
  if (filepos < 0 || fseek(f, 0, SEEK_SET) != 0)
    {
    printf("** Failed to find the size of %s: %s\n", argv[i], strerror(errno));
    fclose(f);
    continue;
    }
  filelen = (size_t)filepos;

  buffer = (unsigned char *)malloc((filelen > 0)? filelen : 1);
  if (buffer == NULL)
    {
    printf("** Failed to allocate %lu bytes of memory\n",
      (unsigned long)filelen);
    fclose(f);
    continue;
    }

  readsize = fread(buffer, 1, filelen, f);
  fclose(f);

  if (readsize != filelen)
    printf("** File size is %lu but fread() returned %lu\n",
      (unsigned long)filelen, (unsigned long)readsize);
  else
    {
    printf("Length = %lu\n", (unsigned long)filelen);
    LLVMFuzzerTestOneInput(buffer, filelen);
    }
  free(buffer);
  }

return 0;
}
|
||||
#endif /* STANDALONE */
|
||||
|
||||
/* End */
|
||||
2353
deps/pcre2/pcre2_internal.h
vendored
Normal file
2353
deps/pcre2/pcre2_internal.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1044
deps/pcre2/pcre2_intmodedep.h
vendored
Normal file
1044
deps/pcre2/pcre2_intmodedep.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
177
deps/pcre2/pcre2_maketables.c
vendored
Normal file
177
deps/pcre2/pcre2_maketables.c
vendored
Normal file
@@ -0,0 +1,177 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre2_maketables(), which builds
|
||||
character tables for PCRE2 in the current locale. The file is compiled on its
|
||||
own as part of the PCRE2 library. It is also included in the compilation of
|
||||
pcre2_dftables.c as a freestanding program, in which case the macro
|
||||
PCRE2_DFTABLES is defined. */
|
||||
|
||||
|
||||
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||
#include "pcre2_internal.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create PCRE2 character tables *
|
||||
*************************************************/
|
||||
|
||||
/* This function builds a set of character tables for use by PCRE2 and returns
|
||||
a pointer to them. They are built using the ctype functions, and consequently
|
||||
their contents will depend upon the current locale setting. When compiled as
|
||||
part of the library, the store is obtained via a general context malloc, if
|
||||
supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
|
||||
freestanding auxiliary program) malloc() is used, and the function has a
|
||||
different name so as not to clash with the prototype in pcre2.h.
|
||||
|
||||
Arguments: pointers to character-transforming functions when PCRE2_DFTABLES is
|
||||
defined;
|
||||
else a PCRE2 general context or NULL
|
||||
Returns: pointer to the contiguous block of data;
|
||||
else NULL if memory allocation failed
|
||||
*/
|
||||
|
||||
#ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */
|
||||
static const uint8_t *maketables(int (*charfn_to)(int), int (*charfn_from)(int))
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
|
||||
|
||||
#else /* Not PCRE2_DFTABLES, that is, compiling the library */
|
||||
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables(pcre2_general_context *gcontext)
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
|
||||
gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
|
||||
malloc(TABLES_LENGTH));
|
||||
|
||||
#define charfn_to(c) (c)
|
||||
#define charfn_from(c) (c)
|
||||
#endif /* PCRE2_DFTABLES */
|
||||
|
||||
int i;
|
||||
uint8_t *p;
|
||||
|
||||
if (yield == NULL) return NULL;
|
||||
p = yield;
|
||||
|
||||
/* First comes the lower casing table */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int c = charfn_from(tolower(charfn_to(i)));
|
||||
*p++ = (c < 256)? c : i;
|
||||
}
|
||||
|
||||
/* Next the case-flipping table */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int c = charfn_from(islower(charfn_to(i))? toupper(charfn_to(i))
|
||||
: tolower(charfn_to(i)));
|
||||
*p++ = (c < 256)? c : i;
|
||||
}
|
||||
|
||||
/* Then the character class tables. Don't try to be clever and save effort on
|
||||
exclusive ones - in some locales things may be different.
|
||||
|
||||
Note that the table for "space" includes everything "isspace" gives, including
|
||||
VT in the default locale. This makes it work for the POSIX class [:space:].
|
||||
From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
|
||||
space, because Perl added VT at release 5.18.
|
||||
|
||||
Note also that it is possible for a character to be alnum or alpha without
|
||||
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
|
||||
fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
|
||||
test for alnum specially. */
|
||||
|
||||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if (isdigit(charfn_to(i))) p[cbit_digit + i/8] |= 1u << (i&7);
|
||||
if (isupper(charfn_to(i))) p[cbit_upper + i/8] |= 1u << (i&7);
|
||||
if (islower(charfn_to(i))) p[cbit_lower + i/8] |= 1u << (i&7);
|
||||
if (isalnum(charfn_to(i))) p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (i == CHAR_UNDERSCORE) p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (isspace(charfn_to(i))) p[cbit_space + i/8] |= 1u << (i&7);
|
||||
if (isxdigit(charfn_to(i))) p[cbit_xdigit + i/8] |= 1u << (i&7);
|
||||
if (isgraph(charfn_to(i))) p[cbit_graph + i/8] |= 1u << (i&7);
|
||||
if (isprint(charfn_to(i))) p[cbit_print + i/8] |= 1u << (i&7);
|
||||
if (ispunct(charfn_to(i))) p[cbit_punct + i/8] |= 1u << (i&7);
|
||||
if (iscntrl(charfn_to(i))) p[cbit_cntrl + i/8] |= 1u << (i&7);
|
||||
}
|
||||
p += cbit_length;
|
||||
|
||||
/* Finally, the character type table. In this, we used to exclude VT from the
|
||||
white space chars, because Perl didn't recognize it as such for \s and for
|
||||
comments within regexes. However, Perl changed at release 5.18, so PCRE1
|
||||
changed at release 8.34 and it's always been this way for PCRE2. */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int x = 0;
|
||||
if (isspace(charfn_to(i))) x += ctype_space;
|
||||
if (isalpha(charfn_to(i))) x += ctype_letter;
|
||||
if (islower(charfn_to(i))) x += ctype_lcletter;
|
||||
if (isdigit(charfn_to(i))) x += ctype_digit;
|
||||
if (isalnum(charfn_to(i)) || i == CHAR_UNDERSCORE) x += ctype_word;
|
||||
*p++ = x;
|
||||
}
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||
#undef charfn_to
|
||||
#undef charfn_from
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
|
||||
{
|
||||
if (gcontext != NULL)
|
||||
gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
|
||||
else
|
||||
free((void *)tables);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* End of pcre2_maketables.c */
|
||||
8244
deps/pcre2/pcre2_match.c
vendored
Normal file
8244
deps/pcre2/pcre2_match.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
184
deps/pcre2/pcre2_match_data.c
vendored
Normal file
184
deps/pcre2/pcre2_match_data.c
vendored
Normal file
@@ -0,0 +1,184 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create a match data block given ovector size *
|
||||
*************************************************/
|
||||
|
||||
/* A minimum of 1 is imposed on the number of ovector pairs. A maximum is also
|
||||
imposed because the oveccount field in a match data block is uint16_t. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{
/* Allocate a match data block with room for oveccount offset pairs and
initialise its bookkeeping fields. Returns NULL if the allocation fails. */
pcre2_match_data *block;
size_t block_size;

/* Force the number of pairs into the range 1..UINT16_MAX. */
if (oveccount == 0) oveccount = 1;
else if (oveccount > UINT16_MAX) oveccount = UINT16_MAX;

/* The ovector lives at the end of the block: two PCRE2_SIZE slots per pair. */
block_size = offsetof(pcre2_match_data, ovector) +
  2*oveccount*sizeof(PCRE2_SIZE);

block = PRIV(memctl_malloc)(block_size, (pcre2_memctl *)gcontext);
if (block != NULL)
  {
  block->oveccount = oveccount;
  block->flags = 0;
  block->heapframes = NULL;
  block->heapframes_size = 0;
  }
return block;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create a match data block using pattern data *
|
||||
*************************************************/
|
||||
|
||||
/* If no context is supplied, use the memory allocator from the code. This code
|
||||
assumes that a general context contains nothing other than a memory allocator.
|
||||
If that ever changes, this code will need fixing. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(const pcre2_code *code,
  pcre2_general_context *gcontext)
{
/* Create a match data block sized from the pattern's top_bracket value plus
one. Returns NULL when code is NULL. When no general context is given, the
compiled code itself is passed on in its place. */
const pcre2_real_code *re;
pcre2_general_context *allocator;

if (code == NULL) return NULL;

re = (const pcre2_real_code *)code;
allocator = (gcontext != NULL)? gcontext : (pcre2_general_context *)code;

return pcre2_match_data_create(re->top_bracket + 1, allocator);
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free a match data block *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_match_data_free(pcre2_match_data *match_data)
|
||||
{
|
||||
if (match_data != NULL)
|
||||
{
|
||||
if (match_data->heapframes != NULL)
|
||||
match_data->memctl.free(match_data->heapframes,
|
||||
match_data->memctl.memory_data);
|
||||
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
|
||||
match_data->memctl.free((void *)match_data->subject,
|
||||
match_data->memctl.memory_data);
|
||||
match_data->memctl.free(match_data, match_data->memctl.memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get last mark in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION
|
||||
pcre2_get_mark(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->mark;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get pointer to ovector *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
pcre2_get_ovector_pointer(pcre2_match_data *match_data)
{
/* Return a pointer to the first slot of the block's ovector. The argument is
dereferenced unconditionally, so it must not be NULL. */
PCRE2_SIZE *first_slot = match_data->ovector;
return first_slot;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get number of ovector slots *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_get_ovector_count(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->oveccount;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get starting code unit in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_startchar(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->startchar;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get size of match data block *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_match_data_size(pcre2_match_data *match_data)
|
||||
{
|
||||
return offsetof(pcre2_match_data, ovector) +
|
||||
2 * (match_data->oveccount) * sizeof(PCRE2_SIZE);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get heapframes size *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_match_data_heapframes_size(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->heapframes_size;
|
||||
}
|
||||
|
||||
/* End of pcre2_match_data.c */
|
||||
171
deps/pcre2/pcre2_match_next.c
vendored
Normal file
171
deps/pcre2/pcre2_match_next.c
vendored
Normal file
@@ -0,0 +1,171 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/* Advance the offset by one code unit, and return the new value.
|
||||
It is only called when the offset is not at the end of the subject. */
|
||||
|
||||
static PCRE2_SIZE do_bumpalong(pcre2_match_data *match_data,
  PCRE2_SIZE offset)
{
/* Compute the offset that follows `offset` in the subject held by match_data:
  - two code units when a CR LF pair sits at `offset` and the pattern's newline
    convention is CRLF, ANY or ANYCRLF;
  - one whole character when the pattern was compiled with PCRE2_UTF;
  - one code unit otherwise.
subject[offset] is read unconditionally, so the offset must not be at the end
of the subject. */
PCRE2_SPTR text = match_data->subject;
PCRE2_SIZE text_length = match_data->subject_length;
#ifdef SUPPORT_UNICODE
BOOL utf_mode = (match_data->code->overall_options & PCRE2_UTF) != 0;
#endif

/* A CR LF pair is stepped over as a unit when CRLF counts as a newline. */

if (text[offset] == CHAR_CR && offset + 1 < text_length &&
    text[offset + 1] == CHAR_LF)
  {
  switch(match_data->code->newline_convention)
    {
    case PCRE2_NEWLINE_CRLF:
    case PCRE2_NEWLINE_ANY:
    case PCRE2_NEWLINE_ANYCRLF:
    return offset + 2;

    default:
    break;
    }
  }

/* In UTF mode move past the rest of the current character. */

#ifdef SUPPORT_UNICODE
if (utf_mode)
  {
  PCRE2_SPTR cursor = text + offset + 1;
  PCRE2_SPTR limit = text + text_length;

  (void)limit;  /* Suppress warning; 32-bit FORWARDCHARTEST ignores this */
  FORWARDCHARTEST(cursor, limit);
  return cursor - text;
  }
#endif

return offset + 1;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Advance the match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_next_match(pcre2_match_data *match_data, PCRE2_SIZE *pstart_offset,
|
||||
uint32_t *poptions)
|
||||
{
|
||||
int rc = match_data->rc;
|
||||
PCRE2_SIZE start_offset = match_data->start_offset;
|
||||
PCRE2_SIZE *ovector = match_data->ovector;
|
||||
|
||||
/* Match error, or no match: no further iteration possible. In previous versions
|
||||
of PCRE2, we recommended that clients use a strategy which involved retrying in
|
||||
certain cases after PCRE2_ERROR_NOMATCH, but this is no longer required. */
|
||||
|
||||
if (rc < 0)
|
||||
return FALSE;
|
||||
|
||||
/* Match succeeded: get the start offset for the next match */
|
||||
|
||||
/* Although \K can affect the position of ovector[0], there are no ways to do
|
||||
anything surprising with ovector[1], which must always be >= start_offset. */
|
||||
|
||||
PCRE2_ASSERT(ovector[1] >= start_offset);
|
||||
|
||||
/* Special handling for patterns which contain \K in a lookaround, which enables
|
||||
the match start to be pushed back to before the starting search offset
|
||||
(ovector[0] < start_offset) or after the match ends (ovector[0] > ovector[1]).
|
||||
This is not a problem if ovector[1] > start_offset, because in this case, we can
|
||||
just attempt the next match at ovector[1]: we are making progress, which is all
|
||||
that we require.
|
||||
|
||||
However, if we have ovector[1] == start_offset, then we have a very rare case
|
||||
which must be handled specially, because it's a non-empty match which
|
||||
nonetheless fails to make progress through the subject. */
|
||||
|
||||
if (ovector[0] != start_offset && ovector[1] == start_offset)
|
||||
{
|
||||
/* If the match end is at the end of the subject, we are done. */
|
||||
|
||||
if (start_offset >= match_data->subject_length)
|
||||
return FALSE;
|
||||
|
||||
/* Otherwise, bump along by one code unit, and do a normal search. */
|
||||
|
||||
*pstart_offset = do_bumpalong(match_data, ovector[1]);
|
||||
*poptions = 0;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* If the previous match was for an empty string, we are finished if we are at
|
||||
the end of the subject. Otherwise, arrange to run another match at the same
|
||||
point to see if a non-empty match can be found. */
|
||||
|
||||
if (ovector[0] == ovector[1])
|
||||
{
|
||||
/* If the match is at the end of the subject, we are done. */
|
||||
|
||||
if (ovector[0] >= match_data->subject_length)
|
||||
return FALSE;
|
||||
|
||||
/* Otherwise, continue at this exact same point, but we must set the flag
|
||||
which ensures that we don't return the exact same empty match again. */
|
||||
|
||||
*pstart_offset = ovector[1];
|
||||
*poptions = PCRE2_NOTEMPTY_ATSTART;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Finally, we must be in the happy state of a non-empty match, where the end of
|
||||
the match is further on in the subject than start_offset, so we are easily able
|
||||
to continue and make progress. */
|
||||
|
||||
*pstart_offset = ovector[1];
|
||||
*poptions = 0;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* End of pcre2_match_next.c */
|
||||
239
deps/pcre2/pcre2_newline.c
vendored
Normal file
239
deps/pcre2/pcre2_newline.c
vendored
Normal file
@@ -0,0 +1,239 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains internal functions for testing newlines when more than
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE2 supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called only via the IS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
|
||||
pointed to by ptr is less than the end of the string.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
endptr pointer to the end of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
#ifdef EBCDIC
|
||||
case CHAR_NEL:
|
||||
#endif
|
||||
case CHAR_LF:
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at previous position *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called only via the WAS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
|
||||
value of ptr is greater than the start of the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
startptr pointer to the start of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
ptr--;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
#ifdef EBCDIC
|
||||
case CHAR_NEL:
|
||||
#endif
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_newline.c */
|
||||
118
deps/pcre2/pcre2_ord2utf.c
vendored
Normal file
118
deps/pcre2/pcre2_ord2utf.c
vendored
Normal file
@@ -0,0 +1,118 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This file contains a function that converts a Unicode character code point
|
||||
into a UTF string. The behaviour is different for each code unit width. */
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/* If SUPPORT_UNICODE is not defined, this function will never be called.
|
||||
Supply a dummy function because some compilers do not like empty source
|
||||
modules. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
(void)(cvalue);
|
||||
(void)(buffer);
|
||||
return 0;
|
||||
}
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert code point to UTF *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
cvalue the character value
|
||||
buffer pointer to buffer for result
|
||||
|
||||
Returns: number of code units placed in the buffer
|
||||
*/
|
||||
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
/* Convert to UTF-8 */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
|
||||
buffer += i;
|
||||
for (unsigned int j = i; j != 0; j--)
|
||||
{
|
||||
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||
cvalue >>= 6;
|
||||
}
|
||||
*buffer = (PCRE2_UCHAR)(PRIV(utf8_table2)[i] | (int)cvalue);
|
||||
return i + 1;
|
||||
|
||||
/* Convert to UTF-16 */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (cvalue <= 0xffff)
|
||||
{
|
||||
*buffer = (PCRE2_UCHAR)cvalue;
|
||||
return 1;
|
||||
}
|
||||
cvalue -= 0x10000;
|
||||
*buffer++ = 0xd800 | (cvalue >> 10);
|
||||
*buffer = 0xdc00 | (cvalue & 0x3ff);
|
||||
return 2;
|
||||
|
||||
/* Convert to UTF-32 */
|
||||
|
||||
#else
|
||||
*buffer = (PCRE2_UCHAR)cvalue;
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_ord2utf.c */
|
||||
430
deps/pcre2/pcre2_pattern_info.c
vendored
Normal file
430
deps/pcre2/pcre2_pattern_info.c
vendored
Normal file
@@ -0,0 +1,430 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to compiled code
|
||||
what what information is required
|
||||
where where to put the information; if NULL, return length
|
||||
|
||||
Returns: 0 when data returned
|
||||
> 0 when length requested
|
||||
< 0 on error or unset value
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
|
||||
{
|
||||
const pcre2_real_code *re = (const pcre2_real_code *)code;
|
||||
|
||||
if (where == NULL) /* Requests field length */
|
||||
{
|
||||
switch(what)
|
||||
{
|
||||
case PCRE2_INFO_ALLOPTIONS:
|
||||
case PCRE2_INFO_ARGOPTIONS:
|
||||
case PCRE2_INFO_BACKREFMAX:
|
||||
case PCRE2_INFO_BSR:
|
||||
case PCRE2_INFO_CAPTURECOUNT:
|
||||
case PCRE2_INFO_DEPTHLIMIT:
|
||||
case PCRE2_INFO_EXTRAOPTIONS:
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
case PCRE2_INFO_HEAPLIMIT:
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
case PCRE2_INFO_LASTCODEUNIT:
|
||||
case PCRE2_INFO_MATCHEMPTY:
|
||||
case PCRE2_INFO_MATCHLIMIT:
|
||||
case PCRE2_INFO_MAXLOOKBEHIND:
|
||||
case PCRE2_INFO_MINLENGTH:
|
||||
case PCRE2_INFO_NAMEENTRYSIZE:
|
||||
case PCRE2_INFO_NAMECOUNT:
|
||||
case PCRE2_INFO_NEWLINE:
|
||||
return sizeof(uint32_t);
|
||||
|
||||
case PCRE2_INFO_FIRSTBITMAP:
|
||||
return sizeof(const uint8_t *);
|
||||
|
||||
case PCRE2_INFO_JITSIZE:
|
||||
case PCRE2_INFO_SIZE:
|
||||
case PCRE2_INFO_FRAMESIZE:
|
||||
return sizeof(size_t);
|
||||
|
||||
case PCRE2_INFO_NAMETABLE:
|
||||
return sizeof(PCRE2_SPTR);
|
||||
}
|
||||
}
|
||||
|
||||
if (re == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
switch(what)
|
||||
{
|
||||
case PCRE2_INFO_ALLOPTIONS:
|
||||
*((uint32_t *)where) = re->overall_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_ARGOPTIONS:
|
||||
*((uint32_t *)where) = re->compile_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_BACKREFMAX:
|
||||
*((uint32_t *)where) = re->top_backref;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_BSR:
|
||||
*((uint32_t *)where) = re->bsr_convention;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_CAPTURECOUNT:
|
||||
*((uint32_t *)where) = re->top_bracket;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_DEPTHLIMIT:
|
||||
*((uint32_t *)where) = re->limit_depth;
|
||||
if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_EXTRAOPTIONS:
|
||||
*((uint32_t *)where) = re->extra_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
|
||||
((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
|
||||
re->first_codeunit : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTBITMAP:
|
||||
*((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
|
||||
&(re->start_bitmap[0]) : NULL;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FRAMESIZE:
|
||||
*((size_t *)where) = offsetof(heapframe, ovector) +
|
||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE);
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HEAPLIMIT:
|
||||
*((uint32_t *)where) = re->limit_heap;
|
||||
if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_JITSIZE:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((size_t *)where) = (re->executable_jit != NULL)?
|
||||
PRIV(jit_get_size)(re->executable_jit) : 0;
|
||||
#else
|
||||
*((size_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_LASTCODEUNIT:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
|
||||
re->last_codeunit : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MATCHEMPTY:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MATCHLIMIT:
|
||||
*((uint32_t *)where) = re->limit_match;
|
||||
if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MAXLOOKBEHIND:
|
||||
*((uint32_t *)where) = re->max_lookbehind;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MINLENGTH:
|
||||
*((uint32_t *)where) = re->minlength;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMEENTRYSIZE:
|
||||
*((uint32_t *)where) = re->name_entry_size;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMECOUNT:
|
||||
*((uint32_t *)where) = re->name_count;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMETABLE:
|
||||
*((PCRE2_SPTR *)where) = (PCRE2_SPTR)((const char *)re +
|
||||
sizeof(pcre2_real_code));
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NEWLINE:
|
||||
*((uint32_t *)where) = re->newline_convention;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_SIZE:
|
||||
*((size_t *)where) = re->blocksize;
|
||||
break;
|
||||
|
||||
default: return PCRE2_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Callout enumerator *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to compiled code
|
||||
callback function called for each callout block
|
||||
callout_data user data passed to the callback
|
||||
|
||||
Returns: 0 when successfully completed
|
||||
< 0 on local error
|
||||
!= 0 for callback error
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_callout_enumerate(const pcre2_code *code,
|
||||
int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
|
||||
{
|
||||
const pcre2_real_code *re = (const pcre2_real_code *)code;
|
||||
pcre2_callout_enumerate_block cb;
|
||||
PCRE2_SPTR cc;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf;
|
||||
#endif
|
||||
|
||||
if (re == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
#endif
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
cb.version = 0;
|
||||
cc = (PCRE2_SPTR)((uint8_t *)re + re->code_start);
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
int rc;
|
||||
switch (*cc)
|
||||
{
|
||||
case OP_END:
|
||||
return 0;
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_EXACT:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSUPTO:
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_EXACTI:
|
||||
case OP_POSSTARI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_POSQUERYI:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
case OP_TYPEPOSUPTO:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
|
||||
#endif
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
case OP_ECLASS:
|
||||
cc += GET(cc, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
cc += PRIV(OP_lengths)[*cc] + cc[1];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT:
|
||||
cb.pattern_position = GET(cc, 1);
|
||||
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
|
||||
cb.callout_number = cc[1 + 2*LINK_SIZE];
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string_length = 0;
|
||||
cb.callout_string = NULL;
|
||||
rc = callback(&cb, callout_data);
|
||||
if (rc != 0) return rc;
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
cb.pattern_position = GET(cc, 1);
|
||||
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
|
||||
cb.callout_string_length =
|
||||
GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
|
||||
cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
|
||||
rc = callback(&cb, callout_data);
|
||||
if (rc != 0) return rc;
|
||||
cc += GET(cc, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
default:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_pattern_info.c */
|
||||
1098
deps/pcre2/pcre2_printint_inc.h
vendored
Normal file
1098
deps/pcre2/pcre2_printint_inc.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
343
deps/pcre2/pcre2_script_run.c
vendored
Normal file
343
deps/pcre2/pcre2_script_run.c
vendored
Normal file
@@ -0,0 +1,343 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the function for checking a script run. */
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check script run *
|
||||
*************************************************/
|
||||
|
||||
/* A script run is conceptually a sequence of characters all in the same
|
||||
Unicode script. However, it isn't quite that simple. There are special rules
|
||||
for scripts that are commonly used together, and also special rules for digits.
|
||||
This function implements the appropriate checks, which is possible only when
|
||||
PCRE2 is compiled with Unicode support. The function returns TRUE if there is
|
||||
no Unicode support; however, it should never be called in that circumstance
|
||||
because an error is given by pcre2_compile() if a script run is called for in a
|
||||
version of PCRE2 compiled without Unicode support.
|
||||
|
||||
Arguments:
|
||||
ptr points to the first character
|
||||
endptr point after the last character
|
||||
utf TRUE if in UTF mode
|
||||
|
||||
Returns: TRUE if this is a valid script run
|
||||
*/
|
||||
|
||||
/* States of the script-run check. The checker starts in SCRIPT_UNSET and
moves on once a character that constrains the script has been seen. */

enum {
  SCRIPT_UNSET,        /* No requirement established yet */
  SCRIPT_MAP,          /* A bitmap holds the acceptable scripts */
  SCRIPT_HANPENDING,   /* Only Han characters seen so far */
  SCRIPT_HANHIRAKATA,  /* Han, Hiragana or Katakana expected */
  SCRIPT_HANBOPOMOFO,  /* Han or Bopomofo expected */
  SCRIPT_HANHANGUL     /* Han or Hangul expected */
};

/* Bitmap sizes in 32-bit words. UCD_MAPSIZE is the number of words copied out
of PRIV(ucd_script_sets) for one character; FULL_MAPSIZE is the size of the
bitmaps that script_run() works with. */

#define UCD_MAPSIZE (ucp_Unknown/32 + 1)
#define FULL_MAPSIZE (ucp_Script_Count/32 + 1)
|
||||
|
||||
BOOL
|
||||
PRIV(script_run)(PCRE2_SPTR ptr, PCRE2_SPTR endptr, BOOL utf)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
uint32_t require_state = SCRIPT_UNSET;
|
||||
uint32_t require_map[FULL_MAPSIZE];
|
||||
uint32_t map[FULL_MAPSIZE];
|
||||
uint32_t require_digitset = 0;
|
||||
uint32_t c;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
#endif
|
||||
|
||||
/* Any string containing fewer than 2 characters is a valid script run. */
|
||||
|
||||
if (ptr >= endptr) return TRUE;
|
||||
GETCHARINCTEST(c, ptr);
|
||||
if (ptr >= endptr) return TRUE;
|
||||
|
||||
/* Initialize the require map. This is a full-size bitmap that has a bit for
|
||||
every script, as opposed to the maps in ucd_script_sets, which only have bits
|
||||
for scripts less than ucp_Unknown - those that appear in script extension
|
||||
lists. */
|
||||
|
||||
for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] = 0;
|
||||
|
||||
/* Scan strings of two or more characters, checking the Unicode characteristics
|
||||
of each code point. There is special code for scripts that can be combined with
|
||||
characters from the Han Chinese script. This may be used in conjunction with
|
||||
four other scripts in these combinations:
|
||||
|
||||
. Han with Hiragana and Katakana is allowed (for Japanese).
|
||||
. Han with Bopomofo is allowed (for Taiwanese Mandarin).
|
||||
. Han with Hangul is allowed (for Korean).
|
||||
|
||||
If the first significant character's script is one of the four, the required
|
||||
script type is immediately known. However, if the first significant
|
||||
character's script is Han, we have to keep checking for a non-Han character.
|
||||
Hence the SCRIPT_HANPENDING state. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
const ucd_record *ucd = GET_UCD(c);
|
||||
uint32_t script = ucd->script;
|
||||
|
||||
/* If the script is Unknown, the string is not a valid script run. Such
|
||||
characters can only form script runs of length one (see test above). */
|
||||
|
||||
if (script == ucp_Unknown) return FALSE;
|
||||
|
||||
/* A character without any script extensions whose script is Inherited or
|
||||
Common is always accepted with any script. If there are extensions, the
|
||||
following processing happens for all scripts. */
|
||||
|
||||
if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common))
|
||||
{
|
||||
BOOL OK;
|
||||
|
||||
/* Set up a full-sized map for this character that can include bits for all
|
||||
scripts. Copy the scriptx map for this character (which covers those
|
||||
scripts that appear in script extension lists), set the remaining values to
|
||||
zero, and then, except for Common or Inherited, add this script's bit to
|
||||
the map. */
|
||||
|
||||
memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t));
|
||||
memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t));
|
||||
if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script);
|
||||
|
||||
/* Handle the different checking states */
|
||||
|
||||
switch(require_state)
|
||||
{
|
||||
/* First significant character - it might follow Common or Inherited
|
||||
characters that do not have any script extensions. */
|
||||
|
||||
case SCRIPT_UNSET:
|
||||
switch(script)
|
||||
{
|
||||
case ucp_Han:
|
||||
require_state = SCRIPT_HANPENDING;
|
||||
break;
|
||||
|
||||
case ucp_Hiragana:
|
||||
case ucp_Katakana:
|
||||
require_state = SCRIPT_HANHIRAKATA;
|
||||
break;
|
||||
|
||||
case ucp_Bopomofo:
|
||||
require_state = SCRIPT_HANBOPOMOFO;
|
||||
break;
|
||||
|
||||
case ucp_Hangul:
|
||||
require_state = SCRIPT_HANHANGUL;
|
||||
break;
|
||||
|
||||
default:
|
||||
memcpy(require_map, map, FULL_MAPSIZE * sizeof(uint32_t));
|
||||
require_state = SCRIPT_MAP;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/* The first significant character was Han. An inspection of the Unicode
|
||||
11.0.0 files shows that there are the following types of Script Extension
|
||||
list that involve the Han, Bopomofo, Hiragana, Katakana, and Hangul
|
||||
scripts:
|
||||
|
||||
. Bopomofo + Han
|
||||
. Han + Hiragana + Katakana
|
||||
. Hiragana + Katakana
|
||||
. Bopopmofo + Hangul + Han + Hiragana + Katakana
|
||||
|
||||
The following code tries to make sense of this. */
|
||||
|
||||
#define FOUND_BOPOMOFO 1
|
||||
#define FOUND_HIRAGANA 2
|
||||
#define FOUND_KATAKANA 4
|
||||
#define FOUND_HANGUL 8
|
||||
|
||||
case SCRIPT_HANPENDING:
|
||||
if (script != ucp_Han) /* Another Han does nothing */
|
||||
{
|
||||
uint32_t chspecial = 0;
|
||||
|
||||
if (MAPBIT(map, ucp_Bopomofo) != 0) chspecial |= FOUND_BOPOMOFO;
|
||||
if (MAPBIT(map, ucp_Hiragana) != 0) chspecial |= FOUND_HIRAGANA;
|
||||
if (MAPBIT(map, ucp_Katakana) != 0) chspecial |= FOUND_KATAKANA;
|
||||
if (MAPBIT(map, ucp_Hangul) != 0) chspecial |= FOUND_HANGUL;
|
||||
|
||||
if (chspecial == 0) return FALSE; /* Not allowed with Han */
|
||||
|
||||
if (chspecial == FOUND_BOPOMOFO)
|
||||
require_state = SCRIPT_HANBOPOMOFO;
|
||||
else if (chspecial == (FOUND_HIRAGANA|FOUND_KATAKANA))
|
||||
require_state = SCRIPT_HANHIRAKATA;
|
||||
|
||||
/* Otherwise this character must be allowed with all of them, so remain
|
||||
in the pending state. */
|
||||
}
|
||||
break;
|
||||
|
||||
/* Previously encountered one of the "with Han" scripts. Check that
|
||||
this character is appropriate. */
|
||||
|
||||
case SCRIPT_HANHIRAKATA:
|
||||
if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hiragana) +
|
||||
MAPBIT(map, ucp_Katakana) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case SCRIPT_HANBOPOMOFO:
|
||||
if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Bopomofo) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case SCRIPT_HANHANGUL:
|
||||
if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hangul) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
/* Previously encountered one or more characters that are allowed with a
|
||||
list of scripts. */
|
||||
|
||||
case SCRIPT_MAP:
|
||||
OK = FALSE;
|
||||
|
||||
for (int i = 0; i < FULL_MAPSIZE; i++)
|
||||
{
|
||||
if ((require_map[i] & map[i]) != 0)
|
||||
{
|
||||
OK = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!OK) return FALSE;
|
||||
|
||||
/* The rest of the string must be in this script, but we have to
|
||||
allow for the Han complications. */
|
||||
|
||||
switch(script)
|
||||
{
|
||||
case ucp_Han:
|
||||
require_state = SCRIPT_HANPENDING;
|
||||
break;
|
||||
|
||||
case ucp_Hiragana:
|
||||
case ucp_Katakana:
|
||||
require_state = SCRIPT_HANHIRAKATA;
|
||||
break;
|
||||
|
||||
case ucp_Bopomofo:
|
||||
require_state = SCRIPT_HANBOPOMOFO;
|
||||
break;
|
||||
|
||||
case ucp_Hangul:
|
||||
require_state = SCRIPT_HANHANGUL;
|
||||
break;
|
||||
|
||||
/* Compute the intersection of the required list of scripts and the
|
||||
allowed scripts for this character. */
|
||||
|
||||
default:
|
||||
for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] &= map[i];
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
} /* End checking character's script and extensions. */
|
||||
|
||||
/* The character is in an acceptable script. We must now ensure that all
|
||||
decimal digits in the string come from the same set. Some scripts (e.g.
|
||||
Common, Arabic) have more than one set of decimal digits. This code does
|
||||
not allow mixing sets, even within the same script. The vector called
|
||||
PRIV(ucd_digit_sets)[] contains, in its first element, the number of
|
||||
following elements, and then, in ascending order, the code points of the
|
||||
'9' characters in every set of 10 digits. Each set is identified by the
|
||||
offset in the vector of its '9' character. An initial check of the first
|
||||
value picks up ASCII digits quickly. Otherwise, a binary chop is used. */
|
||||
|
||||
if (ucd->chartype == ucp_Nd)
|
||||
{
|
||||
uint32_t digitset;
|
||||
|
||||
if (c <= PRIV(ucd_digit_sets)[1]) digitset = 1; else
|
||||
{
|
||||
int mid;
|
||||
int bot = 1;
|
||||
int top = PRIV(ucd_digit_sets)[0];
|
||||
for (;;)
|
||||
{
|
||||
if (top <= bot + 1) /* <= rather than == is paranoia */
|
||||
{
|
||||
digitset = top;
|
||||
break;
|
||||
}
|
||||
mid = (top + bot) / 2;
|
||||
if (c <= PRIV(ucd_digit_sets)[mid]) top = mid; else bot = mid;
|
||||
}
|
||||
}
|
||||
|
||||
/* A required value of 0 means "unset". */
|
||||
|
||||
if (require_digitset == 0) require_digitset = digitset;
|
||||
else if (digitset != require_digitset) return FALSE;
|
||||
} /* End digit handling */
|
||||
|
||||
/* If we haven't yet got to the end, pick up the next character. */
|
||||
|
||||
if (ptr >= endptr) return TRUE;
|
||||
GETCHARINCTEST(c, ptr);
|
||||
} /* End checking loop */
|
||||
|
||||
#else /* NOT SUPPORT_UNICODE */
|
||||
(void)ptr;
|
||||
(void)endptr;
|
||||
(void)utf;
|
||||
return TRUE;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
/* End of pcre2_script_run.c */
|
||||
284
deps/pcre2/pcre2_serialize.c
vendored
Normal file
284
deps/pcre2/pcre2_serialize.c
vendored
Normal file
@@ -0,0 +1,284 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains functions for serializing and deserializing
|
||||
a sequence of compiled codes. */
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/* Marker stored at the start of serialized data as a cheap guard against
being handed something else. The hex digits read as ASCII "PR2S". */

#define SERIALIZED_DATA_MAGIC 0x50523253u

/* Serialized data can only be decoded by the same PCRE2 version with the
same code unit, pointer and PCRE2_SIZE widths. These two words record those
properties so that pcre2_serialize_decode() can compare them. */

#define SERIALIZED_DATA_VERSION ((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16))

#define SERIALIZED_DATA_CONFIG \
  (sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16))
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Serialize compiled patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes,
|
||||
uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size,
|
||||
pcre2_general_context *gcontext)
|
||||
{
|
||||
uint8_t *bytes;
|
||||
uint8_t *dst_bytes;
|
||||
int32_t i;
|
||||
PCRE2_SIZE total_size;
|
||||
const pcre2_real_code *re;
|
||||
const uint8_t *tables;
|
||||
pcre2_serialized_data *data;
|
||||
|
||||
const pcre2_memctl *memctl = (gcontext != NULL) ?
|
||||
&gcontext->memctl : &PRIV(default_compile_context).memctl;
|
||||
|
||||
if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
|
||||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||
|
||||
/* Compute total size. */
|
||||
total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH;
|
||||
tables = NULL;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
if (codes[i] == NULL) return PCRE2_ERROR_NULL;
|
||||
re = (const pcre2_real_code *)(codes[i]);
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
if (tables == NULL)
|
||||
tables = re->tables;
|
||||
else if (tables != re->tables)
|
||||
return PCRE2_ERROR_MIXEDTABLES;
|
||||
total_size += re->blocksize;
|
||||
}
|
||||
|
||||
/* Initialize the byte stream. */
|
||||
bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data);
|
||||
if (bytes == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
/* The controller is stored as a hidden parameter. */
|
||||
memcpy(bytes, memctl, sizeof(pcre2_memctl));
|
||||
bytes += sizeof(pcre2_memctl);
|
||||
|
||||
data = (pcre2_serialized_data *)bytes;
|
||||
data->magic = SERIALIZED_DATA_MAGIC;
|
||||
data->version = SERIALIZED_DATA_VERSION;
|
||||
data->config = SERIALIZED_DATA_CONFIG;
|
||||
data->number_of_codes = number_of_codes;
|
||||
|
||||
/* Copy all compiled code data. */
|
||||
dst_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
memcpy(dst_bytes, tables, TABLES_LENGTH);
|
||||
dst_bytes += TABLES_LENGTH;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
re = (const pcre2_real_code *)(codes[i]);
|
||||
(void)memcpy(dst_bytes, (const char *)re, re->blocksize);
|
||||
|
||||
/* Certain fields in the compiled code block are re-set during
|
||||
deserialization. In order to ensure that the serialized data stream is always
|
||||
the same for the same pattern, set them to zero here. We can't assume the
|
||||
copy of the pattern is correctly aligned for accessing the fields as part of
|
||||
a structure. Note the use of sizeof(void *) in the second of these, to
|
||||
specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a
|
||||
pointer to uint8_t), gcc gives a warning because the first argument is also a
|
||||
pointer to uint8_t. Casting the first argument to (void *) can stop this, but
|
||||
it didn't stop Coverity giving the same complaint. */
|
||||
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0,
|
||||
sizeof(pcre2_memctl));
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0,
|
||||
sizeof(void *));
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
|
||||
sizeof(void *));
|
||||
|
||||
dst_bytes += re->blocksize;
|
||||
}
|
||||
|
||||
*serialized_bytes = bytes;
|
||||
*serialized_size = total_size;
|
||||
return number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Deserialize compiled patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes,
|
||||
const uint8_t *bytes, pcre2_general_context *gcontext)
|
||||
{
|
||||
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
|
||||
const pcre2_memctl *memctl = (gcontext != NULL) ?
|
||||
&gcontext->memctl : &PRIV(default_compile_context).memctl;
|
||||
|
||||
const uint8_t *src_bytes;
|
||||
pcre2_real_code *dst_re;
|
||||
uint8_t *tables;
|
||||
int32_t i, j;
|
||||
|
||||
/* Sanity checks. */
|
||||
|
||||
if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL;
|
||||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||
if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA;
|
||||
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
|
||||
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
|
||||
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
if (number_of_codes > data->number_of_codes)
|
||||
number_of_codes = data->number_of_codes;
|
||||
|
||||
src_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
|
||||
/* Decode tables. The reference count for the tables is stored immediately
|
||||
following them. */
|
||||
|
||||
tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data);
|
||||
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
memcpy(tables, src_bytes, TABLES_LENGTH);
|
||||
*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes;
|
||||
src_bytes += TABLES_LENGTH;
|
||||
|
||||
/* Decode the byte stream. We must not try to read the size from the compiled
|
||||
code block in the stream, because it might be unaligned, which causes errors on
|
||||
hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type
|
||||
of the blocksize field is given its own name to ensure that it is the same here
|
||||
as in the block. */
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
CODE_BLOCKSIZE_TYPE blocksize;
|
||||
memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize),
|
||||
sizeof(CODE_BLOCKSIZE_TYPE));
|
||||
if (blocksize <= sizeof(pcre2_real_code))
|
||||
return PCRE2_ERROR_BADSERIALIZEDDATA;
|
||||
|
||||
/* The allocator provided by gcontext replaces the original one. */
|
||||
|
||||
dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize,
|
||||
(pcre2_memctl *)gcontext);
|
||||
if (dst_re == NULL)
|
||||
{
|
||||
memctl->free(tables, memctl->memory_data);
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
memctl->free(codes[j], memctl->memory_data);
|
||||
codes[j] = NULL;
|
||||
}
|
||||
return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
|
||||
/* The new allocator must be preserved. */
|
||||
|
||||
memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl),
|
||||
src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl));
|
||||
if (dst_re->magic_number != MAGIC_NUMBER ||
|
||||
dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 ||
|
||||
dst_re->name_count > MAX_NAME_COUNT)
|
||||
{
|
||||
memctl->free(dst_re, memctl->memory_data);
|
||||
return PCRE2_ERROR_BADSERIALIZEDDATA;
|
||||
}
|
||||
|
||||
/* At the moment only one table is supported. */
|
||||
|
||||
dst_re->tables = tables;
|
||||
dst_re->executable_jit = NULL;
|
||||
dst_re->flags |= PCRE2_DEREF_TABLES;
|
||||
|
||||
codes[i] = dst_re;
|
||||
src_bytes += blocksize;
|
||||
}
|
||||
|
||||
return number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get the number of serialized patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_get_number_of_codes(const uint8_t *bytes)
|
||||
{
|
||||
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
|
||||
|
||||
if (data == NULL) return PCRE2_ERROR_NULL;
|
||||
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
|
||||
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
|
||||
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
return data->number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free the allocated stream *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_free(uint8_t *bytes)
|
||||
{
|
||||
if (bytes != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_serialize.c */
|
||||
199
deps/pcre2/pcre2_string_utils.c
vendored
Normal file
199
deps/pcre2/pcre2_string_utils.c
vendored
Normal file
@@ -0,0 +1,199 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2018-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains internal functions for comparing and finding the length
|
||||
of strings. These are used instead of strcmp() etc because the standard
|
||||
functions work only on 8-bit data. */
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare two zero-terminated PCRE2 strings *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare zero-terminated PCRE2 & 8-bit strings *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
const char *.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare two PCRE2 strings, given a length *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
len the length
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare PCRE2 string to 8-bit string by length *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
const char *.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
len the length
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find the length of a PCRE2 string *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the string
|
||||
Returns: the length
|
||||
*/
|
||||
|
||||
PCRE2_SIZE
|
||||
PRIV(strlen)(PCRE2_SPTR str)
|
||||
{
|
||||
PCRE2_SIZE c = 0;
|
||||
while (*str++ != 0) c++;
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy 8-bit 0-terminated string to PCRE2 string *
|
||||
*************************************************/
|
||||
|
||||
/* Arguments:
|
||||
str1 buffer to receive the string
|
||||
str2 8-bit string to be copied
|
||||
|
||||
Returns: the number of code units used (excluding trailing zero)
|
||||
*/
|
||||
|
||||
PCRE2_SIZE
|
||||
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
|
||||
{
|
||||
PCRE2_UCHAR *t = str1;
|
||||
while (*str2 != 0) *t++ = *str2++;
|
||||
*t = 0;
|
||||
return t - str1;
|
||||
}
|
||||
|
||||
/* End of pcre2_string_utils.c */
|
||||
2086
deps/pcre2/pcre2_study.c
vendored
Normal file
2086
deps/pcre2/pcre2_study.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1761
deps/pcre2/pcre2_substitute.c
vendored
Normal file
1761
deps/pcre2/pcre2_substitute.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
553
deps/pcre2/pcre2_substring.c
vendored
Normal file
553
deps/pcre2/pcre2_substring.c
vendored
Normal file
@@ -0,0 +1,553 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
stringname the name of the required substring
|
||||
buffer where to put the substring
|
||||
sizeptr the size of the buffer, updated to the size of the substring
|
||||
|
||||
Returns: if successful: zero
|
||||
if not successful, a negative error code:
|
||||
(1) an error from nametable_scan()
|
||||
(2) an error from copy_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
|
||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy numbered captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by number.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
stringnumber the number of the required substring
|
||||
buffer where to put the substring
|
||||
sizeptr the size of the buffer, updated to the size of the substring
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: buffer too small
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_SIZE size;
|
||||
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
|
||||
if (rc < 0) return rc;
|
||||
if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
|
||||
if (size != 0) memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
CU2BYTES(size));
|
||||
buffer[size] = 0;
|
||||
*sizeptr = size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract named captured string *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new memory. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match_data
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer to the new memory
|
||||
sizeptr where to put the length of the substring
|
||||
|
||||
Returns: if successful: zero
|
||||
if not successful, a negative value:
|
||||
(1) an error from nametable_scan()
|
||||
(2) an error from get_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_get_byname(pcre2_match_data *match_data,
|
||||
PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract captured string to new memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a piece of new
|
||||
memory.
|
||||
|
||||
Arguments:
|
||||
match_data points to match data
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the new memory
|
||||
sizeptr where to put the size of the substring
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: failed to get memory
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_SIZE size;
|
||||
PCRE2_UCHAR *yield;
|
||||
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
|
||||
if (rc < 0) return rc;
|
||||
yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
||||
(size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
|
||||
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
|
||||
if (size != 0) memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
CU2BYTES(size));
|
||||
yield[size] = 0;
|
||||
*stringptr = yield;
|
||||
*sizeptr = size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free memory obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the result of a previous pcre2_substring_get_byxxx()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_free(PCRE2_UCHAR *string)
|
||||
{
|
||||
if (string != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get length of a named substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns the length of a named captured substring. If the regex
|
||||
permits duplicate names, the first substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match data
|
||||
stringname the name of the required substring
|
||||
sizeptr where to put the length, if not NULL
|
||||
|
||||
Returns: 0 if successful, else a negative error number
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_length_byname(pcre2_match_data *match_data,
|
||||
PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get length of a numbered substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns the length of a captured substring. If the start is
|
||||
beyond the end (which can happen when \K is used in an assertion), it sets the
|
||||
length to zero.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match data
|
||||
stringnumber the number of the required substring
|
||||
sizeptr where to put the length, if not NULL
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector is too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
PCRE2_ERROR_INVALIDOFFSET: internal error, should not occur
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SIZE left, right;
|
||||
int count = match_data->rc;
|
||||
if (count == PCRE2_ERROR_PARTIAL)
|
||||
{
|
||||
if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
|
||||
count = 0;
|
||||
}
|
||||
else if (count < 0) return count; /* Match failed */
|
||||
|
||||
if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
{
|
||||
if (stringnumber > match_data->code->top_bracket)
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
if (stringnumber >= match_data->oveccount)
|
||||
return PCRE2_ERROR_UNAVAILABLE;
|
||||
if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
|
||||
return PCRE2_ERROR_UNSET;
|
||||
}
|
||||
else /* Matched using pcre2_dfa_match() */
|
||||
{
|
||||
if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
|
||||
if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
|
||||
}
|
||||
|
||||
left = match_data->ovector[stringnumber*2];
|
||||
right = match_data->ovector[stringnumber*2+1];
|
||||
/* LCOV_EXCL_START - this appears to be unreachable, as the ovector and
|
||||
subject_length should always be set consistently, no matter what misbehaviour
|
||||
the caller has committed. */
|
||||
if (left > match_data->subject_length || right > match_data->subject_length)
|
||||
{
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return PCRE2_ERROR_INVALIDOFFSET;
|
||||
}
|
||||
/* LCOV_EXCL_STOP */
|
||||
if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract all captured strings to new memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function gets one chunk of memory and builds a list of pointers and all
|
||||
the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
The substrings are zero-terminated, but also, if the final argument is
|
||||
non-NULL, a list of lengths is also returned. This allows binary data to be
|
||||
handled.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
listptr set to point to the list of pointers
|
||||
lengthsptr set to point to the list of lengths (may be NULL)
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: failed to get memory,
|
||||
or a match failure code
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
|
||||
PCRE2_SIZE **lengthsptr)
|
||||
{
|
||||
int i, count, count2;
|
||||
PCRE2_SIZE size;
|
||||
PCRE2_SIZE *lensp;
|
||||
pcre2_memctl *memp;
|
||||
PCRE2_UCHAR **listp;
|
||||
PCRE2_UCHAR *sp;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
if ((count = match_data->rc) < 0) return count; /* Match failed */
|
||||
if (count == 0) count = match_data->oveccount; /* Ovector too small */
|
||||
|
||||
count2 = 2*count;
|
||||
ovector = match_data->ovector;
|
||||
size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */
|
||||
if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
|
||||
|
||||
for (i = 0; i < count2; i += 2)
|
||||
{
|
||||
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
|
||||
if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
|
||||
}
|
||||
|
||||
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
|
||||
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
|
||||
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
|
||||
|
||||
if (lengthsptr == NULL)
|
||||
{
|
||||
sp = (PCRE2_UCHAR *)lensp;
|
||||
lensp = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
*lengthsptr = lensp;
|
||||
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
|
||||
}
|
||||
|
||||
for (i = 0; i < count2; i += 2)
|
||||
{
|
||||
size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
|
||||
|
||||
/* Size == 0 includes the case when the capture is unset. Avoid adding
|
||||
PCRE2_UNSET to match_data->subject because it overflows, even though with
|
||||
zero size calling memcpy() is harmless. */
|
||||
|
||||
if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
|
||||
*listp++ = sp;
|
||||
if (lensp != NULL) *lensp++ = size;
|
||||
sp += size;
|
||||
*sp++ = 0;
|
||||
}
|
||||
|
||||
*listp = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free memory obtained by substring_list_get *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the result of a previous pcre2_substring_list_get()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_list_free(PCRE2_UCHAR **list)
|
||||
{
|
||||
if (list != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans the nametable for a given name, using binary chop. It
|
||||
returns either two pointers to the entries in the table, or, if no pointers are
|
||||
given, the number of a unique group with the given name. If duplicate names are
|
||||
permitted, and the name is not unique, an error is generated.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries are required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
|
||||
otherwise, if firstptr and lastptr are NULL:
|
||||
a group number for a unique substring
|
||||
else PCRE2_ERROR_NOUNIQUESUBSTRING
|
||||
otherwise:
|
||||
the length of each entry, having set firstptr and lastptr
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
|
||||
PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
|
||||
{
|
||||
uint16_t bot = 0;
|
||||
uint16_t top = code->name_count;
|
||||
uint16_t entrysize = code->name_entry_size;
|
||||
PCRE2_SPTR nametable = (PCRE2_SPTR)((const char *)code + sizeof(pcre2_real_code));
|
||||
|
||||
while (top > bot)
|
||||
{
|
||||
uint16_t mid = (top + bot) / 2;
|
||||
PCRE2_SPTR entry = nametable + entrysize*mid;
|
||||
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
|
||||
if (c == 0)
|
||||
{
|
||||
PCRE2_SPTR first;
|
||||
PCRE2_SPTR last;
|
||||
PCRE2_SPTR lastentry;
|
||||
lastentry = nametable + entrysize * (code->name_count - 1);
|
||||
first = last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
if (firstptr == NULL) return (first == last)?
|
||||
(int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
|
||||
*firstptr = first;
|
||||
*lastptr = last;
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
|
||||
when it is known that names are unique. If there are duplicate names, it is not
|
||||
defined which number is returned.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose number is required
|
||||
|
||||
Returns: the number of the named parenthesis, or a negative number
|
||||
PCRE2_ERROR_NOSUBSTRING if not found
|
||||
PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_number_from_name(const pcre2_code *code,
|
||||
PCRE2_SPTR stringname)
|
||||
{
|
||||
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
|
||||
}
|
||||
|
||||
/* End of pcre2_substring.c */
|
||||
310
deps/pcre2/pcre2_tables.c
vendored
Normal file
310
deps/pcre2/pcre2_tables.c
vendored
Normal file
@@ -0,0 +1,310 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE2 code modules. The tables are also #included by the pcre2test program,
|
||||
which uses macros to change their names from _pcre2_xxx to xxxx, thereby
|
||||
avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
|
||||
defined. */
|
||||
|
||||
|
||||
#if !defined(PCRE2_PCRE2TEST) && !defined(PCRE2_DFTABLES) && \
|
||||
!defined(PCRE2_PCRE2POSIX) /* We're compiling the library */
|
||||
#include "pcre2_internal.h"
|
||||
#endif
|
||||
|
||||
|
||||
/* Utility macros */
|
||||
/* Element count of a true array (not a pointer). The replacement list is
fully parenthesized so the result stays a single operand when the macro is
used inside a larger expression such as n / ARR_SIZE(a). */
#define ARR_SIZE(x) (sizeof(x)/sizeof((x)[0]))
|
||||
|
||||
|
||||
#if !defined(PCRE2_PCRE2TEST) && !defined(PCRE2_DFTABLES) && \
|
||||
!defined(PCRE2_PCRE2POSIX)
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre2_internal.h.
|
||||
This is mode-dependent, so it is skipped when this file is included by
|
||||
pcre2test. */
|
||||
|
||||
const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
|
||||
|
||||
/* Tables of horizontal and vertical whitespace characters, suitable for
|
||||
adding to classes. */
|
||||
|
||||
const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST };
|
||||
const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
|
||||
|
||||
#endif /* !PCRE2_PCRE2TEST && !PCRE2_DFTABLES && !PCRE2_PCRE2POSIX */
|
||||
|
||||
|
||||
#if !defined(PCRE2_DFTABLES) && !defined(PCRE2_PCRE2POSIX)
|
||||
|
||||
/* These tables are the pairs of delimiters that are valid for callout string
|
||||
arguments. For each starting delimiter there must be a matching ending
|
||||
delimiter, which in fact is different only for bracket-like delimiters. */
|
||||
|
||||
const uint32_t PRIV(callout_start_delims)[] = {
|
||||
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
|
||||
CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 };
|
||||
|
||||
const uint32_t PRIV(callout_end_delims[]) = {
|
||||
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
|
||||
CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 };
|
||||
|
||||
#endif /* !PCRE2_DFTABLES && !PCRE2_PCRE2POSIX */
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These tables are required by pcre2test in 16- or 32-bit mode, as well
|
||||
as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
|
||||
handling wide characters. */
|
||||
|
||||
#if defined PCRE2_PCRE2TEST || \
|
||||
(!defined(PCRE2_DFTABLES) && !defined(PCRE2_PCRE2POSIX) && \
|
||||
defined SUPPORT_UNICODE && \
|
||||
defined PCRE2_CODE_UNIT_WIDTH && \
|
||||
PCRE2_CODE_UNIT_WIDTH == 8)
|
||||
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
const int PRIV(utf8_table1)[] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff };
|
||||
|
||||
const unsigned PRIV(utf8_table1_size) = ARR_SIZE(PRIV(utf8_table1));
|
||||
|
||||
/* These are the indicator bits and the mask for the data bits to set in the
|
||||
first byte of a character, indexed by the number of additional bytes. */
|
||||
|
||||
const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
|
||||
const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
|
||||
|
||||
/* Table of the number of extra bytes, indexed by the first byte masked with
|
||||
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
|
||||
|
||||
const uint8_t PRIV(utf8_table4)[] = {
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
#endif /* UTF-8 support needed */
|
||||
|
||||
/* Tables concerned with Unicode properties are relevant only when Unicode
|
||||
support is enabled. See also the pcre2_ucptables_inc.h file, which is generated by
|
||||
a Python script from Unicode data files. */
|
||||
|
||||
#if !defined(PCRE2_DFTABLES) && !defined(PCRE2_PCRE2POSIX) && \
|
||||
defined(SUPPORT_UNICODE)
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
const uint32_t PRIV(ucp_gentype)[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lm, Lo, Lt, Lu */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Po, Ps */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
/* This table encodes the rules for finding the end of an extended grapheme
|
||||
cluster. Every code point has a grapheme break property which is one of the
|
||||
ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
|
||||
10 and 11. The 2-dimensional table is indexed by the properties of two adjacent
|
||||
code points. The left property selects a word from the table, and the right
|
||||
property selects a bit from that word like this:
|
||||
|
||||
PRIV(ucp_gbtable)[left-property] & (1u << right-property)
|
||||
|
||||
The value is non-zero if a grapheme break is NOT permitted between the relevant
|
||||
two code points. The breaking rules are as follows:
|
||||
|
||||
1. Break at the start and end of text (pretty obviously).
|
||||
|
||||
2. Do not break between a CR and LF; otherwise, break before and after
|
||||
controls.
|
||||
|
||||
3. Do not break Hangul syllable sequences, the rules for which are:
|
||||
|
||||
L may be followed by L, V, LV or LVT
|
||||
LV or V may be followed by V or T
|
||||
LVT or T may be followed by T
|
||||
|
||||
4. Do not break before extending characters or zero-width-joiner (ZWJ).
|
||||
|
||||
The following rules are only for extended grapheme clusters (but that's what we
|
||||
are implementing).
|
||||
|
||||
5. Do not break before SpacingMarks.
|
||||
|
||||
6. Do not break after Prepend characters.
|
||||
|
||||
7. Do not break within emoji modifier sequences or emoji zwj sequences. That
|
||||
is, do not break between characters with the Extended_Pictographic property
|
||||
if a ZWJ intervenes. Extend characters are allowed between the characters;
|
||||
this cannot be represented in this table, the code has to deal with it.
|
||||
|
||||
8. Do not break within emoji flag sequences. That is, do not break between
|
||||
regional indicator (RI) symbols if there are an odd number of RI characters
|
||||
before the break point. This table encodes "join RI characters"; the code
|
||||
has to deal with checking for previous adjoining RIs.
|
||||
|
||||
9. Otherwise, break everywhere.
|
||||
*/
|
||||
|
||||
/* Bits shared by most rows of the grapheme-break table: no break before
Extend, SpacingMark or ZWJ. Parenthesized as a whole and built from unsigned
shifts, matching the (1u<<...) masks it is OR-ed with. */
#define ESZ ((1u<<ucp_gbExtend)|(1u<<ucp_gbSpacingMark)|(1u<<ucp_gbZWJ))
|
||||
|
||||
const uint32_t PRIV(ucp_gbtable)[] = {
|
||||
(1u<<ucp_gbLF), /* 0 CR */
|
||||
0, /* 1 LF */
|
||||
0, /* 2 Control */
|
||||
ESZ, /* 3 Extend */
|
||||
ESZ|(1u<<ucp_gbPrepend)| /* 4 Prepend */
|
||||
(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbT)|
|
||||
(1u<<ucp_gbLV)|(1u<<ucp_gbLVT)|(1u<<ucp_gbOther)|
|
||||
(1u<<ucp_gbRegional_Indicator),
|
||||
ESZ, /* 5 SpacingMark */
|
||||
ESZ|(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbLV)| /* 6 L */
|
||||
(1u<<ucp_gbLVT),
|
||||
ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT), /* 7 V */
|
||||
ESZ|(1u<<ucp_gbT), /* 8 T */
|
||||
ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT), /* 9 LV */
|
||||
ESZ|(1u<<ucp_gbT), /* 10 LVT */
|
||||
(1u<<ucp_gbRegional_Indicator), /* 11 Regional Indicator */
|
||||
ESZ, /* 12 Other */
|
||||
ESZ|(1u<<ucp_gbExtended_Pictographic), /* 13 ZWJ */
|
||||
ESZ /* 14 Extended Pictographic */
|
||||
};
|
||||
|
||||
#undef ESZ
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
/* This table reverses PRIV(ucp_gentype). We can save the cost
|
||||
of a memory load. */
|
||||
|
||||
/* Laid out as (first, last) pairs of particular types, one pair per general
category in ucp_C .. ucp_Z order. Each pair brackets that category's
contiguous run in the particular-category enum of pcre2_ucp.h. */
const int PRIV(ucp_typerange)[] = {
|
||||
ucp_Cc, ucp_Cs, /* ucp_C */
|
||||
ucp_Ll, ucp_Lu, /* ucp_L */
|
||||
ucp_Mc, ucp_Mn, /* ucp_M */
|
||||
ucp_Nd, ucp_No, /* ucp_N */
|
||||
ucp_Pc, ucp_Ps, /* ucp_P */
|
||||
ucp_Sc, ucp_So, /* ucp_S */
|
||||
ucp_Zl, ucp_Zs, /* ucp_Z */
|
||||
};
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* Finally, include the tables that are auto-generated from the Unicode data
|
||||
files. */
|
||||
|
||||
#include "pcre2_ucptables_inc.h"
|
||||
|
||||
#endif /* Unicode support needed */
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Tables for EBCDIC support *
|
||||
*************************************************/
|
||||
|
||||
#if defined(EBCDIC) && \
|
||||
(defined(PCRE2_PCRE2TEST) || defined(PCRE2_DFTABLES) || 'a' != 0x81)
|
||||
|
||||
const uint8_t PRIV(ebcdic_1047_to_ascii)[256] = {
|
||||
0x00,0x01,0x02,0x03,0x9c,0x09,0x86,0x7f,0x97,0x8d,0x8e,0x0b,0x0c,0x0d,0x0e,0x0f,
|
||||
#ifdef EBCDIC_NL25
|
||||
0x10,0x11,0x12,0x13,0x9d,0x85,0x08,0x87,0x18,0x19,0x92,0x8f,0x1c,0x1d,0x1e,0x1f,
|
||||
0x80,0x81,0x82,0x83,0x84,0x0a,0x17,0x1b,0x88,0x89,0x8a,0x8b,0x8c,0x05,0x06,0x07,
|
||||
#else
|
||||
0x10,0x11,0x12,0x13,0x9d,0x0a,0x08,0x87,0x18,0x19,0x92,0x8f,0x1c,0x1d,0x1e,0x1f,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1b,0x88,0x89,0x8a,0x8b,0x8c,0x05,0x06,0x07,
|
||||
#endif
|
||||
0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9a,0x9b,0x14,0x15,0x9e,0x1a,
|
||||
0x20,0xa0,0xe2,0xe4,0xe0,0xe1,0xe3,0xe5,0xe7,0xf1,0xa2,0x2e,0x3c,0x28,0x2b,0x7c,
|
||||
0x26,0xe9,0xea,0xeb,0xe8,0xed,0xee,0xef,0xec,0xdf,0x21,0x24,0x2a,0x29,0x3b,0x5e,
|
||||
0x2d,0x2f,0xc2,0xc4,0xc0,0xc1,0xc3,0xc5,0xc7,0xd1,0xa6,0x2c,0x25,0x5f,0x3e,0x3f,
|
||||
0xf8,0xc9,0xca,0xcb,0xc8,0xcd,0xce,0xcf,0xcc,0x60,0x3a,0x23,0x40,0x27,0x3d,0x22,
|
||||
0xd8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xab,0xbb,0xf0,0xfd,0xfe,0xb1,
|
||||
0xb0,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0xaa,0xba,0xe6,0xb8,0xc6,0xa4,
|
||||
0xb5,0x7e,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0xa1,0xbf,0xd0,0x5b,0xde,0xae,
|
||||
0xac,0xa3,0xa5,0xb7,0xa9,0xa7,0xb6,0xbc,0xbd,0xbe,0xdd,0xa8,0xaf,0x5d,0xb4,0xd7,
|
||||
0x7b,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xad,0xf4,0xf6,0xf2,0xf3,0xf5,
|
||||
0x7d,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0xb9,0xfb,0xfc,0xf9,0xfa,0xff,
|
||||
0x5c,0xf7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0xb2,0xd4,0xd6,0xd2,0xd3,0xd5,
|
||||
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xb3,0xdb,0xdc,0xd9,0xda,0x9f,
|
||||
};
|
||||
|
||||
const uint8_t PRIV(ascii_to_ebcdic_1047)[256] = {
|
||||
#ifdef EBCDIC_NL25
|
||||
0x00,0x01,0x02,0x03,0x37,0x2d,0x2e,0x2f,0x16,0x05,0x25,0x0b,0x0c,0x0d,0x0e,0x0f,
|
||||
#else
|
||||
0x00,0x01,0x02,0x03,0x37,0x2d,0x2e,0x2f,0x16,0x05,0x15,0x0b,0x0c,0x0d,0x0e,0x0f,
|
||||
#endif
|
||||
0x10,0x11,0x12,0x13,0x3c,0x3d,0x32,0x26,0x18,0x19,0x3f,0x27,0x1c,0x1d,0x1e,0x1f,
|
||||
0x40,0x5a,0x7f,0x7b,0x5b,0x6c,0x50,0x7d,0x4d,0x5d,0x5c,0x4e,0x6b,0x60,0x4b,0x61,
|
||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0x7a,0x5e,0x4c,0x7e,0x6e,0x6f,
|
||||
0x7c,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
|
||||
0xd7,0xd8,0xd9,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xad,0xe0,0xbd,0x5f,0x6d,
|
||||
0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96,
|
||||
0x97,0x98,0x99,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xc0,0x4f,0xd0,0xa1,0x07,
|
||||
#ifdef EBCDIC_NL25
|
||||
0x20,0x21,0x22,0x23,0x24,0x15,0x06,0x17,0x28,0x29,0x2a,0x2b,0x2c,0x09,0x0a,0x1b,
|
||||
#else
|
||||
0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2a,0x2b,0x2c,0x09,0x0a,0x1b,
|
||||
#endif
|
||||
0x30,0x31,0x1a,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3a,0x3b,0x04,0x14,0x3e,0xff,
|
||||
0x41,0xaa,0x4a,0xb1,0x9f,0xb2,0x6a,0xb5,0xbb,0xb4,0x9a,0x8a,0xb0,0xca,0xaf,0xbc,
|
||||
0x90,0x8f,0xea,0xfa,0xbe,0xa0,0xb6,0xb3,0x9d,0xda,0x9b,0x8b,0xb7,0xb8,0xb9,0xab,
|
||||
0x64,0x65,0x62,0x66,0x63,0x67,0x9e,0x68,0x74,0x71,0x72,0x73,0x78,0x75,0x76,0x77,
|
||||
0xac,0x69,0xed,0xee,0xeb,0xef,0xec,0xbf,0x80,0xfd,0xfe,0xfb,0xfc,0xba,0xae,0x59,
|
||||
0x44,0x45,0x42,0x46,0x43,0x47,0x9c,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57,
|
||||
0x8c,0x49,0xcd,0xce,0xcb,0xcf,0xcc,0xe1,0x70,0xdd,0xde,0xdb,0xdc,0x8d,0x8e,0xdf,
|
||||
};
|
||||
|
||||
#endif /* EBCDIC support needed */
|
||||
|
||||
/* End of pcre2_tables.c */
|
||||
5805
deps/pcre2/pcre2_ucd.c
vendored
Normal file
5805
deps/pcre2/pcre2_ucd.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
408
deps/pcre2/pcre2_ucp.h
vendored
Normal file
408
deps/pcre2/pcre2_ucp.h
vendored
Normal file
@@ -0,0 +1,408 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2022 University of Cambridge
|
||||
|
||||
This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
|
||||
Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
|
||||
a new version of this code.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_UCP_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* This file contains definitions of the Unicode property values that are
|
||||
returned by the UCD access macros and used throughout PCRE2.
|
||||
|
||||
IMPORTANT: The specific values of the first two enums (general and particular
|
||||
character categories) are assumed by the table called catposstab in the file
|
||||
pcre2_auto_possess.c. They are unlikely to change, but should be checked after
|
||||
an update. */
|
||||
|
||||
/* These are the general character categories. */
|
||||
|
||||
enum {
|
||||
ucp_C,
|
||||
ucp_L,
|
||||
ucp_M,
|
||||
ucp_N,
|
||||
ucp_P,
|
||||
ucp_S,
|
||||
ucp_Z,
|
||||
};
|
||||
|
||||
/* These are the particular character categories. */
|
||||
|
||||
enum {
|
||||
ucp_Cc, /* Control */
|
||||
ucp_Cf, /* Format */
|
||||
ucp_Cn, /* Unassigned */
|
||||
ucp_Co, /* Private use */
|
||||
ucp_Cs, /* Surrogate */
|
||||
ucp_Ll, /* Lower case letter */
|
||||
ucp_Lm, /* Modifier letter */
|
||||
ucp_Lo, /* Other letter */
|
||||
ucp_Lt, /* Title case letter */
|
||||
ucp_Lu, /* Upper case letter */
|
||||
ucp_Mc, /* Spacing mark */
|
||||
ucp_Me, /* Enclosing mark */
|
||||
ucp_Mn, /* Non-spacing mark */
|
||||
ucp_Nd, /* Decimal number */
|
||||
ucp_Nl, /* Letter number */
|
||||
ucp_No, /* Other number */
|
||||
ucp_Pc, /* Connector punctuation */
|
||||
ucp_Pd, /* Dash punctuation */
|
||||
ucp_Pe, /* Close punctuation */
|
||||
ucp_Pf, /* Final punctuation */
|
||||
ucp_Pi, /* Initial punctuation */
|
||||
ucp_Po, /* Other punctuation */
|
||||
ucp_Ps, /* Open punctuation */
|
||||
ucp_Sc, /* Currency symbol */
|
||||
ucp_Sk, /* Modifier symbol */
|
||||
ucp_Sm, /* Mathematical symbol */
|
||||
ucp_So, /* Other symbol */
|
||||
ucp_Zl, /* Line separator */
|
||||
ucp_Zp, /* Paragraph separator */
|
||||
ucp_Zs, /* Space separator */
|
||||
};
|
||||
|
||||
/* These are Boolean properties. */
|
||||
|
||||
enum {
|
||||
ucp_ASCII,
|
||||
ucp_ASCII_Hex_Digit,
|
||||
ucp_Alphabetic,
|
||||
ucp_Bidi_Control,
|
||||
ucp_Bidi_Mirrored,
|
||||
ucp_Case_Ignorable,
|
||||
ucp_Cased,
|
||||
ucp_Changes_When_Casefolded,
|
||||
ucp_Changes_When_Casemapped,
|
||||
ucp_Changes_When_Lowercased,
|
||||
ucp_Changes_When_Titlecased,
|
||||
ucp_Changes_When_Uppercased,
|
||||
ucp_Dash,
|
||||
ucp_Default_Ignorable_Code_Point,
|
||||
ucp_Deprecated,
|
||||
ucp_Diacritic,
|
||||
ucp_Emoji,
|
||||
ucp_Emoji_Component,
|
||||
ucp_Emoji_Modifier,
|
||||
ucp_Emoji_Modifier_Base,
|
||||
ucp_Emoji_Presentation,
|
||||
ucp_Extended_Pictographic,
|
||||
ucp_Extender,
|
||||
ucp_Grapheme_Base,
|
||||
ucp_Grapheme_Extend,
|
||||
ucp_Grapheme_Link,
|
||||
ucp_Hex_Digit,
|
||||
ucp_IDS_Binary_Operator,
|
||||
ucp_IDS_Trinary_Operator,
|
||||
ucp_IDS_Unary_Operator,
|
||||
ucp_ID_Compat_Math_Continue,
|
||||
ucp_ID_Compat_Math_Start,
|
||||
ucp_ID_Continue,
|
||||
ucp_ID_Start,
|
||||
ucp_Ideographic,
|
||||
ucp_InCB,
|
||||
ucp_Join_Control,
|
||||
ucp_Logical_Order_Exception,
|
||||
ucp_Lowercase,
|
||||
ucp_Math,
|
||||
ucp_Modifier_Combining_Mark,
|
||||
ucp_Noncharacter_Code_Point,
|
||||
ucp_Pattern_Syntax,
|
||||
ucp_Pattern_White_Space,
|
||||
ucp_Prepended_Concatenation_Mark,
|
||||
ucp_Quotation_Mark,
|
||||
ucp_Radical,
|
||||
ucp_Regional_Indicator,
|
||||
ucp_Sentence_Terminal,
|
||||
ucp_Soft_Dotted,
|
||||
ucp_Terminal_Punctuation,
|
||||
ucp_Unified_Ideograph,
|
||||
ucp_Uppercase,
|
||||
ucp_Variation_Selector,
|
||||
ucp_White_Space,
|
||||
ucp_XID_Continue,
|
||||
ucp_XID_Start,
|
||||
/* This must be last */
|
||||
ucp_Bprop_Count
|
||||
};
|
||||
|
||||
/* Size of entries in ucd_boolprop_sets[] */
|
||||
|
||||
#define ucd_boolprop_sets_item_size 2
|
||||
|
||||
/* These are the bidi class values. */
|
||||
|
||||
enum {
|
||||
ucp_bidiAL, /* Arabic_Letter */
|
||||
ucp_bidiAN, /* Arabic_Number */
|
||||
ucp_bidiB, /* Paragraph_Separator */
|
||||
ucp_bidiBN, /* Boundary_Neutral */
|
||||
ucp_bidiCS, /* Common_Separator */
|
||||
ucp_bidiEN, /* European_Number */
|
||||
ucp_bidiES, /* European_Separator */
|
||||
ucp_bidiET, /* European_Terminator */
|
||||
ucp_bidiFSI, /* First_Strong_Isolate */
|
||||
ucp_bidiL, /* Left_To_Right */
|
||||
ucp_bidiLRE, /* Left_To_Right_Embedding */
|
||||
ucp_bidiLRI, /* Left_To_Right_Isolate */
|
||||
ucp_bidiLRO, /* Left_To_Right_Override */
|
||||
ucp_bidiNSM, /* Nonspacing_Mark */
|
||||
ucp_bidiON, /* Other_Neutral */
|
||||
ucp_bidiPDF, /* Pop_Directional_Format */
|
||||
ucp_bidiPDI, /* Pop_Directional_Isolate */
|
||||
ucp_bidiR, /* Right_To_Left */
|
||||
ucp_bidiRLE, /* Right_To_Left_Embedding */
|
||||
ucp_bidiRLI, /* Right_To_Left_Isolate */
|
||||
ucp_bidiRLO, /* Right_To_Left_Override */
|
||||
ucp_bidiS, /* Segment_Separator */
|
||||
ucp_bidiWS, /* White_Space */
|
||||
};
|
||||
|
||||
/* These are grapheme break properties. The Extended Pictographic property
|
||||
comes from the emoji-data.txt file. */
|
||||
|
||||
enum {
|
||||
ucp_gbCR, /* 0 */
|
||||
ucp_gbLF, /* 1 */
|
||||
ucp_gbControl, /* 2 */
|
||||
ucp_gbExtend, /* 3 */
|
||||
ucp_gbPrepend, /* 4 */
|
||||
ucp_gbSpacingMark, /* 5 */
|
||||
ucp_gbL, /* 6 Hangul syllable type L */
|
||||
ucp_gbV, /* 7 Hangul syllable type V */
|
||||
ucp_gbT, /* 8 Hangul syllable type T */
|
||||
ucp_gbLV, /* 9 Hangul syllable type LV */
|
||||
ucp_gbLVT, /* 10 Hangul syllable type LVT */
|
||||
ucp_gbRegional_Indicator, /* 11 */
|
||||
ucp_gbOther, /* 12 */
|
||||
ucp_gbZWJ, /* 13 */
|
||||
ucp_gbExtended_Pictographic, /* 14 */
|
||||
};
|
||||
|
||||
/* These are the script identifications. */
|
||||
|
||||
enum {
|
||||
/* Scripts which have characters in other scripts. */
|
||||
ucp_Latin,
|
||||
ucp_Greek,
|
||||
ucp_Cyrillic,
|
||||
ucp_Armenian,
|
||||
ucp_Hebrew,
|
||||
ucp_Arabic,
|
||||
ucp_Syriac,
|
||||
ucp_Thaana,
|
||||
ucp_Devanagari,
|
||||
ucp_Bengali,
|
||||
ucp_Gurmukhi,
|
||||
ucp_Gujarati,
|
||||
ucp_Oriya,
|
||||
ucp_Tamil,
|
||||
ucp_Telugu,
|
||||
ucp_Kannada,
|
||||
ucp_Malayalam,
|
||||
ucp_Sinhala,
|
||||
ucp_Thai,
|
||||
ucp_Tibetan,
|
||||
ucp_Myanmar,
|
||||
ucp_Georgian,
|
||||
ucp_Hangul,
|
||||
ucp_Ethiopic,
|
||||
ucp_Cherokee,
|
||||
ucp_Runic,
|
||||
ucp_Mongolian,
|
||||
ucp_Hiragana,
|
||||
ucp_Katakana,
|
||||
ucp_Bopomofo,
|
||||
ucp_Han,
|
||||
ucp_Yi,
|
||||
ucp_Gothic,
|
||||
ucp_Tagalog,
|
||||
ucp_Hanunoo,
|
||||
ucp_Buhid,
|
||||
ucp_Tagbanwa,
|
||||
ucp_Limbu,
|
||||
ucp_Tai_Le,
|
||||
ucp_Linear_B,
|
||||
ucp_Shavian,
|
||||
ucp_Cypriot,
|
||||
ucp_Buginese,
|
||||
ucp_Coptic,
|
||||
ucp_Glagolitic,
|
||||
ucp_Tifinagh,
|
||||
ucp_Syloti_Nagri,
|
||||
ucp_Phags_Pa,
|
||||
ucp_Nko,
|
||||
ucp_Kayah_Li,
|
||||
ucp_Lycian,
|
||||
ucp_Carian,
|
||||
ucp_Lydian,
|
||||
ucp_Avestan,
|
||||
ucp_Samaritan,
|
||||
ucp_Lisu,
|
||||
ucp_Javanese,
|
||||
ucp_Old_Turkic,
|
||||
ucp_Kaithi,
|
||||
ucp_Mandaic,
|
||||
ucp_Chakma,
|
||||
ucp_Meroitic_Hieroglyphs,
|
||||
ucp_Sharada,
|
||||
ucp_Takri,
|
||||
ucp_Caucasian_Albanian,
|
||||
ucp_Duployan,
|
||||
ucp_Elbasan,
|
||||
ucp_Grantha,
|
||||
ucp_Khojki,
|
||||
ucp_Linear_A,
|
||||
ucp_Mahajani,
|
||||
ucp_Manichaean,
|
||||
ucp_Modi,
|
||||
ucp_Old_Permic,
|
||||
ucp_Psalter_Pahlavi,
|
||||
ucp_Khudawadi,
|
||||
ucp_Tirhuta,
|
||||
ucp_Multani,
|
||||
ucp_Old_Hungarian,
|
||||
ucp_Adlam,
|
||||
ucp_Osage,
|
||||
ucp_Tangut,
|
||||
ucp_Masaram_Gondi,
|
||||
ucp_Dogra,
|
||||
ucp_Gunjala_Gondi,
|
||||
ucp_Hanifi_Rohingya,
|
||||
ucp_Sogdian,
|
||||
ucp_Nandinagari,
|
||||
ucp_Yezidi,
|
||||
ucp_Cypro_Minoan,
|
||||
ucp_Old_Uyghur,
|
||||
ucp_Toto,
|
||||
ucp_Garay,
|
||||
ucp_Gurung_Khema,
|
||||
ucp_Ol_Onal,
|
||||
ucp_Sunuwar,
|
||||
ucp_Todhri,
|
||||
ucp_Tulu_Tigalari,
|
||||
|
||||
/* Scripts which have no characters in other scripts. */
|
||||
ucp_Unknown,
|
||||
ucp_Common,
|
||||
ucp_Lao,
|
||||
ucp_Canadian_Aboriginal,
|
||||
ucp_Ogham,
|
||||
ucp_Khmer,
|
||||
ucp_Old_Italic,
|
||||
ucp_Deseret,
|
||||
ucp_Inherited,
|
||||
ucp_Ugaritic,
|
||||
ucp_Osmanya,
|
||||
ucp_Braille,
|
||||
ucp_New_Tai_Lue,
|
||||
ucp_Old_Persian,
|
||||
ucp_Kharoshthi,
|
||||
ucp_Balinese,
|
||||
ucp_Cuneiform,
|
||||
ucp_Phoenician,
|
||||
ucp_Sundanese,
|
||||
ucp_Lepcha,
|
||||
ucp_Ol_Chiki,
|
||||
ucp_Vai,
|
||||
ucp_Saurashtra,
|
||||
ucp_Rejang,
|
||||
ucp_Cham,
|
||||
ucp_Tai_Tham,
|
||||
ucp_Tai_Viet,
|
||||
ucp_Egyptian_Hieroglyphs,
|
||||
ucp_Bamum,
|
||||
ucp_Meetei_Mayek,
|
||||
ucp_Imperial_Aramaic,
|
||||
ucp_Old_South_Arabian,
|
||||
ucp_Inscriptional_Parthian,
|
||||
ucp_Inscriptional_Pahlavi,
|
||||
ucp_Batak,
|
||||
ucp_Brahmi,
|
||||
ucp_Meroitic_Cursive,
|
||||
ucp_Miao,
|
||||
ucp_Sora_Sompeng,
|
||||
ucp_Bassa_Vah,
|
||||
ucp_Pahawh_Hmong,
|
||||
ucp_Mende_Kikakui,
|
||||
ucp_Mro,
|
||||
ucp_Old_North_Arabian,
|
||||
ucp_Nabataean,
|
||||
ucp_Palmyrene,
|
||||
ucp_Pau_Cin_Hau,
|
||||
ucp_Siddham,
|
||||
ucp_Warang_Citi,
|
||||
ucp_Ahom,
|
||||
ucp_Anatolian_Hieroglyphs,
|
||||
ucp_Hatran,
|
||||
ucp_SignWriting,
|
||||
ucp_Bhaiksuki,
|
||||
ucp_Marchen,
|
||||
ucp_Newa,
|
||||
ucp_Nushu,
|
||||
ucp_Soyombo,
|
||||
ucp_Zanabazar_Square,
|
||||
ucp_Makasar,
|
||||
ucp_Medefaidrin,
|
||||
ucp_Old_Sogdian,
|
||||
ucp_Elymaic,
|
||||
ucp_Nyiakeng_Puachue_Hmong,
|
||||
ucp_Wancho,
|
||||
ucp_Chorasmian,
|
||||
ucp_Dives_Akuru,
|
||||
ucp_Khitan_Small_Script,
|
||||
ucp_Tangsa,
|
||||
ucp_Vithkuqi,
|
||||
ucp_Kawi,
|
||||
ucp_Nag_Mundari,
|
||||
ucp_Kirat_Rai,
|
||||
|
||||
/* This must be last */
|
||||
ucp_Script_Count
|
||||
};
|
||||
|
||||
/* Size of entries in ucd_script_sets[] */
|
||||
|
||||
#define ucd_script_sets_item_size 4
|
||||
|
||||
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2_ucp.h */
|
||||
1596
deps/pcre2/pcre2_ucptables_inc.h
vendored
Normal file
1596
deps/pcre2/pcre2_ucptables_inc.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
179
deps/pcre2/pcre2_util.h
vendored
Normal file
179
deps/pcre2/pcre2_util.h
vendored
Normal file
@@ -0,0 +1,179 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_UTIL_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_UTIL_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* Assertion macros */
|
||||
|
||||
#ifdef PCRE2_DEBUG
|
||||
|
||||
#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
|
||||
#include <assert.h>
|
||||
#endif
|
||||
|
||||
/* PCRE2_ASSERT(x) can be used to inject an assert() for conditions
|
||||
that the code below doesn't support. It is a NOP for non-debug builds
|
||||
but in debug builds will print information about the location of the
|
||||
code where it triggered and crash.
|
||||
|
||||
It is meant to work like assert(), and therefore the expression used
|
||||
should indicate what the expected state is, and shouldn't have any
|
||||
side-effects. */
|
||||
|
||||
#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
|
||||
#define PCRE2_ASSERT(x) assert(x)
|
||||
#else
|
||||
#define PCRE2_ASSERT(x) do \
|
||||
{ \
|
||||
if (!(x)) \
|
||||
{ \
|
||||
fprintf(stderr, "Assertion failed at " __FILE__ ":%d\n", __LINE__); \
|
||||
abort(); \
|
||||
} \
|
||||
} while(0)
|
||||
#endif
|
||||
|
||||
/* LCOV_EXCL_START */
|
||||
|
||||
/* PCRE2_UNREACHABLE() can be used to mark locations in the code that
|
||||
shouldn't be reached. In non-debug builds it is defined as a hint for
|
||||
the compiler to eliminate any code after it, so it is useful also for
|
||||
performance reasons, but should be used with care because if it is
|
||||
ever reached it will trigger Undefined Behaviour and, if you are lucky, a
|
||||
crash. In debug builds it will report the location where it was triggered
|
||||
and crash. One important point to consider when using this macro, is
|
||||
that it is only implemented for a few compilers, and therefore can't
|
||||
be relied on to always be active either, so if it is followed by some
|
||||
code it is important to make sure that the whole thing is safe to
|
||||
use even if the macro is not there (ex: make sure there is a `break`
|
||||
after it if used at the end of a `case`) and to test your code also
|
||||
with a configuration where the macro will be a NOP. */
|
||||
|
||||
#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
|
||||
#define PCRE2_UNREACHABLE() \
|
||||
assert(((void)"Execution reached unexpected point", 0))
|
||||
#else
|
||||
#define PCRE2_UNREACHABLE() do \
|
||||
{ \
|
||||
fprintf(stderr, "Execution reached unexpected point at " __FILE__ \
|
||||
":%d\n", __LINE__); \
|
||||
abort(); \
|
||||
} while(0)
|
||||
#endif
|
||||
|
||||
/* PCRE2_DEBUG_UNREACHABLE() is a debug only version of the previous
|
||||
macro. It is meant to be used in places where the code is handling
|
||||
an error situation in code that shouldn't be reached, but that has
|
||||
some sort of fallback code to normally handle the error. When in
|
||||
doubt you should use this instead of the previous macro. Like in
|
||||
the previous case, it is a good idea to document as much as possible
|
||||
the reason and the actions that should be taken if it ever triggers. */
|
||||
|
||||
#define PCRE2_DEBUG_UNREACHABLE() PCRE2_UNREACHABLE()
|
||||
|
||||
/* LCOV_EXCL_STOP */
|
||||
|
||||
#endif /* PCRE2_DEBUG */
|
||||
|
||||
#ifndef PCRE2_ASSERT
|
||||
#define PCRE2_ASSERT(x) do {} while(0)
|
||||
#endif
|
||||
|
||||
/* LCOV_EXCL_START */
|
||||
|
||||
#ifndef PCRE2_DEBUG_UNREACHABLE
|
||||
#define PCRE2_DEBUG_UNREACHABLE() do {} while(0)
|
||||
#endif
|
||||
|
||||
#ifndef PCRE2_UNREACHABLE
|
||||
#ifdef HAVE_BUILTIN_UNREACHABLE
|
||||
#define PCRE2_UNREACHABLE() __builtin_unreachable()
|
||||
#elif defined(HAVE_BUILTIN_ASSUME)
|
||||
#define PCRE2_UNREACHABLE() __assume(0)
|
||||
#else
|
||||
#define PCRE2_UNREACHABLE() do {} while(0)
|
||||
#endif
|
||||
#endif /* !PCRE2_UNREACHABLE */
|
||||
|
||||
/* LCOV_EXCL_STOP */
|
||||
|
||||
/* We define this fallthrough macro for suppressing -Wimplicit-fallthrough warnings.
Clang only allows this via an attribute, whereas other compilers (eg. GCC) match attributes
and also specially-formatted comments.

This macro should be used with no following semicolon, and ideally with a comment: */

//  PCRE2_FALLTHROUGH /* Fall through */

#ifndef PCRE2_FALLTHROUGH

#if defined(__cplusplus) && __cplusplus >= 202002L && \
    defined(__has_cpp_attribute)
/* Standards-compatible C++ variant. */
#if __has_cpp_attribute(fallthrough)
#define PCRE2_FALLTHROUGH [[fallthrough]];
#endif
#elif !defined(__cplusplus) && \
      defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L && \
      defined(__has_c_attribute)
/* Standards-compatible C variant. */
#if __has_c_attribute(fallthrough)
#define PCRE2_FALLTHROUGH [[fallthrough]];
#endif
#elif ((defined(__clang__) && __clang_major__ >= 10) || \
       (defined(__GNUC__) && __GNUC__ >= 7)) && \
      defined(__has_attribute)
/* Clang and GCC syntax. Rule out old versions because apparently Clang at
   least has a broken implementation of __has_attribute. */
#if __has_attribute(fallthrough)
#define PCRE2_FALLTHROUGH __attribute__((fallthrough));
#endif
#endif

#endif /* !PCRE2_FALLTHROUGH */

/* No attribute syntax is available: the macro expands to nothing, and only
the accompanying comment marks the fall through. */

#ifndef PCRE2_FALLTHROUGH
#define PCRE2_FALLTHROUGH
#endif
|
||||
|
||||
#endif /* PCRE2_UTIL_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2_util.h */
|
||||
397
deps/pcre2/pcre2_valid_utf.c
vendored
Normal file
397
deps/pcre2/pcre2_valid_utf.c
vendored
Normal file
@@ -0,0 +1,397 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function for validating UTF character
|
||||
strings. This file is also #included by the pcre2test program, which uses
|
||||
macros to change names from _pcre2_xxx to xxxx, thereby avoiding name clashes
|
||||
with the library. In this case, PCRE2_PCRE2TEST is defined. */
|
||||
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
|
||||
#include "pcre2_internal.h"
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
/*************************************************
|
||||
* Dummy function when Unicode is not supported *
|
||||
*************************************************/
|
||||
|
||||
/* This function should never be called when Unicode is not supported. */
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
||||
{
|
||||
(void)string;
|
||||
(void)length;
|
||||
(void)erroroffset;
|
||||
return 0;
|
||||
}
|
||||
#else /* UTF is supported */
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Validate a UTF string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called (optionally) at the start of compile or match, to
|
||||
check that a supposed UTF string is actually valid. The early check means
|
||||
that subsequent code can assume it is dealing with a valid string. The check
|
||||
can be turned off for maximum performance, but the consequences of supplying an
|
||||
invalid string are then undefined.
|
||||
|
||||
Arguments:
|
||||
string points to the string
|
||||
length length of string
|
||||
errp pointer to an error position offset variable
|
||||
|
||||
Returns: == 0 if the string is a valid UTF string
|
||||
!= 0 otherwise, setting the offset of the bad character
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
||||
{
|
||||
PCRE2_SPTR p;
|
||||
uint32_t c;
|
||||
|
||||
/* ----------------- Check a UTF-8 string ----------------- */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
||||
/* Originally, this function checked according to RFC 2279, allowing for values
|
||||
in the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were
|
||||
in the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
||||
obsoletes 2279), additional restrictions were applied. The values are now
|
||||
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
||||
subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
|
||||
characters is still checked. Error returns are as follows:
|
||||
|
||||
PCRE2_ERROR_UTF8_ERR1 Missing 1 byte at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR2 Missing 2 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR3 Missing 3 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR4 Missing 4 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR5 Missing 5 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR6 2nd-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR7 3rd-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR8 4th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR9 5th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR10 6th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR11 5-byte character is not permitted by RFC 3629
|
||||
PCRE2_ERROR_UTF8_ERR12 6-byte character is not permitted by RFC 3629
|
||||
PCRE2_ERROR_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
|
||||
PCRE2_ERROR_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
|
||||
PCRE2_ERROR_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
|
||||
PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
|
||||
*/
|
||||
|
||||
for (p = string; length > 0; p++)
|
||||
{
|
||||
uint32_t ab, d;
|
||||
|
||||
c = *p;
|
||||
length--;
|
||||
|
||||
if (c < 128) continue; /* ASCII character */
|
||||
|
||||
if (c < 0xc0) /* Isolated 10xx xxxx byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF8_ERR20;
|
||||
}
|
||||
|
||||
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF8_ERR21;
|
||||
}
|
||||
|
||||
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */
|
||||
if (length < ab) /* Missing bytes */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
switch(ab - length)
|
||||
{
|
||||
case 1: return PCRE2_ERROR_UTF8_ERR1;
|
||||
case 2: return PCRE2_ERROR_UTF8_ERR2;
|
||||
case 3: return PCRE2_ERROR_UTF8_ERR3;
|
||||
case 4: return PCRE2_ERROR_UTF8_ERR4;
|
||||
case 5: return PCRE2_ERROR_UTF8_ERR5;
|
||||
}
|
||||
}
|
||||
length -= ab; /* Length remaining */
|
||||
|
||||
/* Check top bits in the second byte */
|
||||
|
||||
if (((d = *(++p)) & 0xc0) != 0x80)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 1;
|
||||
return PCRE2_ERROR_UTF8_ERR6;
|
||||
}
|
||||
|
||||
/* For each length, check that the remaining bytes start with the 0x80 bit
|
||||
set and not the 0x40 bit. Then check for an overlong sequence, and for the
|
||||
excluded range 0xd800 to 0xdfff. */
|
||||
|
||||
switch (ab)
|
||||
{
|
||||
/* 2-byte character. No further bytes to check for 0x80. Check first byte
|
||||
for for xx00 000x (overlong sequence). */
|
||||
|
||||
case 1: if ((c & 0x3e) == 0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 1;
|
||||
return PCRE2_ERROR_UTF8_ERR15;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
|
||||
for 1110 0000, xx0x xxxx (overlong sequence) or
|
||||
1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
|
||||
|
||||
case 2:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if (c == 0xe0 && (d & 0x20) == 0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR16;
|
||||
}
|
||||
if (c == 0xed && d >= 0xa0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR14;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
|
||||
bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a
|
||||
character greater than 0x0010ffff (f4 8f bf bf) */
|
||||
|
||||
case 3:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if (c == 0xf0 && (d & 0x30) == 0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR17;
|
||||
}
|
||||
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR13;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
|
||||
rejected by the length test below. However, we do the appropriate tests
|
||||
here so that overlong sequences get diagnosed, and also in case there is
|
||||
ever an option for handling these larger code points. */
|
||||
|
||||
/* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
|
||||
1111 1000, xx00 0xxx */
|
||||
|
||||
case 4:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 4;
|
||||
return PCRE2_ERROR_UTF8_ERR9;
|
||||
}
|
||||
if (c == 0xf8 && (d & 0x38) == 0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 4;
|
||||
return PCRE2_ERROR_UTF8_ERR18;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
|
||||
1111 1100, xx00 00xx. */
|
||||
|
||||
case 5:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 4;
|
||||
return PCRE2_ERROR_UTF8_ERR9;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 5;
|
||||
return PCRE2_ERROR_UTF8_ERR10;
|
||||
}
|
||||
if (c == 0xfc && (d & 0x3c) == 0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 5;
|
||||
return PCRE2_ERROR_UTF8_ERR19;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Character is valid under RFC 2279, but 4-byte and 5-byte characters are
|
||||
excluded by RFC 3629. The pointer p is currently at the last byte of the
|
||||
character. */
|
||||
|
||||
if (ab > 3)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - ab;
|
||||
return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
||||
/* ----------------- Check a UTF-16 string ----------------- */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
|
||||
/* There's not so much work, nor so many errors, for UTF-16.
|
||||
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string
|
||||
PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate
|
||||
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
|
||||
*/
|
||||
|
||||
for (p = string; length > 0; p++)
|
||||
{
|
||||
c = *p;
|
||||
length--;
|
||||
|
||||
if ((c & 0xf800) != 0xd800)
|
||||
{
|
||||
/* Normal UTF-16 code point. Neither high nor low surrogate. */
|
||||
}
|
||||
else if ((c & 0x0400) == 0)
|
||||
{
|
||||
/* High surrogate. Must be a followed by a low surrogate. */
|
||||
if (length == 0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF16_ERR1;
|
||||
}
|
||||
p++;
|
||||
length--;
|
||||
if ((*p & 0xfc00) != 0xdc00)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 1;
|
||||
return PCRE2_ERROR_UTF16_ERR2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Isolated low surrogate. Always an error. */
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF16_ERR3;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
||||
|
||||
/* ----------------- Check a UTF-32 string ----------------- */
|
||||
|
||||
#else
|
||||
|
||||
/* There is very little to do for a UTF-32 string.
|
||||
PCRE2_ERROR_UTF32_ERR1 Surrogate character
|
||||
PCRE2_ERROR_UTF32_ERR2 Character > 0x10ffff
|
||||
*/
|
||||
|
||||
for (p = string; length > 0; length--, p++)
|
||||
{
|
||||
c = *p;
|
||||
if ((c & 0xfffff800u) != 0xd800u)
|
||||
{
|
||||
/* Normal UTF-32 code point. Neither high nor low surrogate. */
|
||||
if (c > 0x10ffffu)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF32_ERR2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A surrogate */
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF32_ERR1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_valid_utf.c */
|
||||
547
deps/pcre2/pcre2_xclass.c
vendored
Normal file
547
deps/pcre2/pcre2_xclass.c
vendored
Normal file
@@ -0,0 +1,547 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains two internal functions that are used to match
|
||||
OP_XCLASS and OP_ECLASS. It is used by pcre2_auto_possessify() and by both
|
||||
pcre2_match() and pcre2_dfa_match(). */
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match character against an XCLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class that
|
||||
might contain codepoints above 255 and/or Unicode properties.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
data points to the flag code unit of the XCLASS data
|
||||
utf TRUE if in UTF mode
|
||||
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(xclass)(uint32_t c, PCRE2_SPTR data, const uint8_t *char_lists_end, BOOL utf)
|
||||
{
|
||||
/* Update PRIV(update_classbits) when this function is changed. */
|
||||
PCRE2_UCHAR t;
|
||||
BOOL not_negated = (*data & XCL_NOT) == 0;
|
||||
uint32_t type, max_index, min_index, value;
|
||||
const uint8_t *next_char;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
|
||||
utf = TRUE;
|
||||
#endif
|
||||
|
||||
/* Code points < 256 are matched against a bitmap, if one is present. */
|
||||
|
||||
if ((*data++ & XCL_MAP) != 0)
|
||||
{
|
||||
if (c < 256)
|
||||
return (((const uint8_t *)data)[c/8] & (1u << (c&7))) != 0;
|
||||
/* Skip bitmap. */
|
||||
data += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* Match against the list of Unicode properties. We won't ever
|
||||
encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. */
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (*data == XCL_PROP || *data == XCL_NOTPROP)
|
||||
{
|
||||
/* The UCD record is the same for all properties. */
|
||||
const ucd_record *prop = GET_UCD(c);
|
||||
|
||||
do
|
||||
{
|
||||
int chartype;
|
||||
BOOL isprop = (*data++) == XCL_PROP;
|
||||
BOOL ok;
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
case PT_LAMP:
|
||||
chartype = prop->chartype;
|
||||
if ((chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
chartype == ucp_Lt) == isprop) return not_negated;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
if ((data[1] == prop->chartype) == isprop) return not_negated;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((data[1] == prop->script) == isprop) return not_negated;
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
ok = (data[1] == prop->script ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0);
|
||||
if (ok == isprop) return not_negated;
|
||||
break;
|
||||
|
||||
case PT_ALNUM:
|
||||
chartype = prop->chartype;
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
if (isprop) return not_negated;
|
||||
break;
|
||||
|
||||
default:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
chartype = prop->chartype;
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
if (c < 0xa0)
|
||||
{
|
||||
if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||
c == CHAR_GRAVE_ACCENT) == isprop)
|
||||
return not_negated;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((c < 0xd800 || c > 0xdfff) == isprop)
|
||||
return not_negated;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), data[1]) != 0;
|
||||
if (ok == isprop) return not_negated;
|
||||
break;
|
||||
|
||||
/* The following three properties can occur only in an XCLASS, as there
|
||||
is no \p or \P coding for them. */
|
||||
|
||||
/* Graphic character. Implement this as not Z (space or separator) and
|
||||
not C (other), except for Cf (format) with a few exceptions. This seems
|
||||
to be what Perl does. The exceptional characters are:
|
||||
|
||||
U+061C Arabic Letter Mark
|
||||
U+180E Mongolian Vowel Separator
|
||||
U+2066 - U+2069 Various "isolate"s
|
||||
*/
|
||||
|
||||
case PT_PXGRAPH:
|
||||
chartype = prop->chartype;
|
||||
if ((PRIV(ucp_gentype)[chartype] != ucp_Z &&
|
||||
(PRIV(ucp_gentype)[chartype] != ucp_C ||
|
||||
(chartype == ucp_Cf &&
|
||||
c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
|
||||
)) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
/* Printable character: same as graphic, with the addition of Zs, i.e.
|
||||
not Zl and not Zp, and U+180E. */
|
||||
|
||||
case PT_PXPRINT:
|
||||
chartype = prop->chartype;
|
||||
if ((chartype != ucp_Zl &&
|
||||
chartype != ucp_Zp &&
|
||||
(PRIV(ucp_gentype)[chartype] != ucp_C ||
|
||||
(chartype == ucp_Cf &&
|
||||
c != 0x061c && (c < 0x2066 || c > 0x2069))
|
||||
)) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
/* Punctuation: all Unicode punctuation, plus ASCII characters that
|
||||
Unicode treats as symbols rather than punctuation, for Perl
|
||||
compatibility (these are $+<=>^`|~). */
|
||||
|
||||
case PT_PXPUNCT:
|
||||
chartype = prop->chartype;
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_P ||
|
||||
(c < 128 && PRIV(ucp_gentype)[chartype] == ucp_S)) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
/* Perl has two sets of hex digits */
|
||||
|
||||
case PT_PXXDIGIT:
|
||||
if (((c >= CHAR_0 && c <= CHAR_9) ||
|
||||
(c >= CHAR_A && c <= CHAR_F) ||
|
||||
(c >= CHAR_a && c <= CHAR_f) ||
|
||||
(c >= 0xff10 && c <= 0xff19) || /* Fullwidth digits */
|
||||
(c >= 0xff21 && c <= 0xff26) || /* Fullwidth letters */
|
||||
(c >= 0xff41 && c <= 0xff46)) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
default. */
|
||||
|
||||
/* LCOV_EXCL_START */
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return FALSE;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
data += 2;
|
||||
}
|
||||
while (*data == XCL_PROP || *data == XCL_NOTPROP);
|
||||
}
|
||||
#else
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Match against large chars or ranges that end with a large char. */
|
||||
if (*data < XCL_LIST)
|
||||
{
|
||||
while ((t = *data++) != XCL_END)
|
||||
{
|
||||
uint32_t x, y;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
x = *data++;
|
||||
|
||||
if (t == XCL_SINGLE)
|
||||
{
|
||||
/* Since character ranges follow the properties, and they are
|
||||
sorted, early return is possible for all characters <= x. */
|
||||
if (c <= x) return (c == x) ? not_negated : !not_negated;
|
||||
continue;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(t == XCL_RANGE);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(y, data); /* macro generates multiple statements */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
y = *data++;
|
||||
|
||||
/* Since character ranges follow the properties, and they are
|
||||
sorted, early return is possible for all characters <= y. */
|
||||
if (c <= y) return (c >= x) ? not_negated : !not_negated;
|
||||
}
|
||||
|
||||
return !not_negated; /* char did not match */
|
||||
}
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
type = (uint32_t)(data[0] << 8) | data[1];
|
||||
data += 2;
|
||||
#else
|
||||
type = data[0];
|
||||
data++;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
|
||||
/* Align characters. */
|
||||
next_char = char_lists_end - (GET(data, 0) << 1);
|
||||
type &= XCL_TYPE_MASK;
|
||||
|
||||
/* Alignment check. */
|
||||
PCRE2_ASSERT(((uintptr_t)next_char & 0x1) == 0);
|
||||
|
||||
if (c >= XCL_CHAR_LIST_HIGH_16_START)
|
||||
{
|
||||
max_index = type & XCL_ITEM_COUNT_MASK;
|
||||
if (max_index == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
max_index = *(const uint16_t*)next_char;
|
||||
PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
|
||||
next_char += 2;
|
||||
}
|
||||
|
||||
next_char += max_index << 1;
|
||||
type >>= XCL_TYPE_BIT_LEN;
|
||||
}
|
||||
|
||||
if (c < XCL_CHAR_LIST_LOW_32_START)
|
||||
{
|
||||
max_index = type & XCL_ITEM_COUNT_MASK;
|
||||
|
||||
c = (uint16_t)((c << XCL_CHAR_SHIFT) | XCL_CHAR_END);
|
||||
|
||||
if (max_index == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
max_index = *(const uint16_t*)next_char;
|
||||
PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
|
||||
next_char += 2;
|
||||
}
|
||||
|
||||
if (max_index == 0 || c < *(const uint16_t*)next_char)
|
||||
return ((type & XCL_BEGIN_WITH_RANGE) != 0) == not_negated;
|
||||
|
||||
min_index = 0;
|
||||
value = ((const uint16_t*)next_char)[--max_index];
|
||||
if (c >= value)
|
||||
return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
|
||||
|
||||
max_index--;
|
||||
|
||||
/* Binary search of a range. */
|
||||
while (TRUE)
|
||||
{
|
||||
uint32_t mid_index = (min_index + max_index) >> 1;
|
||||
value = ((const uint16_t*)next_char)[mid_index];
|
||||
|
||||
if (c < value)
|
||||
max_index = mid_index - 1;
|
||||
else if (((const uint16_t*)next_char)[mid_index + 1] <= c)
|
||||
min_index = mid_index + 1;
|
||||
else
|
||||
return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
|
||||
}
|
||||
}
|
||||
|
||||
/* Skip the 16 bit ranges. */
|
||||
max_index = type & XCL_ITEM_COUNT_MASK;
|
||||
if (max_index == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
max_index = *(const uint16_t*)next_char;
|
||||
PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
|
||||
next_char += 2;
|
||||
}
|
||||
|
||||
next_char += (max_index << 1);
|
||||
type >>= XCL_TYPE_BIT_LEN;
|
||||
|
||||
/* Alignment check. */
|
||||
PCRE2_ASSERT(((uintptr_t)next_char & 0x3) == 0);
|
||||
|
||||
max_index = type & XCL_ITEM_COUNT_MASK;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c >= XCL_CHAR_LIST_HIGH_32_START)
|
||||
{
|
||||
if (max_index == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
max_index = *(const uint32_t*)next_char;
|
||||
PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
|
||||
next_char += 4;
|
||||
}
|
||||
|
||||
next_char += max_index << 2;
|
||||
type >>= XCL_TYPE_BIT_LEN;
|
||||
max_index = type & XCL_ITEM_COUNT_MASK;
|
||||
}
|
||||
#endif
|
||||
|
||||
c = (uint32_t)((c << XCL_CHAR_SHIFT) | XCL_CHAR_END);
|
||||
|
||||
if (max_index == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
max_index = *(const uint32_t*)next_char;
|
||||
next_char += 4;
|
||||
}
|
||||
|
||||
if (max_index == 0 || c < *(const uint32_t*)next_char)
|
||||
return ((type & XCL_BEGIN_WITH_RANGE) != 0) == not_negated;
|
||||
|
||||
min_index = 0;
|
||||
value = ((const uint32_t*)next_char)[--max_index];
|
||||
if (c >= value)
|
||||
return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
|
||||
|
||||
max_index--;
|
||||
|
||||
/* Binary search of a range. */
|
||||
while (TRUE)
|
||||
{
|
||||
uint32_t mid_index = (min_index + max_index) >> 1;
|
||||
value = ((const uint32_t*)next_char)[mid_index];
|
||||
|
||||
if (c < value)
|
||||
max_index = mid_index - 1;
|
||||
else if (((const uint32_t*)next_char)[mid_index + 1] <= c)
|
||||
min_index = mid_index + 1;
|
||||
else
|
||||
return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match character against an ECLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class
|
||||
used for describing characters using boolean operations on sets.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
data_start points to the start of the ECLASS data
|
||||
data_end points one-past-the-last of the ECLASS data
|
||||
utf TRUE if in UTF mode
|
||||
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(eclass)(uint32_t c, PCRE2_SPTR data_start, PCRE2_SPTR data_end,
|
||||
const uint8_t *char_lists_end, BOOL utf)
|
||||
{
|
||||
PCRE2_SPTR ptr = data_start;
|
||||
PCRE2_UCHAR flags;
|
||||
uint32_t stack = 0;
|
||||
int stack_depth = 0;
|
||||
|
||||
PCRE2_ASSERT(data_start < data_end);
|
||||
flags = *ptr++;
|
||||
PCRE2_ASSERT((flags & ECL_MAP) == 0 ||
|
||||
(data_end - ptr) >= 32 / (int)sizeof(PCRE2_UCHAR));
|
||||
|
||||
/* Code points < 256 are matched against a bitmap, if one is present.
|
||||
Otherwise all codepoints are checked later. */
|
||||
|
||||
if ((flags & ECL_MAP) != 0)
|
||||
{
|
||||
if (c < 256)
|
||||
return (((const uint8_t *)ptr)[c/8] & (1u << (c&7))) != 0;
|
||||
|
||||
/* Skip the bitmap. */
|
||||
ptr += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* Do a little loop, until we reach the end of the ECLASS. */
|
||||
while (ptr < data_end)
|
||||
{
|
||||
switch (*ptr)
|
||||
{
|
||||
case ECL_AND:
|
||||
++ptr;
|
||||
stack = (stack >> 1) & (stack | ~(uint32_t)1u);
|
||||
PCRE2_ASSERT(stack_depth >= 2);
|
||||
--stack_depth;
|
||||
break;
|
||||
|
||||
case ECL_OR:
|
||||
++ptr;
|
||||
stack = (stack >> 1) | (stack & (uint32_t)1u);
|
||||
PCRE2_ASSERT(stack_depth >= 2);
|
||||
--stack_depth;
|
||||
break;
|
||||
|
||||
case ECL_XOR:
|
||||
++ptr;
|
||||
stack = (stack >> 1) ^ (stack & (uint32_t)1u);
|
||||
PCRE2_ASSERT(stack_depth >= 2);
|
||||
--stack_depth;
|
||||
break;
|
||||
|
||||
case ECL_NOT:
|
||||
++ptr;
|
||||
stack ^= (uint32_t)1u;
|
||||
PCRE2_ASSERT(stack_depth >= 1);
|
||||
break;
|
||||
|
||||
case ECL_XCLASS:
|
||||
{
|
||||
uint32_t matched = PRIV(xclass)(c, ptr + 1 + LINK_SIZE, char_lists_end, utf);
|
||||
|
||||
ptr += GET(ptr, 1);
|
||||
stack = (stack << 1) | matched;
|
||||
++stack_depth;
|
||||
break;
|
||||
}
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
default. */
|
||||
|
||||
/* LCOV_EXCL_START */
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return FALSE;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(stack_depth == 1);
|
||||
(void)stack_depth; /* Ignore unused variable, if assertions are disabled. */
|
||||
|
||||
/* The final bit left on the stack now holds the match result. */
|
||||
return (stack & 1u) != 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_xclass.c */
|
||||
Reference in New Issue
Block a user