deps: include pcre2

Ye olde PCRE (8.45) was end-of-lifed several years ago. For our bundled
regular expression implementation, we want to include the new,
still-maintained PCRE2 framework. Include PCRE2 v10.47.
This commit is contained in:
Edward Thomson
2026-05-26 15:37:03 +01:00
parent a7e1eb2638
commit 7842631242
42 changed files with 54482 additions and 0 deletions

110
deps/pcre2/CMakeLists.txt vendored Normal file
View File

@@ -0,0 +1,110 @@
#
# Static configuration
#
set(SUPPORT_PCRE2_8 1)
set(SUPPORT_UNICODE 1)
set(PCRE2_NEWLINE "LF")
set(NEWLINE_DEFAULT "2")
set(PCRE2_LINK_SIZE "2")
set(PCRE2_MAX_VARLOOKBEHIND "255")
set(PCRE2_PARENS_NEST_LIMIT "250")
set(PCRE2_HEAP_LIMIT "20000000")
set(PCRE2_MATCH_LIMIT "10000000")
set(PCRE2_MATCH_LIMIT_DEPTH "MATCH_LIMIT")
#
# Dynamic configuration
#
include(CheckCSourceCompiles)
include(CheckFunctionExists)
include(CheckSymbolExists)
include(CheckIncludeFile)
check_include_file(assert.h HAVE_ASSERT_H)
check_include_file(dirent.h HAVE_DIRENT_H)
check_include_file(sys/stat.h HAVE_SYS_STAT_H)
check_include_file(sys/types.h HAVE_SYS_TYPES_H)
check_include_file(unistd.h HAVE_UNISTD_H)
check_include_file(windows.h HAVE_WINDOWS_H)
check_symbol_exists(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE) # glibc 2.27
check_symbol_exists(secure_getenv "stdlib.h" HAVE_SECURE_GETENV) # glibc 2.17
check_c_source_compiles(
"int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
HAVE_ATTRIBUTE_UNINITIALIZED
)
check_c_source_compiles("int main(void) { __assume(1); return 0; }" HAVE_BUILTIN_ASSUME)
check_c_source_compiles(
[=[
#include <stddef.h>
int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }
]=]
HAVE_BUILTIN_MUL_OVERFLOW
)
check_c_source_compiles(
"int main(int c, char *v[]) { if (c) __builtin_unreachable(); return (int)(*v[0]); }"
HAVE_BUILTIN_UNREACHABLE
)
# Output files
configure_file(config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h @ONLY)
# Source code
set(PCRE2_HEADERS pcre2.h)
set(PCRE2_SOURCES
pcre2_auto_possess.c
pcre2_chartables.c
pcre2_chkdint.c
pcre2_compile.c
pcre2_compile_cgroup.c
pcre2_compile_class.c
pcre2_config.c
pcre2_context.c
pcre2_convert.c
pcre2_dfa_match.c
pcre2_error.c
pcre2_extuni.c
pcre2_find_bracket.c
pcre2_maketables.c
pcre2_match.c
pcre2_match_data.c
pcre2_match_next.c
pcre2_newline.c
pcre2_ord2utf.c
pcre2_pattern_info.c
pcre2_script_run.c
pcre2_serialize.c
pcre2_string_utils.c
pcre2_study.c
pcre2_substitute.c
pcre2_substring.c
pcre2_tables.c
pcre2_ucd.c
pcre2_valid_utf.c
pcre2_xclass.c
)
# Build setup
add_definitions(-DHAVE_CONFIG_H)
add_definitions(-DPCRE2_CODE_UNIT_WIDTH=8)
if(MSVC)
add_compile_definitions(_CRT_SECURE_NO_DEPRECATE _CRT_SECURE_NO_WARNINGS)
endif()
include_directories(${CMAKE_CURRENT_BINARY_DIR})
add_library(pcre2 OBJECT ${PCRE2_HEADERS} ${PCRE2_SOURCES})

104
deps/pcre2/LICENCE.md vendored Normal file
View File

@@ -0,0 +1,104 @@
PCRE2 Licence
=============
| SPDX-License-Identifier: | BSD-3-Clause WITH PCRE2-exception |
|---------|-------|
PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD"
licence, as specified below, with one exemption for certain binary
redistributions. The documentation for PCRE2, supplied in the "doc" directory,
is distributed under the same terms as the software itself. The data in the
testdata directory is not copyrighted and is in the public domain.
The basic library functions are written in C and are freestanding. Also
included in the distribution is a just-in-time compiler that can be used to
optimize pattern matching. This is an optional feature that can be omitted when
the library is built. The just-in-time compiler is separately licensed under the
"2-clause BSD" licence.
COPYRIGHT
---------
### The basic library functions
Written by: Philip Hazel
Email local part: Philip.Hazel
Email domain: gmail.com
Retired from University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2007 University of Cambridge
Copyright (c) 2007-2024 Philip Hazel
All rights reserved.
### PCRE2 Just-In-Time compilation support
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright (c) 2010-2024 Zoltan Herczeg
All rights reserved.
### Stack-less Just-In-Time compiler
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright (c) 2009-2024 Zoltan Herczeg
All rights reserved.
The code in the `deps/sljit` directory has its own LICENSE file.
### All other contributions
Many other contributors have participated in the authorship of PCRE2. As PCRE2
has never required a Contributor Licensing Agreement, or other copyright
assignment agreement, all contributions have copyright retained by each
original contributor or their employer.
THE "BSD" LICENCE
-----------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notices,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notices, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of any
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
EXEMPTION FOR BINARY LIBRARY-LIKE PACKAGES
------------------------------------------
The second condition in the BSD licence (covering binary redistributions) does
not apply all the way down a chain of software. If binary package A includes
PCRE2, it must respect the condition, but if package B is software that
includes package A, the condition is not imposed on package B unless it uses
PCRE2 independently.

55
deps/pcre2/config.h.in vendored Normal file
View File

@@ -0,0 +1,55 @@
/* config.h for CMake builds */
#cmakedefine HAVE_ASSERT_H 1
#cmakedefine HAVE_BUILTIN_ASSUME 1
#cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1
#cmakedefine HAVE_BUILTIN_UNREACHABLE 1
#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
#cmakedefine HAVE_DIRENT_H 1
#cmakedefine HAVE_SYS_STAT_H 1
#cmakedefine HAVE_SYS_TYPES_H 1
#cmakedefine HAVE_UNISTD_H 1
#cmakedefine HAVE_WINDOWS_H 1
#cmakedefine HAVE_MEMFD_CREATE 1
#cmakedefine HAVE_SECURE_GETENV 1
#cmakedefine SUPPORT_PCRE2_8 1
#cmakedefine SUPPORT_PCRE2_16 1
#cmakedefine SUPPORT_PCRE2_32 1
#cmakedefine DISABLE_PERCENT_ZT 1
#cmakedefine SUPPORT_LIBBZ2 1
#cmakedefine SUPPORT_LIBEDIT 1
#cmakedefine SUPPORT_LIBREADLINE 1
#cmakedefine SUPPORT_LIBZ 1
#cmakedefine SUPPORT_JIT 1
#cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT 1
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT_FORK 1
#cmakedefine SUPPORT_UNICODE 1
#cmakedefine SUPPORT_VALGRIND 1
#cmakedefine BSR_ANYCRLF 1
#cmakedefine EBCDIC 1
#cmakedefine EBCDIC_NL25 1
#cmakedefine EBCDIC_IGNORING_COMPILER 1
#cmakedefine NEVER_BACKSLASH_C 1
#define PCRE2_EXPORT @PCRE2_EXPORT@
#define LINK_SIZE @PCRE2_LINK_SIZE@
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
#define MAX_VARLOOKBEHIND @PCRE2_MAX_VARLOOKBEHIND@
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@
#define PCRE2GREP_MAX_BUFSIZE @PCRE2GREP_MAX_BUFSIZE@
#define MAX_NAME_SIZE 128
#define MAX_NAME_COUNT 10000
/* end config.h for CMake builds */

1079
deps/pcre2/pcre2.h vendored Normal file

File diff suppressed because it is too large Load Diff

1416
deps/pcre2/pcre2_auto_possess.c vendored Normal file

File diff suppressed because it is too large Load Diff

192
deps/pcre2/pcre2_chartables.c vendored Normal file
View File

@@ -0,0 +1,192 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This file was automatically written by the pcre2_dftables auxiliary
program. It contains character tables that are used when no external
tables are passed to PCRE2 by the application that calls it. The tables
are used only for characters whose code values are less than 256, and
only relevant if not in UCP mode. */
/* This set of tables was written in the C locale. */
/* The pcre2_ftables program (which is distributed with PCRE2) can be used
to build alternative versions of this file. This is necessary if you are
running in an EBCDIC environment, or if you want to default to a different
encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
these tables in the "C" locale by default. This happens automatically if
PCRE2 is configured with --enable-rebuild-chartables. However, you can run
pcre2_dftables manually with the -L option to build tables using the LC_ALL
locale. */
#include "pcre2_internal.h"
const uint8_t PRIV(default_tables)[] = {
/* This table is a lower casing table. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table is a case flipping table. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
0x04 lower case letter
0x08 decimal digit
0x10 word (alphanumeric or '_')
*/
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, /* 0 - 7 */
0x18,0x18,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */
0x00,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* ` - g */
0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* h - o */
0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* p - w */
0x16,0x16,0x16,0x00,0x00,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
/* End of pcre2_chartables.c */

94
deps/pcre2/pcre2_chkdint.c vendored Normal file
View File

@@ -0,0 +1,94 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 2023 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This file contains functions to implement checked integer operation */
#ifndef PCRE2_PCRE2TEST
#include "pcre2_internal.h"
#endif
/*************************************************
* Checked Integer Multiplication *
*************************************************/
/*
Arguments:
r A pointer to PCRE2_SIZE to store the answer
a, b Two integers
Returns: Bool indicating if the operation overflows
It is modeled after C23's <stdckdint.h> interface
The INT64_OR_DOUBLE type is a 64-bit integer type when available,
otherwise double. */
BOOL
PRIV(ckd_smul)(PCRE2_SIZE *r, int a, int b)
{
#ifdef HAVE_BUILTIN_MUL_OVERFLOW
PCRE2_SIZE m;
if (__builtin_mul_overflow(a, b, &m)) return TRUE;
*r = m;
#else
INT64_OR_DOUBLE m;
PCRE2_ASSERT(a >= 0 && b >= 0);
m = (INT64_OR_DOUBLE)a * (INT64_OR_DOUBLE)b;
#if defined INT64_MAX || defined int64_t
if (sizeof(m) > sizeof(*r) && m > (INT64_OR_DOUBLE)PCRE2_SIZE_MAX) return TRUE;
*r = (PCRE2_SIZE)m;
#else
if (m > PCRE2_SIZE_MAX) return TRUE;
*r = m;
#endif
#endif
return FALSE;
}
/* End of pcre2_chkdint.c */

11343
deps/pcre2/pcre2_compile.c vendored Normal file

File diff suppressed because it is too large Load Diff

356
deps/pcre2/pcre2_compile.h vendored Normal file
View File

@@ -0,0 +1,356 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef PCRE2_COMPILE_H_IDEMPOTENT_GUARD
#define PCRE2_COMPILE_H_IDEMPOTENT_GUARD
#include "pcre2_internal.h"
/* Compile time error code numbers. They are given names so that they can more
easily be tracked. When a new number is added, the tables called eint1 and
eint2 in pcre2posix.c may need to be updated, and a new error text must be
added to compile_error_texts in pcre2_error.c. Also, the error codes in
pcre2.h.in must be updated - their values are exactly 100 greater than these
values. */
enum { ERR0 = COMPILE_ERROR_BASE,
ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100,
ERR101, ERR102, ERR103, ERR104, ERR105, ERR106, ERR107, ERR108, ERR109, ERR110,
ERR111, ERR112, ERR113, ERR114, ERR115, ERR116, ERR117, ERR118, ERR119, ERR120 };
/* Code values for parsed patterns, which are stored in a vector of 32-bit
unsigned ints. Values less than META_END are literal data values. The coding
for identifying the item is in the top 16-bits, leaving 16 bits for the
additional data that some of them need. The META_CODE, META_DATA, and META_DIFF
macros are used to manipulate parsed pattern elements.
NOTE: When these definitions are changed, the table of extra lengths for each
code (meta_extra_lengths) must be updated to remain in step. */
#define META_END 0x80000000u /* End of pattern */
#define META_ALT 0x80010000u /* alternation */
#define META_ATOMIC 0x80020000u /* atomic group */
#define META_BACKREF 0x80030000u /* Back ref */
#define META_BACKREF_BYNAME 0x80040000u /* \k'name' */
#define META_BIGVALUE 0x80050000u /* Next is a literal > META_END */
#define META_CALLOUT_NUMBER 0x80060000u /* (?C with numerical argument */
#define META_CALLOUT_STRING 0x80070000u /* (?C with string argument */
#define META_CAPTURE 0x80080000u /* Capturing parenthesis */
#define META_CIRCUMFLEX 0x80090000u /* ^ metacharacter */
#define META_CLASS 0x800a0000u /* start non-empty class */
#define META_CLASS_EMPTY 0x800b0000u /* empty class */
#define META_CLASS_EMPTY_NOT 0x800c0000u /* negative empty class */
#define META_CLASS_END 0x800d0000u /* end of non-empty class */
#define META_CLASS_NOT 0x800e0000u /* start non-empty negative class */
#define META_COND_ASSERT 0x800f0000u /* (?(?assertion)... */
#define META_COND_DEFINE 0x80100000u /* (?(DEFINE)... */
#define META_COND_NAME 0x80110000u /* (?(<name>)... */
#define META_COND_NUMBER 0x80120000u /* (?(digits)... */
#define META_COND_RNAME 0x80130000u /* (?(R&name)... */
#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */
#define META_COND_VERSION 0x80150000u /* (?(VERSION<op>x.y)... */
#define META_OFFSET 0x80160000u /* Setting offset for various META
codes (e.g. META_CAPTURE_NAME) */
#define META_SCS 0x80170000u /* (*scan_substring:... */
#define META_CAPTURE_NAME 0x80180000u /* Next <name> in capture lists */
#define META_CAPTURE_NUMBER 0x80190000u /* Next digits in capture lists */
#define META_DOLLAR 0x801a0000u /* $ metacharacter */
#define META_DOT 0x801b0000u /* . metacharacter */
#define META_ESCAPE 0x801c0000u /* \d and friends */
#define META_KET 0x801d0000u /* closing parenthesis */
#define META_NOCAPTURE 0x801e0000u /* no capture parens */
#define META_OPTIONS 0x801f0000u /* (?i) and friends */
#define META_POSIX 0x80200000u /* POSIX class item */
#define META_POSIX_NEG 0x80210000u /* negative POSIX class item */
#define META_RANGE_ESCAPED 0x80220000u /* range with at least one escape */
#define META_RANGE_LITERAL 0x80230000u /* range defined literally */
#define META_RECURSE 0x80240000u /* Recursion */
#define META_RECURSE_BYNAME 0x80250000u /* (?&name) */
#define META_SCRIPT_RUN 0x80260000u /* (*script_run:...) */
/* These must be kept together to make it easy to check that an assertion
is present where expected in a conditional group. */
#define META_LOOKAHEAD 0x80270000u /* (?= */
#define META_LOOKAHEADNOT 0x80280000u /* (?! */
#define META_LOOKBEHIND 0x80290000u /* (?<= */
#define META_LOOKBEHINDNOT 0x802a0000u /* (?<! */
/* These cannot be conditions */
#define META_LOOKAHEAD_NA 0x802b0000u /* (*napla: */
#define META_LOOKBEHIND_NA 0x802c0000u /* (*naplb: */
/* These must be kept in this order, with consecutive values, and the _ARG
versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
versions. */
#define META_MARK 0x802d0000u /* (*MARK) */
#define META_ACCEPT 0x802e0000u /* (*ACCEPT) */
#define META_FAIL 0x802f0000u /* (*FAIL) */
#define META_COMMIT 0x80300000u /* These */
#define META_COMMIT_ARG 0x80310000u /* pairs */
#define META_PRUNE 0x80320000u /* must */
#define META_PRUNE_ARG 0x80330000u /* be */
#define META_SKIP 0x80340000u /* kept */
#define META_SKIP_ARG 0x80350000u /* in */
#define META_THEN 0x80360000u /* this */
#define META_THEN_ARG 0x80370000u /* order */
/* These must be kept in groups of adjacent 3 values, and all together. */
#define META_ASTERISK 0x80380000u /* * */
#define META_ASTERISK_PLUS 0x80390000u /* *+ */
#define META_ASTERISK_QUERY 0x803a0000u /* *? */
#define META_PLUS 0x803b0000u /* + */
#define META_PLUS_PLUS 0x803c0000u /* ++ */
#define META_PLUS_QUERY 0x803d0000u /* +? */
#define META_QUERY 0x803e0000u /* ? */
#define META_QUERY_PLUS 0x803f0000u /* ?+ */
#define META_QUERY_QUERY 0x80400000u /* ?? */
#define META_MINMAX 0x80410000u /* {n,m} repeat */
#define META_MINMAX_PLUS 0x80420000u /* {n,m}+ repeat */
#define META_MINMAX_QUERY 0x80430000u /* {n,m}? repeat */
/* These meta codes must be kept in a group, with the OR/SUB/XOR in
this order, and AND/NOT at the start/end. */
#define META_ECLASS_AND 0x80440000u /* && (or &) in a class */
#define META_ECLASS_OR 0x80450000u /* || (or |, +) in a class */
#define META_ECLASS_SUB 0x80460000u /* -- (or -) in a class */
#define META_ECLASS_XOR 0x80470000u /* ~~ (or ^) in a class */
#define META_ECLASS_NOT 0x80480000u /* ! in a class */
/* Convenience aliases. */
#define META_FIRST_QUANTIFIER META_ASTERISK
#define META_LAST_QUANTIFIER META_MINMAX_QUERY
/* This is a special "meta code" that is used only to distinguish (*asr: from
(*sr: in the table of alphabetic assertions. It is never stored in the parsed
pattern because (*asr: is turned into (*sr:(*atomic: at that stage. There is
therefore no need for it to have a length entry, so use a high value. */
#define META_ATOMIC_SCRIPT_RUN 0x8fff0000u
/* Macros for manipulating elements of the parsed pattern vector. */
#define META_CODE(x) (x & 0xffff0000u)
#define META_DATA(x) (x & 0x0000ffffu)
#define META_DIFF(x,y) ((x-y)>>16)
/* Macros to store and retrieve a PCRE2_SIZE value in the parsed pattern, which
consists of uint32_t elements. Assume that if uint32_t can't hold it, two of
them will be able to (i.e. assume a 64-bit world). */
#if PCRE2_SIZE_MAX <= UINT32_MAX
#define PUTOFFSET(s,p) *p++ = s
#define GETOFFSET(s,p) s = *p++
#define GETPLUSOFFSET(s,p) s = *(++p)
#define READPLUSOFFSET(s,p) s = p[1]
#define SKIPOFFSET(p) p++
#define SIZEOFFSET 1
#else
#define PUTOFFSET(s,p) \
{ *p++ = (uint32_t)(s >> 32); *p++ = (uint32_t)(s & 0xffffffff); }
#define GETOFFSET(s,p) \
{ s = ((PCRE2_SIZE)p[0] << 32) | (PCRE2_SIZE)p[1]; p += 2; }
#define GETPLUSOFFSET(s,p) \
{ s = ((PCRE2_SIZE)p[1] << 32) | (PCRE2_SIZE)p[2]; p += 2; }
#define READPLUSOFFSET(s,p) \
{ s = ((PCRE2_SIZE)p[1] << 32) | (PCRE2_SIZE)p[2]; }
#define SKIPOFFSET(p) p += 2
#define SIZEOFFSET 2
#endif
#ifdef PCRE2_DEBUG
/* Compile data types. */
#define CDATA_RECURSE_ARGS 0 /* Argument list for recurse */
#define CDATA_CRANGE 1 /* Character range list */
#endif
/* Extended class management flags. */
#define CLASS_IS_ECLASS 0x1
/* Macro for the highest character value. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MAX_UCHAR_VALUE 0xffu
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MAX_UCHAR_VALUE 0xffffu
#else
#define MAX_UCHAR_VALUE 0xffffffffu
#endif
#define GET_MAX_CHAR_VALUE(utf) \
((utf) ? MAX_UTF_CODE_POINT : MAX_UCHAR_VALUE)
/* Macro for setting individual bits in class bitmaps. */
#define SETBIT(a,b) a[(b) >> 3] |= (uint8_t)(1u << ((b) & 0x7))
/* Macro for 8 bit specific checks. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define SELECT_VALUE8(value8, value) (value8)
#else
#define SELECT_VALUE8(value8, value) (value)
#endif
/* Macro for aligning data. */
#define CLIST_ALIGN_TO(base, align) \
((base + ((size_t)(align) - 1)) & ~((size_t)(align) - 1))
/* Structure for holding information about an OP_ECLASS internal operand.
An "operand" here could be just a single OP_[X]CLASS, or it could be some
complex expression; but it's some sequence of ECL_* codes which pushes one
value to the stack. */
typedef struct {
/* The position of the operand - or NULL if (lengthptr != NULL). */
PCRE2_UCHAR *code_start;
PCRE2_SIZE length;
/* The operand's type if it is a single code (ECL_XCLASS, ECL_ANY, ECL_NONE);
otherwise zero if the operand is not atomic. */
uint8_t op_single_type;
/* Regardless of whether it's a single code or not, we fully constant-fold
the bitmap for code points < 256. */
class_bits_storage bits;
} eclass_op_info;
/* Macros for the definitions below, to prevent name collisions. */
#define _pcre2_posix_class_maps PCRE2_SUFFIX(_pcre2_posix_class_maps)
#define _pcre2_update_classbits PCRE2_SUFFIX(_pcre2_update_classbits_)
#define _pcre2_compile_class_nested PCRE2_SUFFIX(_pcre2_compile_class_nested_)
#define _pcre2_compile_class_not_nested PCRE2_SUFFIX(_pcre2_compile_class_not_nested_)
#define _pcre2_compile_get_hash_from_name PCRE2_SUFFIX(_pcre2_compile_get_hash_from_name)
#define _pcre2_compile_find_named_group PCRE2_SUFFIX(_pcre2_compile_find_named_group)
#define _pcre2_compile_find_dupname_details PCRE2_SUFFIX(_pcre2_compile_find_dupname_details)
#define _pcre2_compile_add_name_to_table PCRE2_SUFFIX(_pcre2_compile_add_name_to_table)
#define _pcre2_compile_parse_scan_substr_args PCRE2_SUFFIX(_pcre2_compile_parse_scan_substr_args)
#define _pcre2_compile_parse_recurse_args PCRE2_SUFFIX(_pcre2_compile_parse_recurse_args)
/* Indices of the POSIX classes in posix_names, posix_name_lengths,
posix_class_maps, and posix_substitutes. They must be kept in sync. */
#define PC_DIGIT 7
#define PC_GRAPH 8
#define PC_PRINT 9
#define PC_PUNCT 10
#define PC_XDIGIT 13
extern const int PRIV(posix_class_maps)[];
/* Defines for hash_dup member in named_group structure. */
#define NAMED_GROUP_HASH_MASK ((uint16_t)0x7fff)
#define NAMED_GROUP_IS_DUPNAME ((uint16_t)0x8000)
#define NAMED_GROUP_GET_HASH(ng) ((ng)->hash_dup & NAMED_GROUP_HASH_MASK)
/* Exported functions from pcre2_compile_class.c file: */
/* Set bits in classbits according to the property type */
void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
uint8_t *classbits);
/* Compile the META codes from start_ptr...end_ptr, writing a single OP_CLASS
OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY into pcode. */
uint32_t *PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
/* Compile the META codes in pptr into opcodes written to pcode. The pptr must
start at a META_CLASS or META_CLASS_NOT.
The pptr will be left pointing at the matching META_CLASS_END. */
BOOL PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
compile_block *cb, PCRE2_SIZE *lengthptr);
/* Exported functions from pcre2_compile_cgroup.c file: */
/* Compute hash from a capture name. */
uint16_t PRIV(compile_get_hash_from_name)(PCRE2_SPTR name, uint32_t length);
/* Get the descriptor of a known named capture. */
named_group *PRIV(compile_find_named_group)(PCRE2_SPTR name,
uint32_t length, compile_block *cb);
/* Add entires to name table in alphabetical order. */
uint32_t PRIV(compile_add_name_to_table)(compile_block *cb,
named_group *ng, uint32_t tablecount);
/* Searches the properties of duplicated names, and returns them
in indexptr and countptr. */
BOOL PRIV(compile_find_dupname_details)(PCRE2_SPTR name, uint32_t length,
int *indexptr, int *countptr, int *errorcodeptr, compile_block *cb);
/* Parse the arguments of recurse operations. */
uint32_t * PRIV(compile_parse_scan_substr_args)(uint32_t *pptr,
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
/* Parse the arguments of recurse operations. */
BOOL PRIV(compile_parse_recurse_args)(uint32_t *pptr_start,
PCRE2_SIZE offset, int *errorcodeptr, compile_block *cb);
#endif /* PCRE2_COMPILE_H_IDEMPOTENT_GUARD */
/* End of pcre2_compile.h */

632
deps/pcre2/pcre2_compile_cgroup.c vendored Normal file
View File

@@ -0,0 +1,632 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#include "pcre2_compile.h"
/*************************************************
* Compute the hash code from a capture name *
*************************************************/
/* This function returns with a simple hash code
computed from the name of a capture group.
Arguments:
name name of the capture group
length the length of the name
Returns: hash code
*/
uint16_t
PRIV(compile_get_hash_from_name)(PCRE2_SPTR name, uint32_t length)
{
uint16_t hash;
PCRE2_ASSERT(length > 0);
hash = (uint16_t)((name[0] & 0x7f) | ((name[length - 1] & 0xff) << 7));
PCRE2_ASSERT(hash <= NAMED_GROUP_HASH_MASK);
return hash;
}
/*************************************************
* Get the descriptor of a known named capture *
*************************************************/
/* This function returns the descriptor in the
named group list of a known capture group.
Arguments:
name name of the capture group
length the length of the name
Returns: pointer to the descriptor when found,
NULL otherwise
*/
named_group *
PRIV(compile_find_named_group)(PCRE2_SPTR name,
uint32_t length, compile_block *cb)
{
uint16_t hash = PRIV(compile_get_hash_from_name)(name, length);
named_group *ng;
named_group *end = cb->named_groups + cb->names_found;
for (ng = cb->named_groups; ng < end; ng++)
if (length == ng->length && hash == NAMED_GROUP_GET_HASH(ng) &&
PRIV(strncmp)(name, ng->name, length) == 0) return ng;
return NULL;
}
/*************************************************
* Add an entry to the name/number table *
*************************************************/
/* This function is called between compiling passes to add an entry to the
name/number table, maintaining alphabetical order. Checking for permitted
and forbidden duplicates has already been done.
Arguments:
cb the compile data block
nb named group entry
tablecount the count of names in the table so far
Returns: new tablecount
*/
uint32_t
PRIV(compile_add_name_to_table)(compile_block *cb,
named_group *ng, uint32_t tablecount)
{
uint32_t i;
PCRE2_SPTR name = ng->name;
int length = ng->length;
uint32_t duplicate_count = 1;
PCRE2_UCHAR *slot = cb->name_table;
PCRE2_ASSERT(length > 0);
if ((ng->hash_dup & NAMED_GROUP_IS_DUPNAME) != 0)
{
named_group *ng_it;
named_group *end = cb->named_groups + cb->names_found;
for (ng_it = ng + 1; ng_it < end; ng_it++)
if (ng_it->name == name) duplicate_count++;
}
for (i = 0; i < tablecount; i++)
{
int crc = memcmp(name, slot + IMM2_SIZE, CU2BYTES(length));
if (crc == 0 && slot[IMM2_SIZE + length] != 0)
crc = -1; /* Current name is a substring */
/* Make space in the table and break the loop for an earlier name. For a
duplicate or later name, carry on. We do this for duplicates so that in the
simple case (when ?(| is not used) they are in order of their numbers. In all
cases they are in the order in which they appear in the pattern. */
if (crc < 0)
{
(void)memmove(slot + cb->name_entry_size * duplicate_count, slot,
CU2BYTES((tablecount - i) * cb->name_entry_size));
break;
}
/* Continue the loop for a later or duplicate name */
slot += cb->name_entry_size;
}
tablecount += duplicate_count;
while (TRUE)
{
PUT2(slot, 0, ng->number);
memcpy(slot + IMM2_SIZE, name, CU2BYTES(length));
/* Add a terminating zero and fill the rest of the slot with zeroes so that
the memory is all initialized. Otherwise valgrind moans about uninitialized
memory when saving serialized compiled patterns. */
memset(slot + IMM2_SIZE + length, 0,
CU2BYTES(cb->name_entry_size - length - IMM2_SIZE));
if (--duplicate_count == 0) break;
while (TRUE)
{
++ng;
if (ng->name == name) break;
}
slot += cb->name_entry_size;
}
return tablecount;
}
/*************************************************
* Find details of duplicate group names *
*************************************************/
/* This is called from compile_branch() when it needs to know the index and
count of duplicates in the names table when processing named backreferences,
either directly, or as conditions.
Arguments:
name points to the name
length the length of the name
indexptr where to put the index
countptr where to put the count of duplicates
errorcodeptr where to put an error code
cb the compile block
Returns: TRUE if OK, FALSE if not, error code set
*/
BOOL
PRIV(compile_find_dupname_details)(PCRE2_SPTR name, uint32_t length,
int *indexptr, int *countptr, int *errorcodeptr, compile_block *cb)
{
uint32_t i, groupnumber;
int count;
PCRE2_UCHAR *slot = cb->name_table;
/* Find the first entry in the table */
for (i = 0; i < cb->names_found; i++)
{
if (PRIV(strncmp)(name, slot + IMM2_SIZE, length) == 0 &&
slot[IMM2_SIZE + length] == 0) break;
slot += cb->name_entry_size;
}
/* This should not occur, because this function is called only when we know we
have duplicate names. Give an internal error. */
/* LCOV_EXCL_START */
if (i >= cb->names_found)
{
PCRE2_DEBUG_UNREACHABLE();
*errorcodeptr = ERR53;
cb->erroroffset = name - cb->start_pattern;
return FALSE;
}
/* LCOV_EXCL_STOP */
/* Record the index and then see how many duplicates there are, updating the
backref map and maximum back reference as we do. */
*indexptr = i;
count = 0;
for (;;)
{
count++;
groupnumber = GET2(slot, 0);
cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1;
if (groupnumber > cb->top_backref) cb->top_backref = groupnumber;
if (++i >= cb->names_found) break;
slot += cb->name_entry_size;
if (PRIV(strncmp)(name, slot + IMM2_SIZE, length) != 0 ||
(slot + IMM2_SIZE)[length] != 0) break;
}
*countptr = count;
return TRUE;
}
/* Process the capture list of scan substring and recurse
operations. Since at least one argument must be present,
a 0 return value represents error. */
static size_t
PRIV(compile_process_capture_list)(uint32_t *pptr, PCRE2_SIZE offset,
int *errorcodeptr, compile_block *cb)
{
size_t i, size = 0;
named_group *ng;
PCRE2_SPTR name;
uint32_t length;
named_group *end = cb->named_groups + cb->names_found;
while (TRUE)
{
++pptr;
switch (META_CODE(*pptr))
{
case META_OFFSET:
GETPLUSOFFSET(offset, pptr);
continue;
case META_CAPTURE_NAME:
offset += META_DATA(*pptr);
length = *(++pptr);
name = cb->start_pattern + offset;
ng = PRIV(compile_find_named_group)(name, length, cb);
if (ng == NULL)
{
*errorcodeptr = ERR15;
cb->erroroffset = offset;
return 0;
}
if ((ng->hash_dup & NAMED_GROUP_IS_DUPNAME) == 0)
{
pptr[-1] = META_CAPTURE_NUMBER;
pptr[0] = ng->number;
size++;
continue;
}
/* Remains only for duplicated names. */
pptr[-1] = META_CAPTURE_NAME;
pptr[0] = (uint32_t)(ng - cb->named_groups);
size++;
name = ng->name;
while (++ng < end)
if (ng->name == name) size++;
continue;
case META_CAPTURE_NUMBER:
offset += META_DATA(*pptr);
i = *(++pptr);
if (i > cb->bracount)
{
*errorcodeptr = ERR15;
cb->erroroffset = offset;
return 0;
}
if (i > cb->top_backref) cb->top_backref = (uint16_t)i;
size++;
continue;
default:
break;
}
PCRE2_ASSERT(size > 0);
return size;
}
}
/*******************************************************
* Parse the arguments of scan substring operations *
********************************************************/
/* This function parses the arguments of scan substring operations.
Arguments:
pptr_start points to the current parsed pattern pointer
offset argument starting offset in the pattern
errorcodeptr where to put an error code
cb the compile block
lengthptr NULL during the real compile phase
points to length accumulator during pre-compile phase
Returns: TRUE if OK, FALSE if not, error code set
*/
uint32_t *
PRIV(compile_parse_scan_substr_args)(uint32_t *pptr,
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr)
{
uint8_t *captures;
uint8_t *capture_ptr;
uint8_t bit;
PCRE2_SPTR name;
named_group *ng;
named_group *end = cb->named_groups + cb->names_found;
BOOL all_found;
size_t size;
PCRE2_ASSERT(*pptr == META_OFFSET);
if (PRIV(compile_process_capture_list)(pptr - 1, 0, errorcodeptr, cb) == 0)
return NULL;
/* Align to bytes. Since the highest capture can
be equal to bracount, +1 is added before the aligning. */
size = (cb->bracount + 1 + 7) >> 3;
captures = (uint8_t*)cb->cx->memctl.malloc(size, cb->cx->memctl.memory_data);
if (captures == NULL)
{
*errorcodeptr = ERR21;
READPLUSOFFSET(cb->erroroffset, pptr);
return NULL;
}
memset(captures, 0, size);
while (TRUE)
{
switch (META_CODE(*pptr))
{
case META_OFFSET:
pptr++;
SKIPOFFSET(pptr);
continue;
case META_CAPTURE_NAME:
ng = cb->named_groups + pptr[1];
PCRE2_ASSERT((ng->hash_dup & NAMED_GROUP_IS_DUPNAME) != 0);
pptr += 2;
name = ng->name;
all_found = TRUE;
do
{
if (ng->name != name) continue;
capture_ptr = captures + (ng->number >> 3);
PCRE2_ASSERT(capture_ptr < captures + size);
bit = (uint8_t)(1 << (ng->number & 0x7));
if ((*capture_ptr & bit) == 0)
{
*capture_ptr |= bit;
all_found = FALSE;
}
}
while (++ng < end);
if (!all_found)
{
*lengthptr += 1 + 2 * IMM2_SIZE;
continue;
}
pptr[-2] = META_CAPTURE_NUMBER;
pptr[-1] = 0;
continue;
case META_CAPTURE_NUMBER:
pptr += 2;
capture_ptr = captures + (pptr[-1] >> 3);
PCRE2_ASSERT(capture_ptr < captures + size);
bit = (uint8_t)(1 << (pptr[-1] & 0x7));
if ((*capture_ptr & bit) != 0)
{
pptr[-1] = 0;
continue;
}
*capture_ptr |= bit;
*lengthptr += 1 + IMM2_SIZE;
continue;
default:
break;
}
break;
}
cb->cx->memctl.free(captures, cb->cx->memctl.memory_data);
return pptr - 1;
}
/* Implement heapsort heapify algorithm. */
static void do_heapify_u16(uint16_t *captures, size_t size, size_t i)
{
size_t max;
size_t left;
size_t right;
uint16_t tmp;
while (TRUE)
{
max = i;
left = (i << 1) + 1;
right = left + 1;
if (left < size && captures[left] > captures[max]) max = left;
if (right < size && captures[right] > captures[max]) max = right;
if (i == max) return;
tmp = captures[i];
captures[i] = captures[max];
captures[max] = tmp;
i = max;
}
}
/*************************************************
* Parse the arguments of recurse operations *
*************************************************/
/* This function parses the arguments of recurse operations.
Arguments:
pptr_start the current parsed pattern pointer
offset argument starting offset in the pattern
errorcodeptr where to put an error code
cb the compile block
lengthptr NULL during the real compile phase
points to length accumulator during pre-compile phase
Returns: TRUE if OK, FALSE if not, error code set
*/
BOOL
PRIV(compile_parse_recurse_args)(uint32_t *pptr_start,
PCRE2_SIZE offset, int *errorcodeptr, compile_block *cb)
{
uint32_t *pptr = pptr_start;
size_t i, size;
PCRE2_SPTR name;
named_group *ng;
named_group *end = cb->named_groups + cb->names_found;
recurse_arguments *args;
uint16_t *captures;
uint16_t *current;
uint16_t *captures_end;
uint16_t tmp;
/* Process all arguments, compute the required size. */
size = PRIV(compile_process_capture_list)(pptr, offset, errorcodeptr, cb);
if (size == 0) return FALSE;
args = cb->cx->memctl.malloc(
sizeof(recurse_arguments) + size * sizeof(uint16_t), cb->cx->memctl.memory_data);
if (args == NULL)
{
*errorcodeptr = ERR21;
cb->erroroffset = offset;
return FALSE;
}
args->header.next = NULL;
#ifdef PCRE2_DEBUG
args->header.type = CDATA_RECURSE_ARGS;
#endif
args->size = size;
/* Caching the pre-processed capture list. */
if (cb->last_data != NULL)
cb->last_data->next = &args->header;
else
cb->first_data = &args->header;
cb->last_data = &args->header;
/* Create the capture list size. */
captures = (uint16_t*)(args + 1);
while (TRUE)
{
++pptr;
switch (META_CODE(*pptr))
{
case META_OFFSET:
SKIPOFFSET(pptr);
continue;
case META_CAPTURE_NAME:
ng = cb->named_groups + *(++pptr);
PCRE2_ASSERT((ng->hash_dup & NAMED_GROUP_IS_DUPNAME) != 0);
*captures++ = (uint16_t)(ng->number);
name = ng->name;
while (++ng < end)
if (ng->name == name) *captures++ = (uint16_t)(ng->number);
continue;
case META_CAPTURE_NUMBER:
*captures++ = *(++pptr);
continue;
default:
break;
}
break;
}
PCRE2_ASSERT(size == (size_t)(captures - (uint16_t*)(args + 1)));
args->skip_size = (size_t)(pptr - pptr_start) - 1;
if (size == 1) return TRUE;
/* Sort captures. */
captures = (uint16_t*)(args + 1);
i = (size >> 1) - 1;
while (TRUE)
{
do_heapify_u16(captures, size, i);
if (i == 0) break;
i--;
}
for (i = size - 1; i > 0; i--)
{
tmp = captures[0];
captures[0] = captures[i];
captures[i] = tmp;
do_heapify_u16(captures, i, 0);
}
/* Remove duplicates. */
captures_end = captures + size;
tmp = *captures++;
current = captures;
while (current < captures_end)
{
if (*current != tmp)
{
tmp = *current;
*captures++ = tmp;
}
current++;
}
args->size = (size_t)(captures - (uint16_t*)(args + 1));
return TRUE;
}
/* End of pcre2_compile_cgroup.c */

2769
deps/pcre2/pcre2_compile_class.c vendored Normal file

File diff suppressed because it is too large Load Diff

250
deps/pcre2/pcre2_config.c vendored Normal file
View File

@@ -0,0 +1,250 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#include "pcre2_internal.h"
/* These macros are the standard way of turning unquoted text into C strings.
They allow macros like PCRE2_MAJOR to be defined without quotes, which is
convenient for user programs that want to test their values. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/*************************************************
* Return info about what features are configured *
*************************************************/
/* If where is NULL, the length of memory required is returned.
Arguments:
what what information is required
where where to put the information
Returns: 0 if a numerical value is returned
>= 0 if a string value
PCRE2_ERROR_BADOPTION if "where" not recognized
or JIT target requested when JIT not enabled
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_config(uint32_t what, void *where)
{
if (where == NULL) /* Requests a length */
{
switch (what)
{
default:
return PCRE2_ERROR_BADOPTION;
case PCRE2_CONFIG_BSR:
case PCRE2_CONFIG_COMPILED_WIDTHS:
case PCRE2_CONFIG_DEPTHLIMIT:
case PCRE2_CONFIG_EFFECTIVE_LINKSIZE:
case PCRE2_CONFIG_HEAPLIMIT:
case PCRE2_CONFIG_JIT:
case PCRE2_CONFIG_LINKSIZE:
case PCRE2_CONFIG_MATCHLIMIT:
case PCRE2_CONFIG_NEVER_BACKSLASH_C:
case PCRE2_CONFIG_NEWLINE:
case PCRE2_CONFIG_PARENSLIMIT:
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
case PCRE2_CONFIG_TABLES_LENGTH:
case PCRE2_CONFIG_UNICODE:
return sizeof(uint32_t);
/* These are handled below */
case PCRE2_CONFIG_JITTARGET:
case PCRE2_CONFIG_UNICODE_VERSION:
case PCRE2_CONFIG_VERSION:
break;
}
}
switch (what)
{
default:
return PCRE2_ERROR_BADOPTION;
case PCRE2_CONFIG_BSR:
#ifdef BSR_ANYCRLF
*((uint32_t *)where) = PCRE2_BSR_ANYCRLF;
#else
*((uint32_t *)where) = PCRE2_BSR_UNICODE;
#endif
break;
case PCRE2_CONFIG_COMPILED_WIDTHS:
*((uint32_t *)where) = 0
#ifdef SUPPORT_PCRE2_8
+ (1 << 0)
#endif
#ifdef SUPPORT_PCRE2_16
+ (1 << 1)
#endif
#ifdef SUPPORT_PCRE2_32
+ (1 << 2)
#endif
;
break;
case PCRE2_CONFIG_DEPTHLIMIT:
*((uint32_t *)where) = MATCH_LIMIT_DEPTH;
break;
case PCRE2_CONFIG_EFFECTIVE_LINKSIZE:
*((uint32_t *)where) = LINK_SIZE * sizeof(PCRE2_UCHAR);
break;
case PCRE2_CONFIG_HEAPLIMIT:
*((uint32_t *)where) = HEAP_LIMIT;
break;
case PCRE2_CONFIG_JIT:
#ifdef SUPPORT_JIT
*((uint32_t *)where) = 1;
#else
*((uint32_t *)where) = 0;
#endif
break;
case PCRE2_CONFIG_JITTARGET:
#ifdef SUPPORT_JIT
{
const char *v = PRIV(jit_get_target)();
return (int)(1 + ((where == NULL)?
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
}
#else
return PCRE2_ERROR_BADOPTION;
#endif
case PCRE2_CONFIG_LINKSIZE:
*((uint32_t *)where) = (uint32_t)CONFIGURED_LINK_SIZE;
break;
case PCRE2_CONFIG_MATCHLIMIT:
*((uint32_t *)where) = MATCH_LIMIT;
break;
case PCRE2_CONFIG_NEWLINE:
*((uint32_t *)where) = NEWLINE_DEFAULT;
break;
case PCRE2_CONFIG_NEVER_BACKSLASH_C:
#ifdef NEVER_BACKSLASH_C
*((uint32_t *)where) = 1;
#else
*((uint32_t *)where) = 0;
#endif
break;
case PCRE2_CONFIG_PARENSLIMIT:
*((uint32_t *)where) = PARENS_NEST_LIMIT;
break;
/* This is now obsolete. The stack is no longer used via recursion for
handling backtracking in pcre2_match(). */
case PCRE2_CONFIG_STACKRECURSE:
*((uint32_t *)where) = 0;
break;
case PCRE2_CONFIG_TABLES_LENGTH:
*((uint32_t *)where) = TABLES_LENGTH;
break;
case PCRE2_CONFIG_UNICODE_VERSION:
{
#if defined SUPPORT_UNICODE
const char *v = PRIV(unicode_version);
#else
const char *v = "Unicode not supported";
#endif
return (int)(1 + ((where == NULL)?
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
}
case PCRE2_CONFIG_UNICODE:
#if defined SUPPORT_UNICODE
*((uint32_t *)where) = 1;
#else
*((uint32_t *)where) = 0;
#endif
break;
/* The hackery in setting "v" below is to cope with the case when
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
If the second alternative is used in this case, it does not leave a space
before the date. On the other hand, if all four macros are put into a single
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
There are problems using an "obvious" approach like this:
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE2_MINOR)
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE)
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
of STRING(). The C standard states: "If (before argument substitution) any
argument consists of no preprocessing tokens, the behavior is undefined." It
turns out the gcc treats this case as a single empty string - which is what
we really want - but Visual C grumbles about the lack of an argument for the
macro. Unfortunately, both are within their rights. As there seems to be no
way to test for a macro's value being empty at compile time, we have to
resort to a runtime test. */
case PCRE2_CONFIG_VERSION:
{
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
return (int)(1 + ((where == NULL)?
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
}
}
return 0;
}
/* End of pcre2_config.c */

557
deps/pcre2/pcre2_context.c vendored Normal file
View File

@@ -0,0 +1,557 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#include "pcre2_internal.h"
/*************************************************
* Default malloc/free functions *
*************************************************/
/* Ignore the "user data" argument in each case. */
static void *default_malloc(size_t size, void *data)
{
(void)data;
return malloc(size);
}
static void default_free(void *block, void *data)
{
(void)data;
free(block);
}
/*************************************************
* Get a block and save memory control *
*************************************************/
/* This internal function is called to get a block of memory in which the
memory control data is to be stored at the start for future use.
Arguments:
size amount of memory required
memctl pointer to a memctl block or NULL
Returns: pointer to memory or NULL on failure
*/
extern void *
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
{
pcre2_memctl *newmemctl;
void *yield = (memctl == NULL)? malloc(size) :
memctl->malloc(size, memctl->memory_data);
if (yield == NULL) return NULL;
newmemctl = (pcre2_memctl *)yield;
if (memctl == NULL)
{
newmemctl->malloc = default_malloc;
newmemctl->free = default_free;
newmemctl->memory_data = NULL;
}
else *newmemctl = *memctl;
return yield;
}
/*************************************************
* Create and initialize contexts *
*************************************************/
/* Initializing for compile and match contexts is done in separate, private
functions so that these can be called from functions such as pcre2_compile()
when an external context is not supplied. The initializing functions have an
option to set up default memory management. */
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
void (*private_free)(void *, void *), void *memory_data)
{
pcre2_general_context *gcontext;
if (private_malloc == NULL) private_malloc = default_malloc;
if (private_free == NULL) private_free = default_free;
gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data);
if (gcontext == NULL) return NULL;
gcontext->memctl.malloc = private_malloc;
gcontext->memctl.free = private_free;
gcontext->memctl.memory_data = memory_data;
return gcontext;
}
/* A default compile context is set up to save having to initialize at run time
when no context is supplied to the compile function. */
pcre2_compile_context PRIV(default_compile_context) = {
{ default_malloc, default_free, NULL }, /* Default memory handling */
NULL, /* Stack guard */
NULL, /* Stack guard data */
PRIV(default_tables), /* Character tables */
PCRE2_UNSET, /* Max pattern length */
PCRE2_UNSET, /* Max pattern compiled length */
BSR_DEFAULT, /* Backslash R default */
NEWLINE_DEFAULT, /* Newline convention */
PARENS_NEST_LIMIT, /* As it says */
0, /* Extra options */
MAX_VARLOOKBEHIND, /* As it says */
PCRE2_OPTIMIZATION_ALL /* All optimizations enabled */
};
/* The create function copies the default into the new memory, but must
override the default memory handling functions if a gcontext was provided. */
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_create(pcre2_general_context *gcontext)
{
pcre2_compile_context *ccontext = PRIV(memctl_malloc)(
sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
if (ccontext == NULL) return NULL;
*ccontext = PRIV(default_compile_context);
if (gcontext != NULL)
*((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
return ccontext;
}
/* A default match context is set up to save having to initialize at run time
when no context is supplied to a match function. */
pcre2_match_context PRIV(default_match_context) = {
{ default_malloc, default_free, NULL },
#ifdef SUPPORT_JIT
NULL, /* JIT callback */
NULL, /* JIT callback data */
#endif
NULL, /* Callout function */
NULL, /* Callout data */
NULL, /* Substitute callout function */
NULL, /* Substitute callout data */
NULL, /* Substitute case callout function */
NULL, /* Substitute case callout data */
PCRE2_UNSET, /* Offset limit */
HEAP_LIMIT,
MATCH_LIMIT,
MATCH_LIMIT_DEPTH };
/* The create function copies the default into the new memory, but must
override the default memory handling functions if a gcontext was provided. */
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_create(pcre2_general_context *gcontext)
{
pcre2_match_context *mcontext = PRIV(memctl_malloc)(
sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext);
if (mcontext == NULL) return NULL;
*mcontext = PRIV(default_match_context);
if (gcontext != NULL)
*((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext);
return mcontext;
}
/* A default convert context is set up to save having to initialize at run time
when no context is supplied to the convert function. */
pcre2_convert_context PRIV(default_convert_context) = {
{ default_malloc, default_free, NULL }, /* Default memory handling */
#ifdef _WIN32
CHAR_BACKSLASH, /* Default path separator */
CHAR_GRAVE_ACCENT /* Default escape character */
#else /* Not Windows */
CHAR_SLASH, /* Default path separator */
CHAR_BACKSLASH /* Default escape character */
#endif
};
/* The create function copies the default into the new memory, but must
override the default memory handling functions if a gcontext was provided. */
PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_create(pcre2_general_context *gcontext)
{
pcre2_convert_context *ccontext = PRIV(memctl_malloc)(
sizeof(pcre2_real_convert_context), (pcre2_memctl *)gcontext);
if (ccontext == NULL) return NULL;
*ccontext = PRIV(default_convert_context);
if (gcontext != NULL)
*((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
return ccontext;
}
/*************************************************
* Context copy functions *
*************************************************/
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_copy(pcre2_general_context *gcontext)
{
pcre2_general_context *newcontext =
gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
gcontext->memctl.memory_data);
if (newcontext == NULL) return NULL;
memcpy(newcontext, gcontext, sizeof(pcre2_real_general_context));
return newcontext;
}
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
{
pcre2_compile_context *newcontext =
ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
ccontext->memctl.memory_data);
if (newcontext == NULL) return NULL;
memcpy(newcontext, ccontext, sizeof(pcre2_real_compile_context));
return newcontext;
}
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_copy(pcre2_match_context *mcontext)
{
pcre2_match_context *newcontext =
mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
mcontext->memctl.memory_data);
if (newcontext == NULL) return NULL;
memcpy(newcontext, mcontext, sizeof(pcre2_real_match_context));
return newcontext;
}
PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_copy(pcre2_convert_context *ccontext)
{
pcre2_convert_context *newcontext =
ccontext->memctl.malloc(sizeof(pcre2_real_convert_context),
ccontext->memctl.memory_data);
if (newcontext == NULL) return NULL;
memcpy(newcontext, ccontext, sizeof(pcre2_real_convert_context));
return newcontext;
}
/*************************************************
* Context free functions *
*************************************************/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_general_context_free(pcre2_general_context *gcontext)
{
if (gcontext != NULL)
gcontext->memctl.free(gcontext, gcontext->memctl.memory_data);
}
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_compile_context_free(pcre2_compile_context *ccontext)
{
if (ccontext != NULL)
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
}
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_match_context_free(pcre2_match_context *mcontext)
{
if (mcontext != NULL)
mcontext->memctl.free(mcontext, mcontext->memctl.memory_data);
}
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_convert_context_free(pcre2_convert_context *ccontext)
{
if (ccontext != NULL)
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
}
/*************************************************
* Set values in contexts *
*************************************************/
/* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
data is given. Only some of the functions are able to test the validity of the
data. */
/* ------------ Compile context ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_character_tables(pcre2_compile_context *ccontext,
const uint8_t *tables)
{
ccontext->tables = tables;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
{
switch(value)
{
case PCRE2_BSR_ANYCRLF:
case PCRE2_BSR_UNICODE:
ccontext->bsr_convention = value;
return 0;
default:
return PCRE2_ERROR_BADDATA;
}
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
{
ccontext->max_pattern_length = length;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_max_pattern_compiled_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
{
ccontext->max_pattern_compiled_length = length;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
{
switch(newline)
{
case PCRE2_NEWLINE_CR:
case PCRE2_NEWLINE_LF:
case PCRE2_NEWLINE_CRLF:
case PCRE2_NEWLINE_ANY:
case PCRE2_NEWLINE_ANYCRLF:
case PCRE2_NEWLINE_NUL:
ccontext->newline_convention = newline;
return 0;
default:
return PCRE2_ERROR_BADDATA;
}
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, uint32_t limit)
{
ccontext->max_varlookbehind = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
{
ccontext->parens_nest_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
{
ccontext->extra_options = options;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
int (*guard)(uint32_t, void *), void *user_data)
{
ccontext->stack_guard = guard;
ccontext->stack_guard_data = user_data;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_optimize(pcre2_compile_context *ccontext, uint32_t directive)
{
if (ccontext == NULL)
return PCRE2_ERROR_NULL;
switch (directive)
{
case PCRE2_OPTIMIZATION_NONE:
ccontext->optimization_flags = 0;
break;
case PCRE2_OPTIMIZATION_FULL:
ccontext->optimization_flags = PCRE2_OPTIMIZATION_ALL;
break;
default:
if (directive >= PCRE2_AUTO_POSSESS && directive <= PCRE2_START_OPTIMIZE_OFF)
{
/* Even directive numbers starting from 64 switch a bit on;
* Odd directive numbers starting from 65 switch a bit off */
if ((directive & 1) != 0)
ccontext->optimization_flags &= ~(1u << ((directive >> 1) - 32));
else
ccontext->optimization_flags |= 1u << ((directive >> 1) - 32);
return 0;
}
return PCRE2_ERROR_BADOPTION;
}
return 0;
}
/* ------------ Match context ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_callout(pcre2_match_context *mcontext,
int (*callout)(pcre2_callout_block *, void *), void *callout_data)
{
mcontext->callout = callout;
mcontext->callout_data = callout_data;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_substitute_callout(pcre2_match_context *mcontext,
int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
void *substitute_callout_data)
{
mcontext->substitute_callout = substitute_callout;
mcontext->substitute_callout_data = substitute_callout_data;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_substitute_case_callout(pcre2_match_context *mcontext,
PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
PCRE2_SIZE, int, void *),
void *substitute_case_callout_data)
{
mcontext->substitute_case_callout = substitute_case_callout;
mcontext->substitute_case_callout_data = substitute_case_callout_data;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
{
mcontext->heap_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
{
mcontext->match_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_depth_limit(pcre2_match_context *mcontext, uint32_t limit)
{
mcontext->depth_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
{
mcontext->offset_limit = limit;
return 0;
}
/* These functions became obsolete at release 10.30. The first is kept as a
synonym for backwards compatibility. The second now does nothing. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
{
return pcre2_set_depth_limit(mcontext, limit);
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
void *mydata)
{
(void)mcontext;
(void)mymalloc;
(void)myfree;
(void)mydata;
return 0;
}
/* ------------ Convert context ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_glob_separator(pcre2_convert_context *ccontext, uint32_t separator)
{
if (separator != CHAR_SLASH && separator != CHAR_BACKSLASH &&
separator != CHAR_DOT) return PCRE2_ERROR_BADDATA;
ccontext->glob_separator = separator;
return 0;
}
static const char *globpunct =
STR_EXCLAMATION_MARK STR_QUOTATION_MARK STR_NUMBER_SIGN STR_DOLLAR_SIGN
STR_PERCENT_SIGN STR_AMPERSAND STR_APOSTROPHE STR_LEFT_PARENTHESIS
STR_RIGHT_PARENTHESIS STR_ASTERISK STR_PLUS STR_COMMA STR_MINUS STR_DOT
STR_SLASH STR_COLON STR_SEMICOLON STR_LESS_THAN_SIGN STR_EQUALS_SIGN
STR_GREATER_THAN_SIGN STR_QUESTION_MARK STR_COMMERCIAL_AT
STR_LEFT_SQUARE_BRACKET STR_BACKSLASH STR_RIGHT_SQUARE_BRACKET
STR_CIRCUMFLEX_ACCENT STR_UNDERSCORE STR_GRAVE_ACCENT STR_LEFT_CURLY_BRACKET
STR_VERTICAL_LINE STR_RIGHT_CURLY_BRACKET STR_TILDE;
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
{
if (escape > 255 || (escape != 0 && strchr(globpunct, escape) == NULL))
return PCRE2_ERROR_BADDATA;
ccontext->glob_escape = escape;
return 0;
}
/* End of pcre2_context.c */

1261
deps/pcre2/pcre2_convert.c vendored Normal file

File diff suppressed because it is too large Load Diff

4134
deps/pcre2/pcre2_dfa_match.c vendored Normal file

File diff suppressed because it is too large Load Diff

384
deps/pcre2/pcre2_error.c vendored Normal file
View File

@@ -0,0 +1,384 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#include "pcre2_internal.h"
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/* The texts of compile-time error messages. Compile-time error numbers start
at COMPILE_ERROR_BASE (100).
This used to be a table of strings, but in order to reduce the number of
relocations needed when a shared library is loaded dynamically, it is now one
long string. We cannot use a table of offsets, because the lengths of inserts
such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
pcre2_get_error_message() counts through to the one it wants - this isn't a
performance issue because these strings are used only when there is an error.
Each substring ends with \0 to insert a null character. This includes the final
substring, so that the whole string ends with \0\0, which can be detected when
counting through. */
static const unsigned char compile_error_texts[] =
"no error\0"
"\\ at end of pattern\0"
"\\c at end of pattern\0"
"unrecognized character follows \\\0"
"numbers out of order in {} quantifier\0"
/* 5 */
"number too big in {} quantifier\0"
"missing terminating ] for character class\0"
"escape sequence is invalid in character class\0"
"range out of order in character class\0"
"quantifier does not follow a repeatable item\0"
/* 10 */
"internal error: unexpected repeat\0"
"unrecognized character after (? or (?-\0"
"POSIX named classes are supported only within a class\0"
"POSIX collating elements are not supported\0"
"missing closing parenthesis\0"
/* 15 */
"reference to non-existent subpattern\0"
"pattern passed as NULL with non-zero length\0"
"unrecognised compile-time option bit(s)\0"
"missing ) after (?# comment\0"
"parentheses are too deeply nested\0"
/* 20 */
"regular expression is too large\0"
"failed to allocate heap memory\0"
"unmatched closing parenthesis\0"
"internal error: code overflow\0"
"missing closing parenthesis for condition\0"
/* 25 */
"length of lookbehind assertion is not limited\0"
"a relative value of zero is not allowed\0"
"conditional subpattern contains more than two branches\0"
"atomic assertion expected after (?( or (?(?C)\0"
"digit expected after (?+\0"
/* 30 */
"unknown POSIX class name\0"
"internal error in pcre2_study(): should not occur\0"
"this version of PCRE2 does not have Unicode support\0"
"parentheses are too deeply nested (stack check)\0"
"character code point value in \\x{} or \\o{} is too large\0"
/* 35 */
"lookbehind is too complicated\0"
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
"PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0"
"number after (?C is greater than 255\0"
"closing parenthesis for (?C expected\0"
/* 40 */
"invalid escape sequence in (*VERB) name\0"
"unrecognized character after (?P\0"
"syntax error in subpattern name (missing terminator?)\0"
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
"subpattern name must start with a non-digit\0"
/* 45 */
"this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
"malformed \\P or \\p sequence\0"
"unknown property after \\P or \\p\0"
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0"
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
/* 50 */
"invalid range in character class\0"
"octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
"internal error: overran compiling workspace\0"
"internal error: previously-checked referenced subpattern not found\0"
"DEFINE subpattern contains more than one branch\0"
/* 55 */
"missing opening brace after \\o\0"
"internal error: unknown newline setting\0"
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
"(?R (recursive pattern call) must be followed by a closing parenthesis\0"
/* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */
"obsolete error (should not occur)\0" /* Was the above */
/* 60 */
"(*VERB) not recognized or malformed\0"
"subpattern number is too big\0"
"subpattern name expected\0"
"internal error: parsed pattern overflow\0"
"non-octal character in \\o{} (closing brace missing?)\0"
/* 65 */
"different names for subpatterns of the same number are not allowed\0"
"(*MARK) must have an argument\0"
"non-hex character in \\x{} (closing brace missing?)\0"
#ifndef EBCDIC
"\\c must be followed by a printable ASCII character\0"
#else
"\\c must be followed by a letter or one of @[\\]^_?\0"
#endif
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
/* 70 */
"internal error: unknown meta code in check_lookbehinds()\0"
"\\N is not supported in a class\0"
"callout string is too long\0"
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
"using UTF is disabled by the application\0"
/* 75 */
"using UCP is disabled by the application\0"
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character code point value in \\u.... sequence is too large\0"
"digits missing after \\x or in \\x{} or \\o{} or \\N{U+}\0"
"syntax error or number too big in (?(VERSION condition\0"
/* 80 */
"internal error: unknown opcode in auto_possessify()\0"
"missing terminating delimiter for callout with string argument\0"
"unrecognized string delimiter follows (?C\0"
"using \\C is disabled by the application\0"
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
/* 85 */
"using \\C is disabled in this PCRE2 library\0"
"regular expression is too complicated\0"
"lookbehind assertion is too long\0"
"pattern string is longer than the limit set by the application\0"
"internal error: unknown code in parsed pattern\0"
/* 90 */
"internal error: bad code value in parsed_skip()\0"
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
"invalid option bits with PCRE2_LITERAL\0"
"\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
"invalid hyphen in option setting\0"
/* 95 */
"(*alpha_assertion) not recognized\0"
"script runs require Unicode support, which this version of PCRE2 does not have\0"
"too many capturing groups (maximum 65535)\0"
"octal digit missing after \\0 (PCRE2_EXTRA_NO_BS0 is set)\0"
"\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0"
/* 100 */
"branch too long in variable-length lookbehind assertion\0"
"compiled pattern would be longer than the limit set by the application\0"
"octal value given by \\ddd is greater than \\377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)\0"
"using callouts is disabled by the application\0"
"PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode\0"
/* 105 */
"PCRE2_EXTRA_TURKISH_CASING requires UTF in 8-bit mode\0"
"PCRE2_EXTRA_TURKISH_CASING and PCRE2_EXTRA_CASELESS_RESTRICT are not compatible\0"
"extended character class nesting is too deep\0"
"invalid operator in extended character class\0"
"unexpected operator in extended character class (no preceding operand)\0"
/* 110 */
"expected operand after operator in extended character class\0"
"square brackets needed to clarify operator precedence in extended character class\0"
"missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS)\0"
"unexpected expression in extended character class (no preceding operator)\0"
"empty expression in extended character class\0"
/* 115 */
"terminating ] with no following closing parenthesis in (?[...]\0"
"unexpected character in (?[...]) extended character class\0"
"expected capture group number or name\0"
"missing opening parenthesis\0"
"syntax error in subpattern number (missing terminator?)\0"
/* 120 */
"erroroffset passed as NULL\0"
;
/* Match-time and UTF error texts are in the same format. */
static const unsigned char match_error_texts[] =
"no error\0"
"no match\0"
"partial match\0"
"UTF-8 error: 1 byte missing at end\0"
"UTF-8 error: 2 bytes missing at end\0"
/* 5 */
"UTF-8 error: 3 bytes missing at end\0"
"UTF-8 error: 4 bytes missing at end\0"
"UTF-8 error: 5 bytes missing at end\0"
"UTF-8 error: byte 2 top bits not 0x80\0"
"UTF-8 error: byte 3 top bits not 0x80\0"
/* 10 */
"UTF-8 error: byte 4 top bits not 0x80\0"
"UTF-8 error: byte 5 top bits not 0x80\0"
"UTF-8 error: byte 6 top bits not 0x80\0"
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
/* 15 */
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
"UTF-8 error: overlong 2-byte sequence\0"
"UTF-8 error: overlong 3-byte sequence\0"
"UTF-8 error: overlong 4-byte sequence\0"
/* 20 */
"UTF-8 error: overlong 5-byte sequence\0"
"UTF-8 error: overlong 6-byte sequence\0"
"UTF-8 error: isolated byte with 0x80 bit set\0"
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
"UTF-16 error: missing low surrogate at end\0"
/* 25 */
"UTF-16 error: invalid low surrogate\0"
"UTF-16 error: isolated low surrogate\0"
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
"bad data value\0"
/* 30 */
"patterns do not all use the same character tables\0"
"magic number missing\0"
"pattern compiled in wrong mode: 8/16/32-bit error\0"
"bad offset value\0"
"bad option value\0"
/* 35 */
"invalid replacement string\0"
"bad offset into UTF string\0"
"callout error code\0" /* Never returned by PCRE2 itself */
"invalid data in workspace for DFA restart\0"
"too much recursion for DFA matching\0"
/* 40 */
"backreference condition or recursion test is not supported for DFA matching\0"
"function is not supported for DFA matching\0"
"pattern contains an item that is not supported for DFA matching\0"
"workspace size exceeded in DFA matching\0"
"internal error - pattern overwritten?\0"
/* 45 */
"bad JIT option\0"
"JIT stack limit reached\0"
"match limit exceeded\0"
"no more memory\0"
"unknown substring\0"
/* 50 */
"non-unique substring name\0"
"NULL argument passed with non-zero length\0"
"nested recursion at the same subject position\0"
"matching depth limit exceeded\0"
"requested value is not available\0"
/* 55 */
"requested value is not set\0"
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
"bad escape sequence in replacement string\0"
"expected closing curly bracket in replacement string\0"
"bad substitution in replacement string\0"
/* 60 */
"match with end before start or start moved backwards is not supported\0"
"too many replacements (more than INT_MAX)\0"
"bad serialized data\0"
"heap limit exceeded\0"
"invalid syntax\0"
/* 65 */
"internal error: duplicate substitution match\0"
"PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
"internal error: invalid substring offset\0"
"feature is not supported by the JIT compiler\0"
"error performing replacement case transformation\0"
/* 70 */
"replacement too large (longer than PCRE2_SIZE)\0"
"substitute pattern differs from prior match call\0"
"substitute subject differs from prior match call\0"
"substitute start offset differs from prior match call\0"
"substitute options differ from prior match call\0"
"disallowed use of \\K in lookaround\0"
;
/*************************************************
* Return error message *
*************************************************/
/* This function copies an error message into a buffer whose units are of an
appropriate width. Error numbers are positive for compile-time errors, and
negative for match-time errors (except for UTF errors), but the numbers are all
distinct.
Arguments:
enumber error number
buffer where to put the message (zero terminated)
size size of the buffer in code units
Returns: length of message if all is well
negative on error
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size)
{
const unsigned char *message;
PCRE2_SIZE i;
int n, rc = 0;
if (size == 0) return PCRE2_ERROR_NOMEMORY;
if (enumber >= COMPILE_ERROR_BASE) /* Compile error */
{
message = compile_error_texts;
n = enumber - COMPILE_ERROR_BASE;
}
else if (enumber < 0) /* Match or UTF error */
{
message = match_error_texts;
n = -enumber;
}
else /* Invalid error number */
{
message = (const unsigned char *)"\0"; /* Empty message list */
n = 1;
}
for (; n > 0; n--)
{
while (*message++ != CHAR_NUL) {}
if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA;
}
for (i = 0; *message != 0; i++)
{
if (i >= size - 1)
{
rc = PCRE2_ERROR_NOMEMORY;
break;
}
buffer[i] = *message++;
}
#if defined EBCDIC && 'a' != 0x81
/* If compiling for EBCDIC, but the compiler's string literals are not EBCDIC,
then we are in the "force EBCDIC 1047" mode. I have chosen to add a few lines
here to translate the error strings on the fly, rather than require the string
literals above to be written out arduously using the "STR_XYZ" macros. */
for (PCRE2_SIZE j = 0; j < i; ++j)
buffer[j] = PRIV(ascii_to_ebcdic_1047)[buffer[j]];
#endif
buffer[i] = 0; /* Terminate message, even if truncated. */
return rc? rc : (int)i;
}
/* End of pcre2_error.c */

159
deps/pcre2/pcre2_extuni.c vendored Normal file
View File

@@ -0,0 +1,159 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function that is used to match a Unicode
extended grapheme sequence. It is used by both pcre2_match() and
pcre2_dfa_match(). However, it is called only when Unicode support is being
compiled. Nevertheless, we provide a dummy function when there is no Unicode
support, because some compilers do not like functionless source files. */
#include "pcre2_internal.h"
/* Dummy function */
#ifndef SUPPORT_UNICODE
PCRE2_SPTR
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
{
(void)c;
(void)eptr;
(void)start_subject;
(void)end_subject;
(void)utf;
(void)xcount;
return NULL;
}
#else
/*************************************************
* Match an extended grapheme sequence *
*************************************************/
/* NOTE: The logic contained in this function is replicated in three special-
purpose functions in the pcre2_jit_compile.c module. If the logic below is
changed, they must be kept in step so that the interpreter and the JIT have the
same behaviour.
Arguments:
c the first character
eptr pointer to next character
start_subject pointer to start of subject
end_subject pointer to end of subject
utf TRUE if in UTF mode
xcount pointer to count of additional characters,
or NULL if count not needed
Returns: pointer after the end of the sequence
*/
PCRE2_SPTR
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
{
BOOL was_ep_ZWJ = FALSE;
int lgb = UCD_GRAPHBREAK(c);
while (eptr < end_subject)
{
int rgb;
int len = 1;
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
rgb = UCD_GRAPHBREAK(c);
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
/* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
preceded by Extended Pictographic. */
if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
break;
/* Not breaking between Regional Indicators is allowed only if there
are an even number of preceding RIs. */
if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
{
int ricount = 0;
PCRE2_SPTR bptr = eptr - 1;
if (utf) BACKCHAR(bptr);
/* bptr is pointing to the left-hand character */
while (bptr > start_subject)
{
bptr--;
if (utf)
{
BACKCHAR(bptr);
GETCHAR(c, bptr);
}
else
c = *bptr;
if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
ricount++;
}
if ((ricount & 1) != 0) break; /* Grapheme break required */
}
/* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
between; see next statement). */
was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
/* If Extend follows Extended_Pictographic, do not update lgb; this allows
any number of them before a following ZWJ. */
if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb;
eptr += len;
if (xcount != NULL) *xcount += 1;
}
return eptr;
}
#endif /* SUPPORT_UNICODE */
/* End of pcre2_extuni.c */

217
deps/pcre2/pcre2_find_bracket.c vendored Normal file
View File

@@ -0,0 +1,217 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains a single function that scans through a compiled pattern
until it finds a capturing bracket with the given number, or, if the number is
negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The
function is called from pcre2_compile.c and also from pcre2_study.c when
finding the minimum matching length. */
#include "pcre2_internal.h"
/*************************************************
* Scan compiled regex for specific bracket *
*************************************************/
/*
Arguments:
code points to start of expression
utf TRUE in UTF mode
number the required bracket number or negative to find a lookbehind
Returns: pointer to the opcode for the bracket, or NULL if not found
*/
PCRE2_SPTR
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
{
for (;;)
{
PCRE2_UCHAR c = *code;
if (c == OP_END) return NULL;
/* XCLASS is used for classes that cannot be represented just by a bit map.
This includes negated single high-valued characters. ECLASS is used for
classes that use set operations internally. CALLOUT_STR is used for
callouts with string arguments. In each case the length in the table is
zero; the actual length is stored in the compiled code. */
if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1);
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
/* Handle lookbehind */
else if (c == OP_REVERSE || c == OP_VREVERSE)
{
if (number < 0) return code;
code += PRIV(OP_lengths)[c];
}
/* Handle capturing bracket */
else if (c == OP_CBRA || c == OP_SCBRA ||
c == OP_CBRAPOS || c == OP_SCBRAPOS)
{
int n = (int)GET2(code, 1+LINK_SIZE);
if (n == number) return code;
code += PRIV(OP_lengths)[c];
}
/* Otherwise, we can get the item's length from the table, except that for
repeated character types, we have to test for \p and \P, which have an extra
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
must add in its length. */
else
{
switch(c)
{
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEPOSSTAR:
case OP_TYPEPOSPLUS:
case OP_TYPEPOSQUERY:
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
break;
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEEXACT:
case OP_TYPEPOSUPTO:
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
code += 2;
break;
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
code += code[1];
break;
}
/* Add in the fixed length from the table */
code += PRIV(OP_lengths)[c];
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
followed by a multi-byte character. The length in the table is a minimum, so
we have to arrange to skip the extra bytes. */
#ifdef MAYBE_UTF_MULTI
if (utf) switch(c)
{
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
case OP_EXACT:
case OP_EXACTI:
case OP_NOTEXACT:
case OP_NOTEXACTI:
case OP_UPTO:
case OP_UPTOI:
case OP_NOTUPTO:
case OP_NOTUPTOI:
case OP_MINUPTO:
case OP_MINUPTOI:
case OP_NOTMINUPTO:
case OP_NOTMINUPTOI:
case OP_POSUPTO:
case OP_POSUPTOI:
case OP_NOTPOSUPTO:
case OP_NOTPOSUPTOI:
case OP_STAR:
case OP_STARI:
case OP_NOTSTAR:
case OP_NOTSTARI:
case OP_MINSTAR:
case OP_MINSTARI:
case OP_NOTMINSTAR:
case OP_NOTMINSTARI:
case OP_POSSTAR:
case OP_POSSTARI:
case OP_NOTPOSSTAR:
case OP_NOTPOSSTARI:
case OP_PLUS:
case OP_PLUSI:
case OP_NOTPLUS:
case OP_NOTPLUSI:
case OP_MINPLUS:
case OP_MINPLUSI:
case OP_NOTMINPLUS:
case OP_NOTMINPLUSI:
case OP_POSPLUS:
case OP_POSPLUSI:
case OP_NOTPOSPLUS:
case OP_NOTPOSPLUSI:
case OP_QUERY:
case OP_QUERYI:
case OP_NOTQUERY:
case OP_NOTQUERYI:
case OP_MINQUERY:
case OP_MINQUERYI:
case OP_NOTMINQUERY:
case OP_NOTMINQUERYI:
case OP_POSQUERY:
case OP_POSQUERYI:
case OP_NOTPOSQUERY:
case OP_NOTPOSQUERYI:
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
break;
}
#else
(void)(utf); /* Keep compiler happy by referencing function argument */
#endif /* MAYBE_UTF_MULTI */
}
}
}
/* End of pcre2_find_bracket.c */

844
deps/pcre2/pcre2_fuzzsupport.c vendored Normal file
View File

@@ -0,0 +1,844 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2023 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/***************************************************************************
Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
tries to compile and match it, deriving options from the string itself. If
STANDALONE is defined, a main program that calls the driver with the contents
of specified files is compiled, and commentary on what is happening is output.
If an argument starts with '=' the rest of it it is taken as a literal string
rather than a file name. This allows easy testing of short strings.
Written by Philip Hazel, October 2016
Updated February 2024 (Addison Crump added 16-bit/32-bit and JIT support)
Further updates March/April/May 2024 by PH
***************************************************************************/
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* stack size adjustment */
#include <sys/time.h>
#include <sys/resource.h>
#define STACK_SIZE_MB 256
#define JIT_SIZE_LIMIT (200 * 1024)
#ifndef PCRE2_CODE_UNIT_WIDTH
#define PCRE2_CODE_UNIT_WIDTH 8
#endif
#include "pcre2_internal.h"
#define MAX_MATCH_SIZE 1000
#define DFA_WORKSPACE_COUNT 100
/* When adding new compile or match options, remember to update the functions
below that output them. */
#define ALLOWED_COMPILE_OPTIONS \
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
PCRE2_ALT_EXTENDED_CLASS|PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT| \
PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \
PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
PCRE2_NO_AUTO_CAPTURE| \
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
PCRE2_UTF)
#define ALLOWED_MATCH_OPTIONS \
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
PCRE2_PARTIAL_SOFT)
#define BASE_MATCH_OPTIONS \
(PCRE2_NO_JIT|PCRE2_DISABLE_RECURSELOOP_CHECK)
#if defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE)
static void print_compile_options(FILE *stream, uint32_t compile_options)
{
fprintf(stream, "Compile options %s%.8x =",
(compile_options == PCRE2_NEVER_BACKSLASH_C)? "(base) " : "",
compile_options);
fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
((compile_options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
((compile_options & PCRE2_ALT_EXTENDED_CLASS) != 0)? "alt_extended_class" : "",
((compile_options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
((compile_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
((compile_options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
((compile_options & PCRE2_CASELESS) != 0)? " caseless" : "",
((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
((compile_options & PCRE2_DOTALL) != 0)? " dotall" : "",
((compile_options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
((compile_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
((compile_options & PCRE2_EXTENDED) != 0)? " extended" : "",
((compile_options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
((compile_options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
((compile_options & PCRE2_MULTILINE) != 0)? " multiline" : "",
((compile_options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
((compile_options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
((compile_options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
((compile_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
((compile_options & PCRE2_UCP) != 0)? " ucp" : "",
((compile_options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
((compile_options & PCRE2_UTF) != 0)? " utf" : "");
}
static void print_match_options(FILE *stream, uint32_t match_options)
{
fprintf(stream, "Match options %s%.8x =",
(match_options == BASE_MATCH_OPTIONS)? "(base) " : "", match_options);
fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s\n",
((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
((match_options & PCRE2_DISABLE_RECURSELOOP_CHECK) != 0)? " disable_recurseloop_check" : "",
((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
((match_options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
}
/* This function can print an error message at all code unit widths. */
static void print_error(FILE *f, int errorcode, const char *text, ...)
{
PCRE2_UCHAR buffer[256];
PCRE2_UCHAR *p = buffer;
va_list ap;
va_start(ap, text);
vfprintf(f, text, ap);
va_end(ap);
pcre2_get_error_message(errorcode, buffer, 256);
while (*p != 0) fprintf(f, "%c", *p++);
printf("\n");
}
#endif /* defined(SUPPORT_DIFF_FUZZ || defined(STANDALONE) */
#ifdef SUPPORT_JIT
#ifdef SUPPORT_DIFF_FUZZ
static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data)
{
int errorcode;
for (int index = 0; index < count; index++)
{
PCRE2_UCHAR *bufferptr = NULL;
PCRE2_SIZE bufflen = 0;
errorcode = pcre2_substring_get_bynumber(match_data, index, &bufferptr,
&bufflen);
if (errorcode >= 0)
{
fprintf(stream, "Match %d (hex encoded): ", index);
for (PCRE2_SIZE i = 0; i < bufflen; i++)
{
fprintf(stream, "%02x", bufferptr[i]);
}
fprintf(stream, "\n");
}
else
{
print_error(stream, errorcode, "Match %d failed: ", index);
}
}
}
/* This function describes the current test case being evaluated, then aborts */
static void describe_failure(
const char *task,
const PCRE2_UCHAR *data,
PCRE2_SIZE size,
uint32_t compile_options,
uint32_t match_options,
int errorcode,
int errorcode_jit,
int matches,
int matches_jit,
pcre2_match_data *match_data,
pcre2_match_data *match_data_jit
) {
fprintf(stderr, "Encountered failure while performing %s; context:\n", task);
fprintf(stderr, "Pattern/sample string (hex encoded): ");
for (size_t i = 0; i < size; i++)
{
fprintf(stderr, "%02x", data[i]);
}
fprintf(stderr, "\n");
print_compile_options(stderr, compile_options);
print_match_options(stderr, match_options);
if (errorcode < 0)
{
print_error(stderr, errorcode, "Non-JIT'd operation emitted an error: ");
}
if (matches >= 0)
{
fprintf(stderr, "Non-JIT'd operation did not emit an error.\n");
if (match_data != NULL)
{
fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", matches);
dump_matches(stderr, matches, match_data);
fprintf(stderr, "\n");
}
}
if (errorcode_jit < 0)
{
print_error(stderr, errorcode_jit, "JIT'd operation emitted error %d:",
errorcode_jit);
}
if (matches_jit >= 0)
{
fprintf(stderr, "JIT'd operation did not emit an error.\n");
if (match_data_jit != NULL)
{
fprintf(stderr, "%d matches discovered by JIT'd regex:\n", matches_jit);
dump_matches(stderr, matches_jit, match_data_jit);
fprintf(stderr, "\n");
}
}
abort();
}
#endif /* SUPPORT_DIFF_FUZZ */
#endif /* SUPPORT_JIT */
/* This is the callout function. Its only purpose is to halt matching if there
are more than 100 callouts, as one way of stopping too much time being spent on
fruitless matches. The callout data is a pointer to the counter. */
static int callout_function(pcre2_callout_block *cb, void *callout_data)
{
(void)cb; /* Avoid unused parameter warning */
*((uint32_t *)callout_data) += 1;
return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0;
}
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
"no previous prototype" warning when compiling at high warning level. */
int LLVMFuzzerInitialize(int *, char ***);
int LLVMFuzzerTestOneInput(unsigned char *, size_t);
int LLVMFuzzerInitialize(int *argc, char ***argv)
{
int rc;
struct rlimit rlim;
getrlimit(RLIMIT_STACK, &rlim);
rlim.rlim_cur = STACK_SIZE_MB * 1024 * 1024;
if (rlim.rlim_cur > rlim.rlim_max)
{
fprintf(stderr, "Hard stack size limit is too small\n");
_exit(1);
}
rc = setrlimit(RLIMIT_STACK, &rlim);
if (rc != 0)
{
fprintf(stderr, "Failed to expand stack size\n");
_exit(1);
}
(void)argc; /* Avoid "unused parameter" warnings */
(void)argv;
return 0;
}
/* Here's the driving function. */
int LLVMFuzzerTestOneInput(unsigned char *data, size_t size)
{
PCRE2_UCHAR *wdata;
PCRE2_UCHAR *newwdata = NULL;
uint32_t compile_options;
uint32_t match_options;
uint64_t random_options;
pcre2_match_data *match_data = NULL;
#ifdef SUPPORT_JIT
pcre2_match_data *match_data_jit = NULL;
#endif
pcre2_compile_context *compile_context = NULL;
pcre2_match_context *match_context = NULL;
size_t match_size;
int dfa_workspace[DFA_WORKSPACE_COUNT];
if (size < sizeof(random_options)) return -1;
random_options = *(uint64_t *)(data);
data += sizeof(random_options);
wdata = (PCRE2_UCHAR *)data;
size -= sizeof(random_options);
size /= PCRE2_CODE_UNIT_WIDTH / 8;
/* PCRE2 compiles quantified groups by replicating them. In certain cases of
very large quantifiers this can lead to unacceptably long JIT compile times. To
get around this, we scan the data string for large quantifiers that follow a
closing parenthesis, and reduce the value of the quantifier to 10, assuming
that this will make minimal difference to the detection of bugs.
Do the same for quantifiers that follow a closing square bracket, because
classes that contain a number of non-ascii characters can take a lot of time
when matching.
We have to make a copy of the input because oss-fuzz complains if we overwrite
the original. Start the scan at the second character so there can be a
lookbehind for a backslash, and end it before the end so that the next
character can be checked for an opening brace. */
if (size > 3)
{
newwdata = malloc(size * sizeof(PCRE2_UCHAR));
memcpy(newwdata, wdata, size * sizeof(PCRE2_UCHAR));
wdata = newwdata;
for (size_t i = 1; i < size - 2; i++)
{
size_t j;
if ((wdata[i] != ')' && wdata[i] != ']') || wdata[i-1] == '\\' ||
wdata[i+1] != '{')
continue;
i++; /* Points to '{' */
/* Loop for two values in a quantifier. Offset i points to brace or comma
at the start of the loop. */
for (int ii = 0; ii < 2; ii++)
{
int q = 0;
if (i >= size - 1) goto END_QSCAN; /* Can happen for , */
/* Ignore leading spaces. */
while (wdata[i+1] == ' ' || wdata[i+1] == '\t')
{
i++;
if (i >= size - 1) goto END_QSCAN;
}
/* Ignore non-significant leading zeros. */
while (wdata[i+1] == '0' && i+2 < size && wdata[i+2] >= '0' &&
wdata[i+2] <= '9')
{
i++;
if (i >= size - 1) goto END_QSCAN;
}
/* Scan for a number ending in brace, or comma in the first iteration,
optionally preceded by space. */
for (j = i + 1; j < size && j < i + 7; j++)
{
if (wdata[j] == ' ' || wdata[j] == '\t')
{
j++;
while (j < size && (wdata[j] == ' ' || wdata[j] == '\t')) j++;
if (j >= size) goto OUTERLOOP;
if (wdata[j] != '}' && wdata[j] != ',') goto OUTERLOOP;
}
if (wdata[j] == '}' || (ii == 0 && wdata[j] == ',')) break;
if (wdata[j] < '0' || wdata[j] > '9')
{
j--; /* Ensure this character is checked next. The */
goto OUTERLOOP; /* string might be (e.g.) "){9){234}" */
}
q = q * 10 + (wdata[j] - '0');
}
if (j >= size) goto END_QSCAN; /* End of data */
/* Hit ',' or '}' or read 6 digits. Six digits is a number > 65536 which
is the maximum quantifier. Leave such numbers alone. */
if (j >= i + 7 || q > 65535) goto OUTERLOOP;
/* Limit the quantifier size to 10 */
if (q > 10)
{
#ifdef STANDALONE
printf("Reduced quantifier value %d to 10.\n", q);
#endif
for (size_t k = i + 1; k < j; k++) wdata[k] = '0';
wdata[j - 2] = '1';
}
/* Advance to end of number and break if reached closing brace (continue
after comma, which is only valid in the first time round this loop). */
i = j;
if (wdata[i] == '}') break;
}
/* Continue along the data string */
OUTERLOOP:
i = j;
continue;
}
}
END_QSCAN:
/* Limiting the length of the subject for matching stops fruitless searches
in large trees taking too much time. */
match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
/* Create a compile context, and set a limit on the size of the compiled
pattern. This stops the fuzzer using vast amounts of memory. */
compile_context = pcre2_compile_context_create(NULL);
if (compile_context == NULL)
{
#ifdef STANDALONE
fprintf(stderr, "** Failed to create compile context block\n");
#endif
abort();
}
pcre2_set_max_pattern_compiled_length(compile_context, 10*1024*1024);
/* Ensure that all undefined option bits are zero (waste of time trying them)
and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
input is valid UTF. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is
no reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set
because \C in random patterns is highly likely to cause a crash. */
compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) |
PCRE2_NEVER_BACKSLASH_C;
match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) |
BASE_MATCH_OPTIONS;
/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
allowed together and just give an immediate error return. */
if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
/* Do the compile with and without the options, and after a successful compile,
likewise do the match with and without the options. */
for (int i = 0; i < 2; i++)
{
uint32_t callout_count;
int errorcode;
#ifdef SUPPORT_JIT
int errorcode_jit;
#ifdef SUPPORT_DIFF_FUZZ
int matches = 0;
int matches_jit = 0;
#endif
#endif
PCRE2_SIZE erroroffset;
pcre2_code *code;
#ifdef STANDALONE
printf("\n");
print_compile_options(stdout, compile_options);
#endif
code = pcre2_compile((PCRE2_SPTR)wdata, (PCRE2_SIZE)size, compile_options,
&errorcode, &erroroffset, compile_context);
/* Compilation succeeded */
if (code != NULL)
{
int j;
uint32_t save_match_options = match_options;
/* Call JIT compile only if the compiled pattern is not too big. */
#ifdef SUPPORT_JIT
int jit_ret = -1;
if (((struct pcre2_real_code *)code)->blocksize <= JIT_SIZE_LIMIT)
{
#ifdef STANDALONE
printf("Compile succeeded; calling JIT compile\n");
#endif
jit_ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
#ifdef STANDALONE
if (jit_ret < 0) printf("JIT compile error %d\n", jit_ret);
#endif
}
else
{
#ifdef STANDALONE
printf("Not calling JIT: compiled pattern is too long "
"(%ld bytes; limit=%d)\n",
((struct pcre2_real_code *)code)->blocksize, JIT_SIZE_LIMIT);
#endif
}
#endif /* SUPPORT_JIT */
/* Create match data and context blocks only when we first need them. Set
low match and depth limits to avoid wasting too much searching large
pattern trees. Almost all matches are going to fail. */
if (match_data == NULL)
{
match_data = pcre2_match_data_create(32, NULL);
#ifdef SUPPORT_JIT
match_data_jit = pcre2_match_data_create(32, NULL);
if (match_data == NULL || match_data_jit == NULL)
#else
if (match_data == NULL)
#endif
{
#ifdef STANDALONE
fprintf(stderr, "** Failed to create match data block\n");
#endif
abort();
}
}
if (match_context == NULL)
{
match_context = pcre2_match_context_create(NULL);
if (match_context == NULL)
{
#ifdef STANDALONE
fprintf(stderr, "** Failed to create match context block\n");
#endif
abort();
}
(void)pcre2_set_match_limit(match_context, 100);
(void)pcre2_set_depth_limit(match_context, 100);
(void)pcre2_set_callout(match_context, callout_function, &callout_count);
}
/* Match twice, with and without options. */
#ifdef STANDALONE
printf("\n");
#endif
for (j = 0; j < 2; j++)
{
#ifdef STANDALONE
print_match_options(stdout, match_options);
#endif
callout_count = 0;
errorcode = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
match_options, match_data, match_context);
#ifdef STANDALONE
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
print_error(stdout, errorcode, "Match failed: error %d: ", errorcode);
#endif
/* If JIT is enabled, do a JIT match and, if appropriately compiled, compare
with the interpreter. */
#ifdef SUPPORT_JIT
if (jit_ret >= 0)
{
#ifdef STANDALONE
printf("Matching with JIT\n");
#endif
callout_count = 0;
errorcode_jit = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
match_options & ~PCRE2_NO_JIT, match_data_jit, match_context);
#ifdef STANDALONE
if (errorcode_jit >= 0)
printf("Match returned %d\n", errorcode_jit);
else
print_error(stdout, errorcode_jit, "JIT match failed: error %d: ",
errorcode_jit);
#else
(void)errorcode_jit; /* Avoid compiler warning */
#endif /* STANDALONE */
/* With differential matching enabled, compare with interpreter. */
#ifdef SUPPORT_DIFF_FUZZ
matches = errorcode;
matches_jit = errorcode_jit;
if (errorcode_jit != errorcode)
{
if (!(errorcode < 0 && errorcode_jit < 0) &&
errorcode != PCRE2_ERROR_MATCHLIMIT && errorcode != PCRE2_ERROR_CALLOUT &&
errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT && errorcode_jit != PCRE2_ERROR_CALLOUT)
{
describe_failure("match errorcode comparison", wdata, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit);
}
}
else
{
for (int index = 0; index < errorcode; index++)
{
PCRE2_UCHAR *bufferptr, *bufferptr_jit;
PCRE2_SIZE bufflen, bufflen_jit;
bufferptr = bufferptr_jit = NULL;
bufflen = bufflen_jit = 0;
errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen);
errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit);
if (errorcode != errorcode_jit)
{
describe_failure("match entry errorcode comparison", wdata, size,
compile_options, match_options, errorcode, errorcode_jit,
matches, matches_jit, match_data, match_data_jit);
}
if (errorcode >= 0)
{
if (bufflen != bufflen_jit)
{
describe_failure("match entry length comparison", wdata, size,
compile_options, match_options, errorcode, errorcode_jit,
matches, matches_jit, match_data, match_data_jit);
}
if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0)
{
describe_failure("match entry content comparison", wdata, size,
compile_options, match_options, errorcode, errorcode_jit,
matches, matches_jit, match_data, match_data_jit);
}
}
pcre2_substring_free(bufferptr);
pcre2_substring_free(bufferptr_jit);
}
}
#endif /* SUPPORT_DIFF_FUZZ */
}
#endif /* SUPPORT_JIT */
if (match_options == BASE_MATCH_OPTIONS) break; /* Don't do same twice */
match_options = BASE_MATCH_OPTIONS; /* For second time */
}
/* Match with DFA twice, with and without options, but remove options that
are not allowed with DFA. */
match_options = save_match_options & ~BASE_MATCH_OPTIONS;
#ifdef STANDALONE
printf("\n");
#endif
for (j = 0; j < 2; j++)
{
#ifdef STANDALONE
printf("DFA match options %.8x =", match_options);
printf("%s%s%s%s%s%s%s%s%s\n",
((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
#endif
callout_count = 0;
errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)wdata,
(PCRE2_SIZE)match_size, 0, match_options, match_data,
match_context, dfa_workspace, DFA_WORKSPACE_COUNT);
#ifdef STANDALONE
if (errorcode >= 0)
printf("Match returned %d\n", errorcode);
else
print_error(stdout, errorcode, "DFA match failed: error %d: ", errorcode);
#endif
if (match_options == 0) break; /* No point doing same twice */
match_options = 0; /* For second time */
}
match_options = save_match_options; /* Reset for the second compile */
pcre2_code_free(code);
}
/* Compilation failed */
else
{
#ifdef STANDALONE
print_error(stdout, errorcode, "Error %d at offset %lu: ", errorcode,
erroroffset);
#else
if (errorcode == PCRE2_ERROR_INTERNAL) abort();
#endif
}
if (compile_options == PCRE2_NEVER_BACKSLASH_C) break; /* Avoid same twice */
compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
}
/* Tidy up before exiting */
if (match_data != NULL) pcre2_match_data_free(match_data);
#ifdef SUPPORT_JIT
if (match_data_jit != NULL) pcre2_match_data_free(match_data_jit);
#endif
free(newwdata);
if (match_context != NULL) pcre2_match_context_free(match_context);
if (compile_context != NULL) pcre2_compile_context_free(compile_context);
return 0;
}
/* Optional main program. */
#ifdef STANDALONE
int main(int argc, char **argv)
{
LLVMFuzzerInitialize(&argc, &argv);
if (argc < 2)
{
printf("** No arguments given\n");
return 0;
}
for (int i = 1; i < argc; i++)
{
size_t filelen;
size_t readsize;
unsigned char *buffer;
FILE *f;
/* Handle a literal string. Copy to an exact size buffer so that checks for
overrunning work. */
if (argv[i][0] == '=')
{
readsize = strlen(argv[i]) - 1;
printf("------ <Literal> ------\n");
printf("Length = %lu\n", readsize);
printf("%.*s\n", (int)readsize, argv[i]+1);
buffer = (unsigned char *)malloc(readsize);
if (buffer == NULL)
printf("** Failed to allocate %lu bytes of memory\n", readsize);
else
{
memcpy(buffer, argv[i]+1, readsize);
LLVMFuzzerTestOneInput(buffer, readsize);
free(buffer);
}
continue;
}
/* Handle a string given in a file */
f = fopen(argv[i], "rb");
if (f == NULL)
{
printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
continue;
}
printf("------ %s ------\n", argv[i]);
fseek(f, 0, SEEK_END);
filelen = ftell(f);
fseek(f, 0, SEEK_SET);
buffer = (unsigned char *)malloc(filelen);
if (buffer == NULL)
{
printf("** Failed to allocate %lu bytes of memory\n", filelen);
fclose(f);
continue;
}
readsize = fread(buffer, 1, filelen, f);
fclose(f);
if (readsize != filelen)
printf("** File size is %lu but fread() returned %lu\n", filelen, readsize);
else
{
printf("Length = %lu\n", filelen);
LLVMFuzzerTestOneInput(buffer, filelen);
}
free(buffer);
}
return 0;
}
#endif /* STANDALONE */
/* End */

2353
deps/pcre2/pcre2_internal.h vendored Normal file

File diff suppressed because it is too large Load Diff

1044
deps/pcre2/pcre2_intmodedep.h vendored Normal file

File diff suppressed because it is too large Load Diff

177
deps/pcre2/pcre2_maketables.c vendored Normal file
View File

@@ -0,0 +1,177 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre2_maketables(), which builds
character tables for PCRE2 in the current locale. The file is compiled on its
own as part of the PCRE2 library. It is also included in the compilation of
pcre2_dftables.c as a freestanding program, in which case the macro
PCRE2_DFTABLES is defined. */
#ifndef PCRE2_DFTABLES /* Compiling the library */
#include "pcre2_internal.h"
#endif
/*************************************************
* Create PCRE2 character tables *
*************************************************/
/* This function builds a set of character tables for use by PCRE2 and returns
a pointer to them. They are build using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via a general context malloc, if
supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
freestanding auxiliary program) malloc() is used, and the function has a
different name so as not to clash with the prototype in pcre2.h.
Arguments: pointers to character-transforming functions when PCRE2_DFTABLES is
defined;
else a PCRE2 general context or NULL
Returns: pointer to the contiguous block of data;
else NULL if memory allocation failed
*/
#ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */
static const uint8_t *maketables(int (*charfn_to)(int), int (*charfn_from)(int))
{
uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
#else /* Not PCRE2_DFTABLES, that is, compiling the library */
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
pcre2_maketables(pcre2_general_context *gcontext)
{
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
malloc(TABLES_LENGTH));
#define charfn_to(c) (c)
#define charfn_from(c) (c)
#endif /* PCRE2_DFTABLES */
int i;
uint8_t *p;
if (yield == NULL) return NULL;
p = yield;
/* First comes the lower casing table */
for (i = 0; i < 256; i++)
{
int c = charfn_from(tolower(charfn_to(i)));
*p++ = (c < 256)? c : i;
}
/* Next the case-flipping table */
for (i = 0; i < 256; i++)
{
int c = charfn_from(islower(charfn_to(i))? toupper(charfn_to(i))
: tolower(charfn_to(i)));
*p++ = (c < 256)? c : i;
}
/* Then the character class tables. Don't try to be clever and save effort on
exclusive ones - in some locales things may be different.
Note that the table for "space" includes everything "isspace" gives, including
VT in the default locale. This makes it work for the POSIX class [:space:].
From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
space, because Perl added VT at release 5.18.
Note also that it is possible for a character to be alnum or alpha without
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
test for alnum specially. */
memset(p, 0, cbit_length);
for (i = 0; i < 256; i++)
{
if (isdigit(charfn_to(i))) p[cbit_digit + i/8] |= 1u << (i&7);
if (isupper(charfn_to(i))) p[cbit_upper + i/8] |= 1u << (i&7);
if (islower(charfn_to(i))) p[cbit_lower + i/8] |= 1u << (i&7);
if (isalnum(charfn_to(i))) p[cbit_word + i/8] |= 1u << (i&7);
if (i == CHAR_UNDERSCORE) p[cbit_word + i/8] |= 1u << (i&7);
if (isspace(charfn_to(i))) p[cbit_space + i/8] |= 1u << (i&7);
if (isxdigit(charfn_to(i))) p[cbit_xdigit + i/8] |= 1u << (i&7);
if (isgraph(charfn_to(i))) p[cbit_graph + i/8] |= 1u << (i&7);
if (isprint(charfn_to(i))) p[cbit_print + i/8] |= 1u << (i&7);
if (ispunct(charfn_to(i))) p[cbit_punct + i/8] |= 1u << (i&7);
if (iscntrl(charfn_to(i))) p[cbit_cntrl + i/8] |= 1u << (i&7);
}
p += cbit_length;
/* Finally, the character type table. In this, we used to exclude VT from the
white space chars, because Perl didn't recognize it as such for \s and for
comments within regexes. However, Perl changed at release 5.18, so PCRE1
changed at release 8.34 and it's always been this way for PCRE2. */
for (i = 0; i < 256; i++)
{
int x = 0;
if (isspace(charfn_to(i))) x += ctype_space;
if (isalpha(charfn_to(i))) x += ctype_letter;
if (islower(charfn_to(i))) x += ctype_lcletter;
if (isdigit(charfn_to(i))) x += ctype_digit;
if (isalnum(charfn_to(i)) || i == CHAR_UNDERSCORE) x += ctype_word;
*p++ = x;
}
return yield;
}
#ifndef PCRE2_DFTABLES /* Compiling the library */
#undef charfn_to
#undef charfn_from
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
{
if (gcontext != NULL)
gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
else
free((void *)tables);
}
#endif
/* End of pcre2_maketables.c */

8244
deps/pcre2/pcre2_match.c vendored Normal file

File diff suppressed because it is too large Load Diff

184
deps/pcre2/pcre2_match_data.c vendored Normal file
View File

@@ -0,0 +1,184 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#include "pcre2_internal.h"
/*************************************************
* Create a match data block given ovector size *
*************************************************/
/* A minimum of 1 is imposed on the number of ovector pairs. A maximum is also
imposed because the oveccount field in a match data block is uintt6_t. */
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{
pcre2_match_data *yield;
if (oveccount < 1) oveccount = 1;
if (oveccount > UINT16_MAX) oveccount = UINT16_MAX;
yield = PRIV(memctl_malloc)(
offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE),
(pcre2_memctl *)gcontext);
if (yield == NULL) return NULL;
yield->oveccount = oveccount;
yield->flags = 0;
yield->heapframes = NULL;
yield->heapframes_size = 0;
return yield;
}
/*************************************************
* Create a match data block using pattern data *
*************************************************/
/* If no context is supplied, use the memory allocator from the code. This code
assumes that a general context contains nothing other than a memory allocator.
If that ever changes, this code will need fixing. */
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(const pcre2_code *code,
pcre2_general_context *gcontext)
{
if (code == NULL) return NULL;
if (gcontext == NULL) gcontext = (pcre2_general_context *)code;
return pcre2_match_data_create(((const pcre2_real_code *)code)->top_bracket + 1,
gcontext);
}
/*************************************************
* Free a match data block *
*************************************************/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_match_data_free(pcre2_match_data *match_data)
{
if (match_data != NULL)
{
if (match_data->heapframes != NULL)
match_data->memctl.free(match_data->heapframes,
match_data->memctl.memory_data);
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
match_data->memctl.free((void *)match_data->subject,
match_data->memctl.memory_data);
match_data->memctl.free(match_data, match_data->memctl.memory_data);
}
}
/*************************************************
* Get last mark in match *
*************************************************/
PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION
pcre2_get_mark(pcre2_match_data *match_data)
{
return match_data->mark;
}
/*************************************************
* Get pointer to ovector *
*************************************************/
PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
pcre2_get_ovector_pointer(pcre2_match_data *match_data)
{
return match_data->ovector;
}
/*************************************************
* Get number of ovector slots *
*************************************************/
PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION
pcre2_get_ovector_count(pcre2_match_data *match_data)
{
return match_data->oveccount;
}
/*************************************************
* Get starting code unit in match *
*************************************************/
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_startchar(pcre2_match_data *match_data)
{
return match_data->startchar;
}
/*************************************************
* Get size of match data block *
*************************************************/
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_match_data_size(pcre2_match_data *match_data)
{
return offsetof(pcre2_match_data, ovector) +
2 * (match_data->oveccount) * sizeof(PCRE2_SIZE);
}
/*************************************************
* Get heapframes size *
*************************************************/
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_match_data_heapframes_size(pcre2_match_data *match_data)
{
return match_data->heapframes_size;
}
/* End of pcre2_match_data.c */

171
deps/pcre2/pcre2_match_next.c vendored Normal file
View File

@@ -0,0 +1,171 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#include "pcre2_internal.h"
/* Advance the offset by one code unit, and return the new value.
It is only called when the offset is not at the end of the subject. */
static PCRE2_SIZE do_bumpalong(pcre2_match_data *match_data,
PCRE2_SIZE offset)
{
PCRE2_SPTR subject = match_data->subject;
PCRE2_SIZE subject_length = match_data->subject_length;
#ifdef SUPPORT_UNICODE
BOOL utf = (match_data->code->overall_options & PCRE2_UTF) != 0;
#endif
/* Skip over CRLF as an atomic sequence, if CRLF is configured as a newline
sequence. */
if (subject[offset] == CHAR_CR && offset + 1 < subject_length &&
subject[offset + 1] == CHAR_LF)
{
switch(match_data->code->newline_convention)
{
case PCRE2_NEWLINE_CRLF:
case PCRE2_NEWLINE_ANY:
case PCRE2_NEWLINE_ANYCRLF:
return offset + 2;
}
}
/* Advance by one full character if in UTF mode. */
#ifdef SUPPORT_UNICODE
if (utf)
{
PCRE2_SPTR next = subject + offset + 1;
PCRE2_SPTR subject_end = subject + subject_length;
(void)subject_end; /* Suppress warning; 32-bit FORWARDCHARTEST ignores this */
FORWARDCHARTEST(next, subject_end);
return next - subject;
}
#endif
return offset + 1;
}
/*************************************************
* Advance the match *
*************************************************/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_next_match(pcre2_match_data *match_data, PCRE2_SIZE *pstart_offset,
uint32_t *poptions)
{
int rc = match_data->rc;
PCRE2_SIZE start_offset = match_data->start_offset;
PCRE2_SIZE *ovector = match_data->ovector;
/* Match error, or no match: no further iteration possible. In previous versions
of PCRE2, we recommended that clients use a strategy which involved retrying in
certain cases after PCRE2_ERROR_NOMATCH, but this is no longer required. */
if (rc < 0)
return FALSE;
/* Match succeeded: get the start offset for the next match */
/* Although \K can affect the position of ovector[0], there are no ways to do
anything surprising with ovector[1], which must always be >= start_offset. */
PCRE2_ASSERT(ovector[1] >= start_offset);
/* Special handling for patterns which contain \K in a lookaround, which enables
the match start to be pushed back to before the starting search offset
(ovector[0] < start_offset) or after the match ends (ovector[0] > ovector[1]).
This is not a problem if ovector[1] > start_offset, because in this case, we can
just attempt the next match at ovector[1]: we are making progress, which is all
that we require.
However, if we have ovector[1] == start_offset, then we have a very rare case
which must be handled specially, because it's a non-empty match which
nonetheless fails to make progress through the subject. */
if (ovector[0] != start_offset && ovector[1] == start_offset)
{
/* If the match end is at the end of the subject, we are done. */
if (start_offset >= match_data->subject_length)
return FALSE;
/* Otherwise, bump along by one code unit, and do a normal search. */
*pstart_offset = do_bumpalong(match_data, ovector[1]);
*poptions = 0;
return TRUE;
}
/* If the previous match was for an empty string, we are finished if we are at
the end of the subject. Otherwise, arrange to run another match at the same
point to see if a non-empty match can be found. */
if (ovector[0] == ovector[1])
{
/* If the match is at the end of the subject, we are done. */
if (ovector[0] >= match_data->subject_length)
return FALSE;
/* Otherwise, continue at this exact same point, but we must set the flag
which ensures that we don't return the exact same empty match again. */
*pstart_offset = ovector[1];
*poptions = PCRE2_NOTEMPTY_ATSTART;
return TRUE;
}
/* Finally, we must be in the happy state of a non-empty match, where the end of
the match is further on in the subject than start_offset, so we are easily able
to continue and make progress. */
*pstart_offset = ovector[1];
*poptions = 0;
return TRUE;
}
/* End of pcre2_match_next.c */

239
deps/pcre2/pcre2_newline.c vendored Normal file
View File

@@ -0,0 +1,239 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains internal functions for testing newlines when more than
one kind of newline is to be recognized. When a newline is found, its length is
returned. In principle, we could implement several newline "types", each
referring to a different set of newline characters. At present, PCRE2 supports
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
http://unicode.org/unicode/reports/tr18/. */
#include "pcre2_internal.h"
/*************************************************
* Check for newline at given position *
*************************************************/
/* This function is called only via the IS_NEWLINE macro, which does so only
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
pointed to by ptr is less than the end of the string.
Arguments:
ptr pointer to possible newline
type the newline type
endptr pointer to the end of the string
lenptr where to return the length
utf TRUE if in utf mode
Returns: TRUE or FALSE
*/
BOOL
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
uint32_t *lenptr, BOOL utf)
{
uint32_t c;
#ifdef SUPPORT_UNICODE
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
#else
(void)utf;
c = *ptr;
#endif /* SUPPORT_UNICODE */
if (type == NLTYPE_ANYCRLF) switch(c)
{
case CHAR_LF:
*lenptr = 1;
return TRUE;
case CHAR_CR:
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE;
default:
return FALSE;
}
/* NLTYPE_ANY */
else switch(c)
{
#ifdef EBCDIC
case CHAR_NEL:
#endif
case CHAR_LF:
case CHAR_VT:
case CHAR_FF:
*lenptr = 1;
return TRUE;
case CHAR_CR:
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE;
#ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8
case CHAR_NEL:
*lenptr = utf? 2 : 1;
return TRUE;
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 3;
return TRUE;
#else /* 16-bit or 32-bit code units */
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 1;
return TRUE;
#endif
#endif /* Not EBCDIC */
default:
return FALSE;
}
}
/*************************************************
* Check for newline at previous position *
*************************************************/
/* This function is called only via the WAS_NEWLINE macro, which does so only
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
value of ptr is greater than the start of the string that is being processed.
Arguments:
ptr pointer to possible newline
type the newline type
startptr pointer to the start of the string
lenptr where to return the length
utf TRUE if in utf mode
Returns: TRUE or FALSE
*/
BOOL
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
uint32_t *lenptr, BOOL utf)
{
uint32_t c;
ptr--;
#ifdef SUPPORT_UNICODE
if (utf)
{
BACKCHAR(ptr);
GETCHAR(c, ptr);
}
else c = *ptr;
#else
(void)utf;
c = *ptr;
#endif /* SUPPORT_UNICODE */
if (type == NLTYPE_ANYCRLF) switch(c)
{
case CHAR_LF:
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
return TRUE;
case CHAR_CR:
*lenptr = 1;
return TRUE;
default:
return FALSE;
}
/* NLTYPE_ANY */
else switch(c)
{
case CHAR_LF:
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
return TRUE;
#ifdef EBCDIC
case CHAR_NEL:
#endif
case CHAR_VT:
case CHAR_FF:
case CHAR_CR:
*lenptr = 1;
return TRUE;
#ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8
case CHAR_NEL:
*lenptr = utf? 2 : 1;
return TRUE;
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 3;
return TRUE;
#else /* 16-bit or 32-bit code units */
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 1;
return TRUE;
#endif
#endif /* Not EBCDIC */
default:
return FALSE;
}
}
/* End of pcre2_newline.c */

118
deps/pcre2/pcre2_ord2utf.c vendored Normal file
View File

@@ -0,0 +1,118 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This file contains a function that converts a Unicode character code point
into a UTF string. The behaviour is different for each code unit width. */
#include "pcre2_internal.h"
/* If SUPPORT_UNICODE is not defined, this function will never be called.
Supply a dummy function because some compilers do not like empty source
modules. */
#ifndef SUPPORT_UNICODE
unsigned int
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
{
(void)(cvalue);
(void)(buffer);
return 0;
}
#else /* SUPPORT_UNICODE */
/*************************************************
* Convert code point to UTF *
*************************************************/
/*
Arguments:
cvalue the character value
buffer pointer to buffer for result
Returns: number of code units placed in the buffer
*/
unsigned int
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
{
/* Convert to UTF-8 */
#if PCRE2_CODE_UNIT_WIDTH == 8
unsigned int i;
for (i = 0; i < PRIV(utf8_table1_size); i++)
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
buffer += i;
for (unsigned int j = i; j != 0; j--)
{
*buffer-- = 0x80 | (cvalue & 0x3f);
cvalue >>= 6;
}
*buffer = (PCRE2_UCHAR)(PRIV(utf8_table2)[i] | (int)cvalue);
return i + 1;
/* Convert to UTF-16 */
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (cvalue <= 0xffff)
{
*buffer = (PCRE2_UCHAR)cvalue;
return 1;
}
cvalue -= 0x10000;
*buffer++ = 0xd800 | (cvalue >> 10);
*buffer = 0xdc00 | (cvalue & 0x3ff);
return 2;
/* Convert to UTF-32 */
#else
*buffer = (PCRE2_UCHAR)cvalue;
return 1;
#endif
}
#endif /* SUPPORT_UNICODE */
/* End of pcre2_ord2utf.c */

430
deps/pcre2/pcre2_pattern_info.c vendored Normal file
View File

@@ -0,0 +1,430 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#include "pcre2_internal.h"
/*************************************************
* Return info about compiled pattern *
*************************************************/
/*
Arguments:
code points to compiled code
what what information is required
where where to put the information; if NULL, return length
Returns: 0 when data returned
> 0 when length requested
< 0 on error or unset value
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
{
const pcre2_real_code *re = (const pcre2_real_code *)code;
if (where == NULL) /* Requests field length */
{
switch(what)
{
case PCRE2_INFO_ALLOPTIONS:
case PCRE2_INFO_ARGOPTIONS:
case PCRE2_INFO_BACKREFMAX:
case PCRE2_INFO_BSR:
case PCRE2_INFO_CAPTURECOUNT:
case PCRE2_INFO_DEPTHLIMIT:
case PCRE2_INFO_EXTRAOPTIONS:
case PCRE2_INFO_FIRSTCODETYPE:
case PCRE2_INFO_FIRSTCODEUNIT:
case PCRE2_INFO_HASBACKSLASHC:
case PCRE2_INFO_HASCRORLF:
case PCRE2_INFO_HEAPLIMIT:
case PCRE2_INFO_JCHANGED:
case PCRE2_INFO_LASTCODETYPE:
case PCRE2_INFO_LASTCODEUNIT:
case PCRE2_INFO_MATCHEMPTY:
case PCRE2_INFO_MATCHLIMIT:
case PCRE2_INFO_MAXLOOKBEHIND:
case PCRE2_INFO_MINLENGTH:
case PCRE2_INFO_NAMEENTRYSIZE:
case PCRE2_INFO_NAMECOUNT:
case PCRE2_INFO_NEWLINE:
return sizeof(uint32_t);
case PCRE2_INFO_FIRSTBITMAP:
return sizeof(const uint8_t *);
case PCRE2_INFO_JITSIZE:
case PCRE2_INFO_SIZE:
case PCRE2_INFO_FRAMESIZE:
return sizeof(size_t);
case PCRE2_INFO_NAMETABLE:
return sizeof(PCRE2_SPTR);
}
}
if (re == NULL) return PCRE2_ERROR_NULL;
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. */
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
/* Check that this pattern was compiled in the correct bit mode */
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
switch(what)
{
case PCRE2_INFO_ALLOPTIONS:
*((uint32_t *)where) = re->overall_options;
break;
case PCRE2_INFO_ARGOPTIONS:
*((uint32_t *)where) = re->compile_options;
break;
case PCRE2_INFO_BACKREFMAX:
*((uint32_t *)where) = re->top_backref;
break;
case PCRE2_INFO_BSR:
*((uint32_t *)where) = re->bsr_convention;
break;
case PCRE2_INFO_CAPTURECOUNT:
*((uint32_t *)where) = re->top_bracket;
break;
case PCRE2_INFO_DEPTHLIMIT:
*((uint32_t *)where) = re->limit_depth;
if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
break;
case PCRE2_INFO_EXTRAOPTIONS:
*((uint32_t *)where) = re->extra_options;
break;
case PCRE2_INFO_FIRSTCODETYPE:
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
break;
case PCRE2_INFO_FIRSTCODEUNIT:
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
re->first_codeunit : 0;
break;
case PCRE2_INFO_FIRSTBITMAP:
*((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
&(re->start_bitmap[0]) : NULL;
break;
case PCRE2_INFO_FRAMESIZE:
*((size_t *)where) = offsetof(heapframe, ovector) +
re->top_bracket * 2 * sizeof(PCRE2_SIZE);
break;
case PCRE2_INFO_HASBACKSLASHC:
*((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
break;
case PCRE2_INFO_HASCRORLF:
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
break;
case PCRE2_INFO_HEAPLIMIT:
*((uint32_t *)where) = re->limit_heap;
if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
break;
case PCRE2_INFO_JCHANGED:
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
break;
case PCRE2_INFO_JITSIZE:
#ifdef SUPPORT_JIT
*((size_t *)where) = (re->executable_jit != NULL)?
PRIV(jit_get_size)(re->executable_jit) : 0;
#else
*((size_t *)where) = 0;
#endif
break;
case PCRE2_INFO_LASTCODETYPE:
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
break;
case PCRE2_INFO_LASTCODEUNIT:
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
re->last_codeunit : 0;
break;
case PCRE2_INFO_MATCHEMPTY:
*((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
break;
case PCRE2_INFO_MATCHLIMIT:
*((uint32_t *)where) = re->limit_match;
if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
break;
case PCRE2_INFO_MAXLOOKBEHIND:
*((uint32_t *)where) = re->max_lookbehind;
break;
case PCRE2_INFO_MINLENGTH:
*((uint32_t *)where) = re->minlength;
break;
case PCRE2_INFO_NAMEENTRYSIZE:
*((uint32_t *)where) = re->name_entry_size;
break;
case PCRE2_INFO_NAMECOUNT:
*((uint32_t *)where) = re->name_count;
break;
case PCRE2_INFO_NAMETABLE:
*((PCRE2_SPTR *)where) = (PCRE2_SPTR)((const char *)re +
sizeof(pcre2_real_code));
break;
case PCRE2_INFO_NEWLINE:
*((uint32_t *)where) = re->newline_convention;
break;
case PCRE2_INFO_SIZE:
*((size_t *)where) = re->blocksize;
break;
default: return PCRE2_ERROR_BADOPTION;
}
return 0;
}
/*************************************************
* Callout enumerator *
*************************************************/
/*
Arguments:
code points to compiled code
callback function called for each callout block
callout_data user data passed to the callback
Returns: 0 when successfully completed
< 0 on local error
!= 0 for callback error
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_callout_enumerate(const pcre2_code *code,
int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
{
const pcre2_real_code *re = (const pcre2_real_code *)code;
pcre2_callout_enumerate_block cb;
PCRE2_SPTR cc;
#ifdef SUPPORT_UNICODE
BOOL utf;
#endif
if (re == NULL) return PCRE2_ERROR_NULL;
#ifdef SUPPORT_UNICODE
utf = (re->overall_options & PCRE2_UTF) != 0;
#endif
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. */
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
/* Check that this pattern was compiled in the correct bit mode */
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
cb.version = 0;
cc = (PCRE2_SPTR)((uint8_t *)re + re->code_start);
while (TRUE)
{
int rc;
switch (*cc)
{
case OP_END:
return 0;
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
case OP_STAR:
case OP_MINSTAR:
case OP_PLUS:
case OP_MINPLUS:
case OP_QUERY:
case OP_MINQUERY:
case OP_UPTO:
case OP_MINUPTO:
case OP_EXACT:
case OP_POSSTAR:
case OP_POSPLUS:
case OP_POSQUERY:
case OP_POSUPTO:
case OP_STARI:
case OP_MINSTARI:
case OP_PLUSI:
case OP_MINPLUSI:
case OP_QUERYI:
case OP_MINQUERYI:
case OP_UPTOI:
case OP_MINUPTOI:
case OP_EXACTI:
case OP_POSSTARI:
case OP_POSPLUSI:
case OP_POSQUERYI:
case OP_POSUPTOI:
case OP_NOTSTAR:
case OP_NOTMINSTAR:
case OP_NOTPLUS:
case OP_NOTMINPLUS:
case OP_NOTQUERY:
case OP_NOTMINQUERY:
case OP_NOTUPTO:
case OP_NOTMINUPTO:
case OP_NOTEXACT:
case OP_NOTPOSSTAR:
case OP_NOTPOSPLUS:
case OP_NOTPOSQUERY:
case OP_NOTPOSUPTO:
case OP_NOTSTARI:
case OP_NOTMINSTARI:
case OP_NOTPLUSI:
case OP_NOTMINPLUSI:
case OP_NOTQUERYI:
case OP_NOTMINQUERYI:
case OP_NOTUPTOI:
case OP_NOTMINUPTOI:
case OP_NOTEXACTI:
case OP_NOTPOSSTARI:
case OP_NOTPOSPLUSI:
case OP_NOTPOSQUERYI:
case OP_NOTPOSUPTOI:
cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEEXACT:
case OP_TYPEPOSSTAR:
case OP_TYPEPOSPLUS:
case OP_TYPEPOSQUERY:
case OP_TYPEPOSUPTO:
cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
#endif
break;
#ifdef SUPPORT_WIDE_CHARS
case OP_XCLASS:
case OP_ECLASS:
cc += GET(cc, 1);
break;
#endif
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
cc += PRIV(OP_lengths)[*cc] + cc[1];
break;
case OP_CALLOUT:
cb.pattern_position = GET(cc, 1);
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
cb.callout_number = cc[1 + 2*LINK_SIZE];
cb.callout_string_offset = 0;
cb.callout_string_length = 0;
cb.callout_string = NULL;
rc = callback(&cb, callout_data);
if (rc != 0) return rc;
cc += PRIV(OP_lengths)[*cc];
break;
case OP_CALLOUT_STR:
cb.pattern_position = GET(cc, 1);
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
cb.callout_number = 0;
cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
cb.callout_string_length =
GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
rc = callback(&cb, callout_data);
if (rc != 0) return rc;
cc += GET(cc, 1 + 2*LINK_SIZE);
break;
default:
cc += PRIV(OP_lengths)[*cc];
break;
}
}
}
/* End of pcre2_pattern_info.c */

1098
deps/pcre2/pcre2_printint_inc.h vendored Normal file

File diff suppressed because it is too large Load Diff

343
deps/pcre2/pcre2_script_run.c vendored Normal file
View File

@@ -0,0 +1,343 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the function for checking a script run. */
#include "pcre2_internal.h"
/*************************************************
* Check script run *
*************************************************/
/* A script run is conceptually a sequence of characters all in the same
Unicode script. However, it isn't quite that simple. There are special rules
for scripts that are commonly used together, and also special rules for digits.
This function implements the appropriate checks, which is possible only when
PCRE2 is compiled with Unicode support. The function returns TRUE if there is
no Unicode support; however, it should never be called in that circumstance
because an error is given by pcre2_compile() if a script run is called for in a
version of PCRE2 compiled without Unicode support.
Arguments:
pgr point to the first character
endptr point after the last character
utf TRUE if in UTF mode
Returns: TRUE if this is a valid script run
*/
/* These are states in the checking process. */
enum { SCRIPT_UNSET, /* Requirement as yet unknown */
SCRIPT_MAP, /* Bitmap contains acceptable scripts */
SCRIPT_HANPENDING, /* Have had only Han characters */
SCRIPT_HANHIRAKATA, /* Expect Han or Hirikata */
SCRIPT_HANBOPOMOFO, /* Expect Han or Bopomofo */
SCRIPT_HANHANGUL /* Expect Han or Hangul */
};
#define UCD_MAPSIZE (ucp_Unknown/32 + 1)
#define FULL_MAPSIZE (ucp_Script_Count/32 + 1)
BOOL
PRIV(script_run)(PCRE2_SPTR ptr, PCRE2_SPTR endptr, BOOL utf)
{
#ifdef SUPPORT_UNICODE
uint32_t require_state = SCRIPT_UNSET;
uint32_t require_map[FULL_MAPSIZE];
uint32_t map[FULL_MAPSIZE];
uint32_t require_digitset = 0;
uint32_t c;
#if PCRE2_CODE_UNIT_WIDTH == 32
(void)utf; /* Avoid compiler warning */
#endif
/* Any string containing fewer than 2 characters is a valid script run. */
if (ptr >= endptr) return TRUE;
GETCHARINCTEST(c, ptr);
if (ptr >= endptr) return TRUE;
/* Initialize the require map. This is a full-size bitmap that has a bit for
every script, as opposed to the maps in ucd_script_sets, which only have bits
for scripts less than ucp_Unknown - those that appear in script extension
lists. */
for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] = 0;
/* Scan strings of two or more characters, checking the Unicode characteristics
of each code point. There is special code for scripts that can be combined with
characters from the Han Chinese script. This may be used in conjunction with
four other scripts in these combinations:
. Han with Hiragana and Katakana is allowed (for Japanese).
. Han with Bopomofo is allowed (for Taiwanese Mandarin).
. Han with Hangul is allowed (for Korean).
If the first significant character's script is one of the four, the required
script type is immediately known. However, if the first significant
character's script is Han, we have to keep checking for a non-Han character.
Hence the SCRIPT_HANPENDING state. */
for (;;)
{
const ucd_record *ucd = GET_UCD(c);
uint32_t script = ucd->script;
/* If the script is Unknown, the string is not a valid script run. Such
characters can only form script runs of length one (see test above). */
if (script == ucp_Unknown) return FALSE;
/* A character without any script extensions whose script is Inherited or
Common is always accepted with any script. If there are extensions, the
following processing happens for all scripts. */
if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common))
{
BOOL OK;
/* Set up a full-sized map for this character that can include bits for all
scripts. Copy the scriptx map for this character (which covers those
scripts that appear in script extension lists), set the remaining values to
zero, and then, except for Common or Inherited, add this script's bit to
the map. */
memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t));
memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t));
if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script);
/* Handle the different checking states */
switch(require_state)
{
/* First significant character - it might follow Common or Inherited
characters that do not have any script extensions. */
case SCRIPT_UNSET:
switch(script)
{
case ucp_Han:
require_state = SCRIPT_HANPENDING;
break;
case ucp_Hiragana:
case ucp_Katakana:
require_state = SCRIPT_HANHIRAKATA;
break;
case ucp_Bopomofo:
require_state = SCRIPT_HANBOPOMOFO;
break;
case ucp_Hangul:
require_state = SCRIPT_HANHANGUL;
break;
default:
memcpy(require_map, map, FULL_MAPSIZE * sizeof(uint32_t));
require_state = SCRIPT_MAP;
break;
}
break;
/* The first significant character was Han. An inspection of the Unicode
11.0.0 files shows that there are the following types of Script Extension
list that involve the Han, Bopomofo, Hiragana, Katakana, and Hangul
scripts:
. Bopomofo + Han
. Han + Hiragana + Katakana
. Hiragana + Katakana
. Bopopmofo + Hangul + Han + Hiragana + Katakana
The following code tries to make sense of this. */
#define FOUND_BOPOMOFO 1
#define FOUND_HIRAGANA 2
#define FOUND_KATAKANA 4
#define FOUND_HANGUL 8
case SCRIPT_HANPENDING:
if (script != ucp_Han) /* Another Han does nothing */
{
uint32_t chspecial = 0;
if (MAPBIT(map, ucp_Bopomofo) != 0) chspecial |= FOUND_BOPOMOFO;
if (MAPBIT(map, ucp_Hiragana) != 0) chspecial |= FOUND_HIRAGANA;
if (MAPBIT(map, ucp_Katakana) != 0) chspecial |= FOUND_KATAKANA;
if (MAPBIT(map, ucp_Hangul) != 0) chspecial |= FOUND_HANGUL;
if (chspecial == 0) return FALSE; /* Not allowed with Han */
if (chspecial == FOUND_BOPOMOFO)
require_state = SCRIPT_HANBOPOMOFO;
else if (chspecial == (FOUND_HIRAGANA|FOUND_KATAKANA))
require_state = SCRIPT_HANHIRAKATA;
/* Otherwise this character must be allowed with all of them, so remain
in the pending state. */
}
break;
/* Previously encountered one of the "with Han" scripts. Check that
this character is appropriate. */
case SCRIPT_HANHIRAKATA:
if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hiragana) +
MAPBIT(map, ucp_Katakana) == 0) return FALSE;
break;
case SCRIPT_HANBOPOMOFO:
if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Bopomofo) == 0) return FALSE;
break;
case SCRIPT_HANHANGUL:
if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hangul) == 0) return FALSE;
break;
/* Previously encountered one or more characters that are allowed with a
list of scripts. */
case SCRIPT_MAP:
OK = FALSE;
for (int i = 0; i < FULL_MAPSIZE; i++)
{
if ((require_map[i] & map[i]) != 0)
{
OK = TRUE;
break;
}
}
if (!OK) return FALSE;
/* The rest of the string must be in this script, but we have to
allow for the Han complications. */
switch(script)
{
case ucp_Han:
require_state = SCRIPT_HANPENDING;
break;
case ucp_Hiragana:
case ucp_Katakana:
require_state = SCRIPT_HANHIRAKATA;
break;
case ucp_Bopomofo:
require_state = SCRIPT_HANBOPOMOFO;
break;
case ucp_Hangul:
require_state = SCRIPT_HANHANGUL;
break;
/* Compute the intersection of the required list of scripts and the
allowed scripts for this character. */
default:
for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] &= map[i];
break;
}
break;
}
} /* End checking character's script and extensions. */
/* The character is in an acceptable script. We must now ensure that all
decimal digits in the string come from the same set. Some scripts (e.g.
Common, Arabic) have more than one set of decimal digits. This code does
not allow mixing sets, even within the same script. The vector called
PRIV(ucd_digit_sets)[] contains, in its first element, the number of
following elements, and then, in ascending order, the code points of the
'9' characters in every set of 10 digits. Each set is identified by the
offset in the vector of its '9' character. An initial check of the first
value picks up ASCII digits quickly. Otherwise, a binary chop is used. */
if (ucd->chartype == ucp_Nd)
{
uint32_t digitset;
if (c <= PRIV(ucd_digit_sets)[1]) digitset = 1; else
{
int mid;
int bot = 1;
int top = PRIV(ucd_digit_sets)[0];
for (;;)
{
if (top <= bot + 1) /* <= rather than == is paranoia */
{
digitset = top;
break;
}
mid = (top + bot) / 2;
if (c <= PRIV(ucd_digit_sets)[mid]) top = mid; else bot = mid;
}
}
/* A required value of 0 means "unset". */
if (require_digitset == 0) require_digitset = digitset;
else if (digitset != require_digitset) return FALSE;
} /* End digit handling */
/* If we haven't yet got to the end, pick up the next character. */
if (ptr >= endptr) return TRUE;
GETCHARINCTEST(c, ptr);
} /* End checking loop */
#else /* NOT SUPPORT_UNICODE */
(void)ptr;
(void)endptr;
(void)utf;
return TRUE;
#endif /* SUPPORT_UNICODE */
}
/* End of pcre2_script_run.c */

284
deps/pcre2/pcre2_serialize.c vendored Normal file
View File

@@ -0,0 +1,284 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains functions for serializing and deserializing
a sequence of compiled codes. */
#include "pcre2_internal.h"
/* Magic number to provide a small check against being handed junk. */
#define SERIALIZED_DATA_MAGIC 0x50523253u
/* Deserialization is limited to the current PCRE version and
character width. */
#define SERIALIZED_DATA_VERSION \
((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16))
#define SERIALIZED_DATA_CONFIG \
(sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16))
/*************************************************
* Serialize compiled patterns *
*************************************************/
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes,
uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size,
pcre2_general_context *gcontext)
{
uint8_t *bytes;
uint8_t *dst_bytes;
int32_t i;
PCRE2_SIZE total_size;
const pcre2_real_code *re;
const uint8_t *tables;
pcre2_serialized_data *data;
const pcre2_memctl *memctl = (gcontext != NULL) ?
&gcontext->memctl : &PRIV(default_compile_context).memctl;
if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
return PCRE2_ERROR_NULL;
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
/* Compute total size. */
total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH;
tables = NULL;
for (i = 0; i < number_of_codes; i++)
{
if (codes[i] == NULL) return PCRE2_ERROR_NULL;
re = (const pcre2_real_code *)(codes[i]);
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
if (tables == NULL)
tables = re->tables;
else if (tables != re->tables)
return PCRE2_ERROR_MIXEDTABLES;
total_size += re->blocksize;
}
/* Initialize the byte stream. */
bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data);
if (bytes == NULL) return PCRE2_ERROR_NOMEMORY;
/* The controller is stored as a hidden parameter. */
memcpy(bytes, memctl, sizeof(pcre2_memctl));
bytes += sizeof(pcre2_memctl);
data = (pcre2_serialized_data *)bytes;
data->magic = SERIALIZED_DATA_MAGIC;
data->version = SERIALIZED_DATA_VERSION;
data->config = SERIALIZED_DATA_CONFIG;
data->number_of_codes = number_of_codes;
/* Copy all compiled code data. */
dst_bytes = bytes + sizeof(pcre2_serialized_data);
memcpy(dst_bytes, tables, TABLES_LENGTH);
dst_bytes += TABLES_LENGTH;
for (i = 0; i < number_of_codes; i++)
{
re = (const pcre2_real_code *)(codes[i]);
(void)memcpy(dst_bytes, (const char *)re, re->blocksize);
/* Certain fields in the compiled code block are re-set during
deserialization. In order to ensure that the serialized data stream is always
the same for the same pattern, set them to zero here. We can't assume the
copy of the pattern is correctly aligned for accessing the fields as part of
a structure. Note the use of sizeof(void *) in the second of these, to
specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a
pointer to uint8_t), gcc gives a warning because the first argument is also a
pointer to uint8_t. Casting the first argument to (void *) can stop this, but
it didn't stop Coverity giving the same complaint. */
(void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0,
sizeof(pcre2_memctl));
(void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0,
sizeof(void *));
(void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
sizeof(void *));
dst_bytes += re->blocksize;
}
*serialized_bytes = bytes;
*serialized_size = total_size;
return number_of_codes;
}
/*************************************************
* Deserialize compiled patterns *
*************************************************/
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes,
const uint8_t *bytes, pcre2_general_context *gcontext)
{
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
const pcre2_memctl *memctl = (gcontext != NULL) ?
&gcontext->memctl : &PRIV(default_compile_context).memctl;
const uint8_t *src_bytes;
pcre2_real_code *dst_re;
uint8_t *tables;
int32_t i, j;
/* Sanity checks. */
if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL;
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA;
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
if (number_of_codes > data->number_of_codes)
number_of_codes = data->number_of_codes;
src_bytes = bytes + sizeof(pcre2_serialized_data);
/* Decode tables. The reference count for the tables is stored immediately
following them. */
tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data);
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
memcpy(tables, src_bytes, TABLES_LENGTH);
*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes;
src_bytes += TABLES_LENGTH;
/* Decode the byte stream. We must not try to read the size from the compiled
code block in the stream, because it might be unaligned, which causes errors on
hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type
of the blocksize field is given its own name to ensure that it is the same here
as in the block. */
for (i = 0; i < number_of_codes; i++)
{
CODE_BLOCKSIZE_TYPE blocksize;
memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize),
sizeof(CODE_BLOCKSIZE_TYPE));
if (blocksize <= sizeof(pcre2_real_code))
return PCRE2_ERROR_BADSERIALIZEDDATA;
/* The allocator provided by gcontext replaces the original one. */
dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize,
(pcre2_memctl *)gcontext);
if (dst_re == NULL)
{
memctl->free(tables, memctl->memory_data);
for (j = 0; j < i; j++)
{
memctl->free(codes[j], memctl->memory_data);
codes[j] = NULL;
}
return PCRE2_ERROR_NOMEMORY;
}
/* The new allocator must be preserved. */
memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl),
src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl));
if (dst_re->magic_number != MAGIC_NUMBER ||
dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 ||
dst_re->name_count > MAX_NAME_COUNT)
{
memctl->free(dst_re, memctl->memory_data);
return PCRE2_ERROR_BADSERIALIZEDDATA;
}
/* At the moment only one table is supported. */
dst_re->tables = tables;
dst_re->executable_jit = NULL;
dst_re->flags |= PCRE2_DEREF_TABLES;
codes[i] = dst_re;
src_bytes += blocksize;
}
return number_of_codes;
}
/*************************************************
* Get the number of serialized patterns *
*************************************************/
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
pcre2_serialize_get_number_of_codes(const uint8_t *bytes)
{
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
if (data == NULL) return PCRE2_ERROR_NULL;
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
return data->number_of_codes;
}
/*************************************************
* Free the allocated stream *
*************************************************/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_serialize_free(uint8_t *bytes)
{
if (bytes != NULL)
{
pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl));
memctl->free(memctl, memctl->memory_data);
}
}
/* End of pcre2_serialize.c */

199
deps/pcre2/pcre2_string_utils.c vendored Normal file
View File

@@ -0,0 +1,199 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2018-2021 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains internal functions for comparing and finding the length
of strings. These are used instead of strcmp() etc because the standard
functions work only on 8-bit data. */
#include "pcre2_internal.h"
/*************************************************
* Compare two zero-terminated PCRE2 strings *
*************************************************/
/*
Arguments:
str1 first string
str2 second string
Returns: 0, 1, or -1
*/
int
PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2)
{
PCRE2_UCHAR c1, c2;
while (*str1 != '\0' || *str2 != '\0')
{
c1 = *str1++;
c2 = *str2++;
if (c1 != c2) return ((c1 > c2) << 1) - 1;
}
return 0;
}
/*************************************************
* Compare zero-terminated PCRE2 & 8-bit strings *
*************************************************/
/* As the 8-bit string is almost always a literal, its type is specified as
const char *.
Arguments:
str1 first string
str2 second string
Returns: 0, 1, or -1
*/
int
PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
{
PCRE2_UCHAR c1, c2;
while (*str1 != '\0' || *str2 != '\0')
{
c1 = *str1++;
c2 = *str2++;
if (c1 != c2) return ((c1 > c2) << 1) - 1;
}
return 0;
}
/*************************************************
* Compare two PCRE2 strings, given a length *
*************************************************/
/*
Arguments:
str1 first string
str2 second string
len the length
Returns: 0, 1, or -1
*/
int
PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
{
PCRE2_UCHAR c1, c2;
for (; len > 0; len--)
{
c1 = *str1++;
c2 = *str2++;
if (c1 != c2) return ((c1 > c2) << 1) - 1;
}
return 0;
}
/*************************************************
* Compare PCRE2 string to 8-bit string by length *
*************************************************/
/* As the 8-bit string is almost always a literal, its type is specified as
const char *.
Arguments:
str1 first string
str2 second string
len the length
Returns: 0, 1, or -1
*/
int
PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
{
PCRE2_UCHAR c1, c2;
for (; len > 0; len--)
{
c1 = *str1++;
c2 = *str2++;
if (c1 != c2) return ((c1 > c2) << 1) - 1;
}
return 0;
}
/*************************************************
* Find the length of a PCRE2 string *
*************************************************/
/*
Argument: the string
Returns: the length
*/
PCRE2_SIZE
PRIV(strlen)(PCRE2_SPTR str)
{
PCRE2_SIZE c = 0;
while (*str++ != 0) c++;
return c;
}
/*************************************************
* Copy 8-bit 0-terminated string to PCRE2 string *
*************************************************/
/* Arguments:
str1 buffer to receive the string
str2 8-bit string to be copied
Returns: the number of code units used (excluding trailing zero)
*/
PCRE2_SIZE
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
{
PCRE2_UCHAR *t = str1;
while (*str2 != 0) *t++ = *str2++;
*t = 0;
return t - str1;
}
/* End of pcre2_string_utils.c */

2086
deps/pcre2/pcre2_study.c vendored Normal file

File diff suppressed because it is too large Load Diff

1761
deps/pcre2/pcre2_substitute.c vendored Normal file

File diff suppressed because it is too large Load Diff

553
deps/pcre2/pcre2_substring.c vendored Normal file
View File

@@ -0,0 +1,553 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#include "pcre2_internal.h"
/*************************************************
* Copy named captured string to given buffer *
*************************************************/
/* This function copies a single captured substring into a given buffer,
identifying it by name. If the regex permits duplicate names, the first
substring that is set is chosen.
Arguments:
match_data points to the match data
stringname the name of the required substring
buffer where to put the substring
sizeptr the size of the buffer, updated to the size of the substring
Returns: if successful: zero
if not successful, a negative error code:
(1) an error from nametable_scan()
(2) an error from copy_bynumber()
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
{
PCRE2_SPTR first, last, entry;
int failrc, entrysize;
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
return PCRE2_ERROR_DFA_UFUNC;
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
&first, &last);
if (entrysize < 0) return entrysize;
failrc = PCRE2_ERROR_UNAVAILABLE;
for (entry = first; entry <= last; entry += entrysize)
{
uint32_t n = GET2(entry, 0);
if (n < match_data->oveccount)
{
if (match_data->ovector[n*2] != PCRE2_UNSET)
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
failrc = PCRE2_ERROR_UNSET;
}
}
return failrc;
}
/*************************************************
* Copy numbered captured string to given buffer *
*************************************************/
/* This function copies a single captured substring into a given buffer,
identifying it by number.
Arguments:
match_data points to the match data
stringnumber the number of the required substring
buffer where to put the substring
sizeptr the size of the buffer, updated to the size of the substring
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOMEMORY: buffer too small
PCRE2_ERROR_NOSUBSTRING: no such substring
PCRE2_ERROR_UNAVAILABLE: ovector too small
PCRE2_ERROR_UNSET: substring is not set
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
{
int rc;
PCRE2_SIZE size;
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
if (rc < 0) return rc;
if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
if (size != 0) memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
CU2BYTES(size));
buffer[size] = 0;
*sizeptr = size;
return 0;
}
/*************************************************
* Extract named captured string *
*************************************************/
/* This function copies a single captured substring, identified by name, into
new memory. If the regex permits duplicate names, the first substring that is
set is chosen.
Arguments:
match_data pointer to match_data
stringname the name of the required substring
stringptr where to put the pointer to the new memory
sizeptr where to put the length of the substring
Returns: if successful: zero
if not successful, a negative value:
(1) an error from nametable_scan()
(2) an error from get_bynumber()
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_get_byname(pcre2_match_data *match_data,
PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
{
PCRE2_SPTR first, last, entry;
int failrc, entrysize;
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
return PCRE2_ERROR_DFA_UFUNC;
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
&first, &last);
if (entrysize < 0) return entrysize;
failrc = PCRE2_ERROR_UNAVAILABLE;
for (entry = first; entry <= last; entry += entrysize)
{
uint32_t n = GET2(entry, 0);
if (n < match_data->oveccount)
{
if (match_data->ovector[n*2] != PCRE2_UNSET)
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
failrc = PCRE2_ERROR_UNSET;
}
}
return failrc;
}
/*************************************************
* Extract captured string to new memory *
*************************************************/
/* This function copies a single captured substring into a piece of new
memory.
Arguments:
match_data points to match data
stringnumber the number of the required substring
stringptr where to put a pointer to the new memory
sizeptr where to put the size of the substring
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOMEMORY: failed to get memory
PCRE2_ERROR_NOSUBSTRING: no such substring
PCRE2_ERROR_UNAVAILABLE: ovector too small
PCRE2_ERROR_UNSET: substring is not set
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
{
int rc;
PCRE2_SIZE size;
PCRE2_UCHAR *yield;
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
if (rc < 0) return rc;
yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
(size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
if (size != 0) memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
CU2BYTES(size));
yield[size] = 0;
*stringptr = yield;
*sizeptr = size;
return 0;
}
/*************************************************
* Free memory obtained by get_substring *
*************************************************/
/*
Argument: the result of a previous pcre2_substring_get_byxxx()
Returns: nothing
*/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_substring_free(PCRE2_UCHAR *string)
{
if (string != NULL)
{
pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
memctl->free(memctl, memctl->memory_data);
}
}
/*************************************************
* Get length of a named substring *
*************************************************/
/* This function returns the length of a named captured substring. If the regex
permits duplicate names, the first substring that is set is chosen.
Arguments:
match_data pointer to match data
stringname the name of the required substring
sizeptr where to put the length, if not NULL
Returns: 0 if successful, else a negative error number
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_length_byname(pcre2_match_data *match_data,
PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
{
PCRE2_SPTR first, last, entry;
int failrc, entrysize;
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
return PCRE2_ERROR_DFA_UFUNC;
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
&first, &last);
if (entrysize < 0) return entrysize;
failrc = PCRE2_ERROR_UNAVAILABLE;
for (entry = first; entry <= last; entry += entrysize)
{
uint32_t n = GET2(entry, 0);
if (n < match_data->oveccount)
{
if (match_data->ovector[n*2] != PCRE2_UNSET)
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
failrc = PCRE2_ERROR_UNSET;
}
}
return failrc;
}
/*************************************************
* Get length of a numbered substring *
*************************************************/
/* This function returns the length of a captured substring. If the start is
beyond the end (which can happen when \K is used in an assertion), it sets the
length to zero.
Arguments:
match_data pointer to match data
stringnumber the number of the required substring
sizeptr where to put the length, if not NULL
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOSUBSTRING: no such substring
PCRE2_ERROR_UNAVAILABLE: ovector is too small
PCRE2_ERROR_UNSET: substring is not set
PCRE2_ERROR_INVALIDOFFSET: internal error, should not occur
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
uint32_t stringnumber, PCRE2_SIZE *sizeptr)
{
PCRE2_SIZE left, right;
int count = match_data->rc;
if (count == PCRE2_ERROR_PARTIAL)
{
if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
count = 0;
}
else if (count < 0) return count; /* Match failed */
if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
{
if (stringnumber > match_data->code->top_bracket)
return PCRE2_ERROR_NOSUBSTRING;
if (stringnumber >= match_data->oveccount)
return PCRE2_ERROR_UNAVAILABLE;
if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
return PCRE2_ERROR_UNSET;
}
else /* Matched using pcre2_dfa_match() */
{
if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
}
left = match_data->ovector[stringnumber*2];
right = match_data->ovector[stringnumber*2+1];
/* LCOV_EXCL_START - this appears to be unreachable, as the ovector and
subject_length should always be set consistently, no matter what misbehaviour
the caller has committed. */
if (left > match_data->subject_length || right > match_data->subject_length)
{
PCRE2_DEBUG_UNREACHABLE();
return PCRE2_ERROR_INVALIDOFFSET;
}
/* LCOV_EXCL_STOP */
if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
return 0;
}
/*************************************************
* Extract all captured strings to new memory *
*************************************************/
/* This function gets one chunk of memory and builds a list of pointers and all
the captured substrings in it. A NULL pointer is put on the end of the list.
The substrings are zero-terminated, but also, if the final argument is
non-NULL, a list of lengths is also returned. This allows binary data to be
handled.
Arguments:
match_data points to the match data
listptr set to point to the list of pointers
lengthsptr set to point to the list of lengths (may be NULL)
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOMEMORY: failed to get memory,
or a match failure code
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
PCRE2_SIZE **lengthsptr)
{
int i, count, count2;
PCRE2_SIZE size;
PCRE2_SIZE *lensp;
pcre2_memctl *memp;
PCRE2_UCHAR **listp;
PCRE2_UCHAR *sp;
PCRE2_SIZE *ovector;
if ((count = match_data->rc) < 0) return count; /* Match failed */
if (count == 0) count = match_data->oveccount; /* Ovector too small */
count2 = 2*count;
ovector = match_data->ovector;
size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */
if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
for (i = 0; i < count2; i += 2)
{
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
}
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
if (lengthsptr == NULL)
{
sp = (PCRE2_UCHAR *)lensp;
lensp = NULL;
}
else
{
*lengthsptr = lensp;
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
}
for (i = 0; i < count2; i += 2)
{
size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
/* Size == 0 includes the case when the capture is unset. Avoid adding
PCRE2_UNSET to match_data->subject because it overflows, even though with
zero size calling memcpy() is harmless. */
if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
*listp++ = sp;
if (lensp != NULL) *lensp++ = size;
sp += size;
*sp++ = 0;
}
*listp = NULL;
return 0;
}
/*************************************************
* Free memory obtained by substring_list_get *
*************************************************/
/*
Argument: the result of a previous pcre2_substring_list_get()
Returns: nothing
*/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_substring_list_free(PCRE2_UCHAR **list)
{
if (list != NULL)
{
pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
memctl->free(memctl, memctl->memory_data);
}
}
/*************************************************
* Find (multiple) entries for named string *
*************************************************/
/* This function scans the nametable for a given name, using binary chop. It
returns either two pointers to the entries in the table, or, if no pointers are
given, the number of a unique group with the given name. If duplicate names are
permitted, and the name is not unique, an error is generated.
Arguments:
code the compiled regex
stringname the name whose entries required
firstptr where to put the pointer to the first entry
lastptr where to put the pointer to the last entry
Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
otherwise, if firstptr and lastptr are NULL:
a group number for a unique substring
else PCRE2_ERROR_NOUNIQUESUBSTRING
otherwise:
the length of each entry, having set firstptr and lastptr
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
{
uint16_t bot = 0;
uint16_t top = code->name_count;
uint16_t entrysize = code->name_entry_size;
PCRE2_SPTR nametable = (PCRE2_SPTR)((const char *)code + sizeof(pcre2_real_code));
while (top > bot)
{
uint16_t mid = (top + bot) / 2;
PCRE2_SPTR entry = nametable + entrysize*mid;
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
if (c == 0)
{
PCRE2_SPTR first;
PCRE2_SPTR last;
PCRE2_SPTR lastentry;
lastentry = nametable + entrysize * (code->name_count - 1);
first = last = entry;
while (first > nametable)
{
if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
first -= entrysize;
}
while (last < lastentry)
{
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
last += entrysize;
}
if (firstptr == NULL) return (first == last)?
(int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
*firstptr = first;
*lastptr = last;
return entrysize;
}
if (c > 0) bot = mid + 1; else top = mid;
}
return PCRE2_ERROR_NOSUBSTRING;
}
/*************************************************
* Find number for named string *
*************************************************/
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
when it is known that names are unique. If there are duplicate names, it is not
defined which number is returned.
Arguments:
code the compiled regex
stringname the name whose number is required
Returns: the number of the named parenthesis, or a negative number
PCRE2_ERROR_NOSUBSTRING if not found
PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_number_from_name(const pcre2_code *code,
PCRE2_SPTR stringname)
{
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
}
/* End of pcre2_substring.c */

310
deps/pcre2/pcre2_tables.c vendored Normal file
View File

@@ -0,0 +1,310 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains some fixed tables that are used by more than one of the
PCRE2 code modules. The tables are also #included by the pcre2test program,
which uses macros to change their names from _pcre2_xxx to xxxx, thereby
avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
defined. */
#if !defined(PCRE2_PCRE2TEST) && !defined(PCRE2_DFTABLES) && \
!defined(PCRE2_PCRE2POSIX) /* We're compiling the library */
#include "pcre2_internal.h"
#endif
/* Utility macros */
#define ARR_SIZE(x) sizeof(x)/sizeof(x[0])
#if !defined(PCRE2_PCRE2TEST) && !defined(PCRE2_DFTABLES) && \
!defined(PCRE2_PCRE2POSIX)
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre2_internal.h.
This is mode-dependent, so it is skipped when this file is included by
pcre2test. */
const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
/* Tables of horizontal and vertical whitespace characters, suitable for
adding to classes. */
const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST };
const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
#endif /* !PCRE2_PCRE2TEST && !PCRE2_DFTABLES && !PCRE2_PCRE2POSIX */
#if !defined(PCRE2_DFTABLES) && !defined(PCRE2_PCRE2POSIX)
/* These tables are the pairs of delimiters that are valid for callout string
arguments. For each starting delimiter there must be a matching ending
delimiter, which in fact is different only for bracket-like delimiters. */
const uint32_t PRIV(callout_start_delims)[] = {
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 };
const uint32_t PRIV(callout_end_delims[]) = {
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 };
#endif /* !PCRE2_DFTABLES && !PCRE2_PCRE2POSIX */
/*************************************************
* Tables for UTF-8 support *
*************************************************/
/* These tables are required by pcre2test in 16- or 32-bit mode, as well
as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
handling wide characters. */
#if defined PCRE2_PCRE2TEST || \
(!defined(PCRE2_DFTABLES) && !defined(PCRE2_PCRE2POSIX) && \
defined SUPPORT_UNICODE && \
defined PCRE2_CODE_UNIT_WIDTH && \
PCRE2_CODE_UNIT_WIDTH == 8)
/* These are the breakpoints for different numbers of bytes in a UTF-8
character. */
const int PRIV(utf8_table1)[] =
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff };
const unsigned PRIV(utf8_table1_size) = ARR_SIZE(PRIV(utf8_table1));
/* These are the indicator bits and the mask for the data bits to set in the
first byte of a character, indexed by the number of additional bytes. */
const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
/* Table of the number of extra bytes, indexed by the first byte masked with
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
const uint8_t PRIV(utf8_table4)[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
#endif /* UTF-8 support needed */
/* Tables concerned with Unicode properties are relevant only when Unicode
support is enabled. See also the pcre2_ucptables_inc.h file, which is generated by
a Python script from Unicode data files. */
#if !defined(PCRE2_DFTABLES) && !defined(PCRE2_PCRE2POSIX) && \
defined(SUPPORT_UNICODE)
/* Table to translate from particular type value to the general value. */
const uint32_t PRIV(ucp_gentype)[] = {
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
ucp_P, ucp_P, /* Ps, Po */
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
};
/* This table encodes the rules for finding the end of an extended grapheme
cluster. Every code point has a grapheme break property which is one of the
ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
10 and 11. The 2-dimensional table is indexed by the properties of two adjacent
code points. The left property selects a word from the table, and the right
property selects a bit from that word like this:
PRIV(ucp_gbtable)[left-property] & (1u << right-property)
The value is non-zero if a grapheme break is NOT permitted between the relevant
two code points. The breaking rules are as follows:
1. Break at the start and end of text (pretty obviously).
2. Do not break between a CR and LF; otherwise, break before and after
controls.
3. Do not break Hangul syllable sequences, the rules for which are:
L may be followed by L, V, LV or LVT
LV or V may be followed by V or T
LVT or T may be followed by T
4. Do not break before extending characters or zero-width-joiner (ZWJ).
The following rules are only for extended grapheme clusters (but that's what we
are implementing).
5. Do not break before SpacingMarks.
6. Do not break after Prepend characters.
7. Do not break within emoji modifier sequences or emoji zwj sequences. That
is, do not break between characters with the Extended_Pictographic property
if a ZWJ intervenes. Extend characters are allowed between the characters;
this cannot be represented in this table, the code has to deal with it.
8. Do not break within emoji flag sequences. That is, do not break between
regional indicator (RI) symbols if there are an odd number of RI characters
before the break point. This table encodes "join RI characters"; the code
has to deal with checking for previous adjoining RIs.
9. Otherwise, break everywhere.
*/
#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
const uint32_t PRIV(ucp_gbtable)[] = {
(1u<<ucp_gbLF), /* 0 CR */
0, /* 1 LF */
0, /* 2 Control */
ESZ, /* 3 Extend */
ESZ|(1u<<ucp_gbPrepend)| /* 4 Prepend */
(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbT)|
(1u<<ucp_gbLV)|(1u<<ucp_gbLVT)|(1u<<ucp_gbOther)|
(1u<<ucp_gbRegional_Indicator),
ESZ, /* 5 SpacingMark */
ESZ|(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbLV)| /* 6 L */
(1u<<ucp_gbLVT),
ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT), /* 7 V */
ESZ|(1u<<ucp_gbT), /* 8 T */
ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT), /* 9 LV */
ESZ|(1u<<ucp_gbT), /* 10 LVT */
(1u<<ucp_gbRegional_Indicator), /* 11 Regional Indicator */
ESZ, /* 12 Other */
ESZ|(1u<<ucp_gbExtended_Pictographic), /* 13 ZWJ */
ESZ /* 14 Extended Pictographic */
};
#undef ESZ
#ifdef SUPPORT_JIT
/* This table reverses PRIV(ucp_gentype). We can save the cost
of a memory load. */
const int PRIV(ucp_typerange)[] = {
ucp_Cc, ucp_Cs,
ucp_Ll, ucp_Lu,
ucp_Mc, ucp_Mn,
ucp_Nd, ucp_No,
ucp_Pc, ucp_Ps,
ucp_Sc, ucp_So,
ucp_Zl, ucp_Zs,
};
#endif /* SUPPORT_JIT */
/* Finally, include the tables that are auto-generated from the Unicode data
files. */
#include "pcre2_ucptables_inc.h"
#endif /* Unicode support needed */
/*************************************************
* Tables for EBCDIC support *
*************************************************/
#if defined(EBCDIC) && \
(defined(PCRE2_PCRE2TEST) || defined(PCRE2_DFTABLES) || 'a' != 0x81)
const uint8_t PRIV(ebcdic_1047_to_ascii)[256] = {
0x00,0x01,0x02,0x03,0x9c,0x09,0x86,0x7f,0x97,0x8d,0x8e,0x0b,0x0c,0x0d,0x0e,0x0f,
#ifdef EBCDIC_NL25
0x10,0x11,0x12,0x13,0x9d,0x85,0x08,0x87,0x18,0x19,0x92,0x8f,0x1c,0x1d,0x1e,0x1f,
0x80,0x81,0x82,0x83,0x84,0x0a,0x17,0x1b,0x88,0x89,0x8a,0x8b,0x8c,0x05,0x06,0x07,
#else
0x10,0x11,0x12,0x13,0x9d,0x0a,0x08,0x87,0x18,0x19,0x92,0x8f,0x1c,0x1d,0x1e,0x1f,
0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1b,0x88,0x89,0x8a,0x8b,0x8c,0x05,0x06,0x07,
#endif
0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9a,0x9b,0x14,0x15,0x9e,0x1a,
0x20,0xa0,0xe2,0xe4,0xe0,0xe1,0xe3,0xe5,0xe7,0xf1,0xa2,0x2e,0x3c,0x28,0x2b,0x7c,
0x26,0xe9,0xea,0xeb,0xe8,0xed,0xee,0xef,0xec,0xdf,0x21,0x24,0x2a,0x29,0x3b,0x5e,
0x2d,0x2f,0xc2,0xc4,0xc0,0xc1,0xc3,0xc5,0xc7,0xd1,0xa6,0x2c,0x25,0x5f,0x3e,0x3f,
0xf8,0xc9,0xca,0xcb,0xc8,0xcd,0xce,0xcf,0xcc,0x60,0x3a,0x23,0x40,0x27,0x3d,0x22,
0xd8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xab,0xbb,0xf0,0xfd,0xfe,0xb1,
0xb0,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0xaa,0xba,0xe6,0xb8,0xc6,0xa4,
0xb5,0x7e,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0xa1,0xbf,0xd0,0x5b,0xde,0xae,
0xac,0xa3,0xa5,0xb7,0xa9,0xa7,0xb6,0xbc,0xbd,0xbe,0xdd,0xa8,0xaf,0x5d,0xb4,0xd7,
0x7b,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xad,0xf4,0xf6,0xf2,0xf3,0xf5,
0x7d,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0xb9,0xfb,0xfc,0xf9,0xfa,0xff,
0x5c,0xf7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0xb2,0xd4,0xd6,0xd2,0xd3,0xd5,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xb3,0xdb,0xdc,0xd9,0xda,0x9f,
};
const uint8_t PRIV(ascii_to_ebcdic_1047)[256] = {
#ifdef EBCDIC_NL25
0x00,0x01,0x02,0x03,0x37,0x2d,0x2e,0x2f,0x16,0x05,0x25,0x0b,0x0c,0x0d,0x0e,0x0f,
#else
0x00,0x01,0x02,0x03,0x37,0x2d,0x2e,0x2f,0x16,0x05,0x15,0x0b,0x0c,0x0d,0x0e,0x0f,
#endif
0x10,0x11,0x12,0x13,0x3c,0x3d,0x32,0x26,0x18,0x19,0x3f,0x27,0x1c,0x1d,0x1e,0x1f,
0x40,0x5a,0x7f,0x7b,0x5b,0x6c,0x50,0x7d,0x4d,0x5d,0x5c,0x4e,0x6b,0x60,0x4b,0x61,
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0x7a,0x5e,0x4c,0x7e,0x6e,0x6f,
0x7c,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
0xd7,0xd8,0xd9,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xad,0xe0,0xbd,0x5f,0x6d,
0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96,
0x97,0x98,0x99,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xc0,0x4f,0xd0,0xa1,0x07,
#ifdef EBCDIC_NL25
0x20,0x21,0x22,0x23,0x24,0x15,0x06,0x17,0x28,0x29,0x2a,0x2b,0x2c,0x09,0x0a,0x1b,
#else
0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2a,0x2b,0x2c,0x09,0x0a,0x1b,
#endif
0x30,0x31,0x1a,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3a,0x3b,0x04,0x14,0x3e,0xff,
0x41,0xaa,0x4a,0xb1,0x9f,0xb2,0x6a,0xb5,0xbb,0xb4,0x9a,0x8a,0xb0,0xca,0xaf,0xbc,
0x90,0x8f,0xea,0xfa,0xbe,0xa0,0xb6,0xb3,0x9d,0xda,0x9b,0x8b,0xb7,0xb8,0xb9,0xab,
0x64,0x65,0x62,0x66,0x63,0x67,0x9e,0x68,0x74,0x71,0x72,0x73,0x78,0x75,0x76,0x77,
0xac,0x69,0xed,0xee,0xeb,0xef,0xec,0xbf,0x80,0xfd,0xfe,0xfb,0xfc,0xba,0xae,0x59,
0x44,0x45,0x42,0x46,0x43,0x47,0x9c,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57,
0x8c,0x49,0xcd,0xce,0xcb,0xcf,0xcc,0xe1,0x70,0xdd,0xde,0xdb,0xdc,0x8d,0x8e,0xdf,
};
#endif /* EBCDIC support needed */
/* End of pcre2_tables.c */

5805
deps/pcre2/pcre2_ucd.c vendored Normal file

File diff suppressed because it is too large Load Diff

408
deps/pcre2/pcre2_ucp.h vendored Normal file
View File

@@ -0,0 +1,408 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
a new version of this code.
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD
/* This file contains definitions of the Unicode property values that are
returned by the UCD access macros and used throughout PCRE2.
IMPORTANT: The specific values of the first two enums (general and particular
character categories) are assumed by the table called catposstab in the file
pcre2_auto_possess.c. They are unlikely to change, but should be checked after
an update. */
/* These are the general character categories. */
enum {
ucp_C,
ucp_L,
ucp_M,
ucp_N,
ucp_P,
ucp_S,
ucp_Z,
};
/* These are the particular character categories. */
enum {
ucp_Cc, /* Control */
ucp_Cf, /* Format */
ucp_Cn, /* Unassigned */
ucp_Co, /* Private use */
ucp_Cs, /* Surrogate */
ucp_Ll, /* Lower case letter */
ucp_Lm, /* Modifier letter */
ucp_Lo, /* Other letter */
ucp_Lt, /* Title case letter */
ucp_Lu, /* Upper case letter */
ucp_Mc, /* Spacing mark */
ucp_Me, /* Enclosing mark */
ucp_Mn, /* Non-spacing mark */
ucp_Nd, /* Decimal number */
ucp_Nl, /* Letter number */
ucp_No, /* Other number */
ucp_Pc, /* Connector punctuation */
ucp_Pd, /* Dash punctuation */
ucp_Pe, /* Close punctuation */
ucp_Pf, /* Final punctuation */
ucp_Pi, /* Initial punctuation */
ucp_Po, /* Other punctuation */
ucp_Ps, /* Open punctuation */
ucp_Sc, /* Currency symbol */
ucp_Sk, /* Modifier symbol */
ucp_Sm, /* Mathematical symbol */
ucp_So, /* Other symbol */
ucp_Zl, /* Line separator */
ucp_Zp, /* Paragraph separator */
ucp_Zs, /* Space separator */
};
/* These are Boolean properties. */
enum {
ucp_ASCII,
ucp_ASCII_Hex_Digit,
ucp_Alphabetic,
ucp_Bidi_Control,
ucp_Bidi_Mirrored,
ucp_Case_Ignorable,
ucp_Cased,
ucp_Changes_When_Casefolded,
ucp_Changes_When_Casemapped,
ucp_Changes_When_Lowercased,
ucp_Changes_When_Titlecased,
ucp_Changes_When_Uppercased,
ucp_Dash,
ucp_Default_Ignorable_Code_Point,
ucp_Deprecated,
ucp_Diacritic,
ucp_Emoji,
ucp_Emoji_Component,
ucp_Emoji_Modifier,
ucp_Emoji_Modifier_Base,
ucp_Emoji_Presentation,
ucp_Extended_Pictographic,
ucp_Extender,
ucp_Grapheme_Base,
ucp_Grapheme_Extend,
ucp_Grapheme_Link,
ucp_Hex_Digit,
ucp_IDS_Binary_Operator,
ucp_IDS_Trinary_Operator,
ucp_IDS_Unary_Operator,
ucp_ID_Compat_Math_Continue,
ucp_ID_Compat_Math_Start,
ucp_ID_Continue,
ucp_ID_Start,
ucp_Ideographic,
ucp_InCB,
ucp_Join_Control,
ucp_Logical_Order_Exception,
ucp_Lowercase,
ucp_Math,
ucp_Modifier_Combining_Mark,
ucp_Noncharacter_Code_Point,
ucp_Pattern_Syntax,
ucp_Pattern_White_Space,
ucp_Prepended_Concatenation_Mark,
ucp_Quotation_Mark,
ucp_Radical,
ucp_Regional_Indicator,
ucp_Sentence_Terminal,
ucp_Soft_Dotted,
ucp_Terminal_Punctuation,
ucp_Unified_Ideograph,
ucp_Uppercase,
ucp_Variation_Selector,
ucp_White_Space,
ucp_XID_Continue,
ucp_XID_Start,
/* This must be last */
ucp_Bprop_Count
};
/* Size of entries in ucd_boolprop_sets[] */
#define ucd_boolprop_sets_item_size 2
/* These are the bidi class values. */
enum {
ucp_bidiAL, /* Arabic_Letter */
ucp_bidiAN, /* Arabic_Number */
ucp_bidiB, /* Paragraph_Separator */
ucp_bidiBN, /* Boundary_Neutral */
ucp_bidiCS, /* Common_Separator */
ucp_bidiEN, /* European_Number */
ucp_bidiES, /* European_Separator */
ucp_bidiET, /* European_Terminator */
ucp_bidiFSI, /* First_Strong_Isolate */
ucp_bidiL, /* Left_To_Right */
ucp_bidiLRE, /* Left_To_Right_Embedding */
ucp_bidiLRI, /* Left_To_Right_Isolate */
ucp_bidiLRO, /* Left_To_Right_Override */
ucp_bidiNSM, /* Nonspacing_Mark */
ucp_bidiON, /* Other_Neutral */
ucp_bidiPDF, /* Pop_Directional_Format */
ucp_bidiPDI, /* Pop_Directional_Isolate */
ucp_bidiR, /* Right_To_Left */
ucp_bidiRLE, /* Right_To_Left_Embedding */
ucp_bidiRLI, /* Right_To_Left_Isolate */
ucp_bidiRLO, /* Right_To_Left_Override */
ucp_bidiS, /* Segment_Separator */
ucp_bidiWS, /* White_Space */
};
/* These are grapheme break properties. The Extended Pictographic property
comes from the emoji-data.txt file. */
enum {
ucp_gbCR, /* 0 */
ucp_gbLF, /* 1 */
ucp_gbControl, /* 2 */
ucp_gbExtend, /* 3 */
ucp_gbPrepend, /* 4 */
ucp_gbSpacingMark, /* 5 */
ucp_gbL, /* 6 Hangul syllable type L */
ucp_gbV, /* 7 Hangul syllable type V */
ucp_gbT, /* 8 Hangul syllable type T */
ucp_gbLV, /* 9 Hangul syllable type LV */
ucp_gbLVT, /* 10 Hangul syllable type LVT */
ucp_gbRegional_Indicator, /* 11 */
ucp_gbOther, /* 12 */
ucp_gbZWJ, /* 13 */
ucp_gbExtended_Pictographic, /* 14 */
};
/* These are the script identifications. */
enum {
/* Scripts which has characters in other scripts. */
ucp_Latin,
ucp_Greek,
ucp_Cyrillic,
ucp_Armenian,
ucp_Hebrew,
ucp_Arabic,
ucp_Syriac,
ucp_Thaana,
ucp_Devanagari,
ucp_Bengali,
ucp_Gurmukhi,
ucp_Gujarati,
ucp_Oriya,
ucp_Tamil,
ucp_Telugu,
ucp_Kannada,
ucp_Malayalam,
ucp_Sinhala,
ucp_Thai,
ucp_Tibetan,
ucp_Myanmar,
ucp_Georgian,
ucp_Hangul,
ucp_Ethiopic,
ucp_Cherokee,
ucp_Runic,
ucp_Mongolian,
ucp_Hiragana,
ucp_Katakana,
ucp_Bopomofo,
ucp_Han,
ucp_Yi,
ucp_Gothic,
ucp_Tagalog,
ucp_Hanunoo,
ucp_Buhid,
ucp_Tagbanwa,
ucp_Limbu,
ucp_Tai_Le,
ucp_Linear_B,
ucp_Shavian,
ucp_Cypriot,
ucp_Buginese,
ucp_Coptic,
ucp_Glagolitic,
ucp_Tifinagh,
ucp_Syloti_Nagri,
ucp_Phags_Pa,
ucp_Nko,
ucp_Kayah_Li,
ucp_Lycian,
ucp_Carian,
ucp_Lydian,
ucp_Avestan,
ucp_Samaritan,
ucp_Lisu,
ucp_Javanese,
ucp_Old_Turkic,
ucp_Kaithi,
ucp_Mandaic,
ucp_Chakma,
ucp_Meroitic_Hieroglyphs,
ucp_Sharada,
ucp_Takri,
ucp_Caucasian_Albanian,
ucp_Duployan,
ucp_Elbasan,
ucp_Grantha,
ucp_Khojki,
ucp_Linear_A,
ucp_Mahajani,
ucp_Manichaean,
ucp_Modi,
ucp_Old_Permic,
ucp_Psalter_Pahlavi,
ucp_Khudawadi,
ucp_Tirhuta,
ucp_Multani,
ucp_Old_Hungarian,
ucp_Adlam,
ucp_Osage,
ucp_Tangut,
ucp_Masaram_Gondi,
ucp_Dogra,
ucp_Gunjala_Gondi,
ucp_Hanifi_Rohingya,
ucp_Sogdian,
ucp_Nandinagari,
ucp_Yezidi,
ucp_Cypro_Minoan,
ucp_Old_Uyghur,
ucp_Toto,
ucp_Garay,
ucp_Gurung_Khema,
ucp_Ol_Onal,
ucp_Sunuwar,
ucp_Todhri,
ucp_Tulu_Tigalari,
/* Scripts which has no characters in other scripts. */
ucp_Unknown,
ucp_Common,
ucp_Lao,
ucp_Canadian_Aboriginal,
ucp_Ogham,
ucp_Khmer,
ucp_Old_Italic,
ucp_Deseret,
ucp_Inherited,
ucp_Ugaritic,
ucp_Osmanya,
ucp_Braille,
ucp_New_Tai_Lue,
ucp_Old_Persian,
ucp_Kharoshthi,
ucp_Balinese,
ucp_Cuneiform,
ucp_Phoenician,
ucp_Sundanese,
ucp_Lepcha,
ucp_Ol_Chiki,
ucp_Vai,
ucp_Saurashtra,
ucp_Rejang,
ucp_Cham,
ucp_Tai_Tham,
ucp_Tai_Viet,
ucp_Egyptian_Hieroglyphs,
ucp_Bamum,
ucp_Meetei_Mayek,
ucp_Imperial_Aramaic,
ucp_Old_South_Arabian,
ucp_Inscriptional_Parthian,
ucp_Inscriptional_Pahlavi,
ucp_Batak,
ucp_Brahmi,
ucp_Meroitic_Cursive,
ucp_Miao,
ucp_Sora_Sompeng,
ucp_Bassa_Vah,
ucp_Pahawh_Hmong,
ucp_Mende_Kikakui,
ucp_Mro,
ucp_Old_North_Arabian,
ucp_Nabataean,
ucp_Palmyrene,
ucp_Pau_Cin_Hau,
ucp_Siddham,
ucp_Warang_Citi,
ucp_Ahom,
ucp_Anatolian_Hieroglyphs,
ucp_Hatran,
ucp_SignWriting,
ucp_Bhaiksuki,
ucp_Marchen,
ucp_Newa,
ucp_Nushu,
ucp_Soyombo,
ucp_Zanabazar_Square,
ucp_Makasar,
ucp_Medefaidrin,
ucp_Old_Sogdian,
ucp_Elymaic,
ucp_Nyiakeng_Puachue_Hmong,
ucp_Wancho,
ucp_Chorasmian,
ucp_Dives_Akuru,
ucp_Khitan_Small_Script,
ucp_Tangsa,
ucp_Vithkuqi,
ucp_Kawi,
ucp_Nag_Mundari,
ucp_Kirat_Rai,
/* This must be last */
ucp_Script_Count
};
/* Size of entries in ucd_script_sets[] */
#define ucd_script_sets_item_size 4
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
/* End of pcre2_ucp.h */

1596
deps/pcre2/pcre2_ucptables_inc.h vendored Normal file

File diff suppressed because it is too large Load Diff

179
deps/pcre2/pcre2_util.h vendored Normal file
View File

@@ -0,0 +1,179 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef PCRE2_UTIL_H_IDEMPOTENT_GUARD
#define PCRE2_UTIL_H_IDEMPOTENT_GUARD
/* Assertion macros */
#ifdef PCRE2_DEBUG
#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
#include <assert.h>
#endif
/* PCRE2_ASSERT(x) can be used to inject an assert() for conditions
that the code below doesn't support. It is a NOP for non debug builds
but in debug builds will print information about the location of the
code where it triggered and crash.
It is meant to work like assert(), and therefore the expression used
should indicate what the expected state is, and shouldn't have any
side-effects. */
#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
#define PCRE2_ASSERT(x) assert(x)
#else
#define PCRE2_ASSERT(x) do \
{ \
if (!(x)) \
{ \
fprintf(stderr, "Assertion failed at " __FILE__ ":%d\n", __LINE__); \
abort(); \
} \
} while(0)
#endif
/* LCOV_EXCL_START */
/* PCRE2_UNREACHABLE() can be used to mark locations on the code that
shouldn't be reached. In non debug builds is defined as a hint for
the compiler to eliminate any code after it, so it is useful also for
performance reasons, but should be used with care because if it is
ever reached will trigger Undefined Behaviour and if you are lucky a
crash. In debug builds it will report the location where it was triggered
and crash. One important point to consider when using this macro, is
that it is only implemented for a few compilers, and therefore can't
be relied on to always be active either, so if it is followed by some
code it is important to make sure that the whole thing is safe to
use even if the macro is not there (ex: make sure there is a `break`
after it if used at the end of a `case`) and to test your code also
with a configuration where the macro will be a NOP. */
#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
#define PCRE2_UNREACHABLE() \
assert(((void)"Execution reached unexpected point", 0))
#else
#define PCRE2_UNREACHABLE() do \
{ \
fprintf(stderr, "Execution reached unexpected point at " __FILE__ \
":%d\n", __LINE__); \
abort(); \
} while(0)
#endif
/* PCRE2_DEBUG_UNREACHABLE() is a debug only version of the previous
macro. It is meant to be used in places where the code is handling
an error situation in code that shouldn't be reached, but that has
some sort of fallback code to normally handle the error. When in
doubt you should use this instead of the previous macro. Like in
the previous case, it is a good idea to document as much as possible
the reason and the actions that should be taken if it ever triggers. */
#define PCRE2_DEBUG_UNREACHABLE() PCRE2_UNREACHABLE()
/* LCOV_EXCL_STOP */
#endif /* PCRE2_DEBUG */
#ifndef PCRE2_ASSERT
#define PCRE2_ASSERT(x) do {} while(0)
#endif
/* LCOV_EXCL_START */
#ifndef PCRE2_DEBUG_UNREACHABLE
#define PCRE2_DEBUG_UNREACHABLE() do {} while(0)
#endif
#ifndef PCRE2_UNREACHABLE
#ifdef HAVE_BUILTIN_UNREACHABLE
#define PCRE2_UNREACHABLE() __builtin_unreachable()
#elif defined(HAVE_BUILTIN_ASSUME)
#define PCRE2_UNREACHABLE() __assume(0)
#else
#define PCRE2_UNREACHABLE() do {} while(0)
#endif
#endif /* !PCRE2_UNREACHABLE */
/* LCOV_EXCL_STOP */
/* We define this fallthrough macro for suppressing -Wimplicit-fallthrough warnings.
Clang only allows this via an attribute, whereas other compilers (eg. GCC) match attributes
and also specially-formatted comments.
This macro should be used with no following semicolon, and ideally with a comment: */
// PCRE2_FALLTHROUGH /* Fall through */
#ifndef PCRE2_FALLTHROUGH
#if defined(__cplusplus) && __cplusplus >= 202002L && \
defined(__has_cpp_attribute)
/* Standards-compatible C++ variant. */
#if __has_cpp_attribute(fallthrough)
#define PCRE2_FALLTHROUGH [[fallthrough]];
#endif
#elif !defined(__cplusplus) && \
defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L && \
defined(__has_c_attribute)
/* Standards-compatible C variant. */
#if __has_c_attribute(fallthrough)
#define PCRE2_FALLTHROUGH [[fallthrough]];
#endif
#elif ((defined(__clang__) && __clang_major__ >= 10) || \
(defined(__GNUC__) && __GNUC__ >= 7)) && \
defined(__has_attribute)
/* Clang and GCC syntax. Rule out old versions because apparently Clang at
least has a broken implementation of __has_attribute. */
#if __has_attribute(fallthrough)
#define PCRE2_FALLTHROUGH __attribute__((fallthrough));
#endif
#endif
#endif /* !PCRE2_FALLTHROUGH */
#ifndef PCRE2_FALLTHROUGH
#define PCRE2_FALLTHROUGH
#endif
#endif /* PCRE2_UTIL_H_IDEMPOTENT_GUARD */
/* End of pcre2_util.h */

397
deps/pcre2/pcre2_valid_utf.c vendored Normal file
View File

@@ -0,0 +1,397 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function for validating UTF character
strings. This file is also #included by the pcre2test program, which uses
macros to change names from _pcre2_xxx to xxxx, thereby avoiding name clashes
with the library. In this case, PCRE2_PCRE2TEST is defined. */
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
#include "pcre2_internal.h"
#endif /* PCRE2_PCRE2TEST */
#ifndef SUPPORT_UNICODE
/*************************************************
* Dummy function when Unicode is not supported *
*************************************************/
/* This function should never be called when Unicode is not supported. */
int
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
{
(void)string;
(void)length;
(void)erroroffset;
return 0;
}
#else /* UTF is supported */
/*************************************************
* Validate a UTF string *
*************************************************/
/* This function is called (optionally) at the start of compile or match, to
check that a supposed UTF string is actually valid. The early check means
that subsequent code can assume it is dealing with a valid string. The check
can be turned off for maximum performance, but the consequences of supplying an
invalid string are then undefined.
Arguments:
string points to the string
length length of string
errp pointer to an error position offset variable
Returns: == 0 if the string is a valid UTF string
!= 0 otherwise, setting the offset of the bad character
*/
int
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
{
PCRE2_SPTR p;
uint32_t c;
/* ----------------- Check a UTF-8 string ----------------- */
#if PCRE2_CODE_UNIT_WIDTH == 8
/* Originally, this function checked according to RFC 2279, allowing for values
in the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were
in the canonical format. Once somebody had pointed out RFC 3629 to me (it
obsoletes 2279), additional restrictions were applied. The values are now
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
characters is still checked. Error returns are as follows:
PCRE2_ERROR_UTF8_ERR1 Missing 1 byte at the end of the string
PCRE2_ERROR_UTF8_ERR2 Missing 2 bytes at the end of the string
PCRE2_ERROR_UTF8_ERR3 Missing 3 bytes at the end of the string
PCRE2_ERROR_UTF8_ERR4 Missing 4 bytes at the end of the string
PCRE2_ERROR_UTF8_ERR5 Missing 5 bytes at the end of the string
PCRE2_ERROR_UTF8_ERR6 2nd-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR7 3rd-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR8 4th-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR9 5th-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR10 6th-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR11 5-byte character is not permitted by RFC 3629
PCRE2_ERROR_UTF8_ERR12 6-byte character is not permitted by RFC 3629
PCRE2_ERROR_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence
PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence
PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence
PCRE2_ERROR_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
PCRE2_ERROR_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
PCRE2_ERROR_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
*/
for (p = string; length > 0; p++)
{
uint32_t ab, d;
c = *p;
length--;
if (c < 128) continue; /* ASCII character */
if (c < 0xc0) /* Isolated 10xx xxxx byte */
{
*erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF8_ERR20;
}
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
{
*erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF8_ERR21;
}
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */
if (length < ab) /* Missing bytes */
{
*erroroffset = (PCRE2_SIZE)(p - string);
switch(ab - length)
{
case 1: return PCRE2_ERROR_UTF8_ERR1;
case 2: return PCRE2_ERROR_UTF8_ERR2;
case 3: return PCRE2_ERROR_UTF8_ERR3;
case 4: return PCRE2_ERROR_UTF8_ERR4;
case 5: return PCRE2_ERROR_UTF8_ERR5;
}
}
length -= ab; /* Length remaining */
/* Check top bits in the second byte */
if (((d = *(++p)) & 0xc0) != 0x80)
{
*erroroffset = (PCRE2_SIZE)(p - string) - 1;
return PCRE2_ERROR_UTF8_ERR6;
}
/* For each length, check that the remaining bytes start with the 0x80 bit
set and not the 0x40 bit. Then check for an overlong sequence, and for the
excluded range 0xd800 to 0xdfff. */
switch (ab)
{
/* 2-byte character. No further bytes to check for 0x80. Check first byte
for for xx00 000x (overlong sequence). */
case 1: if ((c & 0x3e) == 0)
{
*erroroffset = (PCRE2_SIZE)(p - string) - 1;
return PCRE2_ERROR_UTF8_ERR15;
}
break;
/* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
for 1110 0000, xx0x xxxx (overlong sequence) or
1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
case 2:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR7;
}
if (c == 0xe0 && (d & 0x20) == 0)
{
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR16;
}
if (c == 0xed && d >= 0xa0)
{
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR14;
}
break;
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a
character greater than 0x0010ffff (f4 8f bf bf) */
case 3:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR8;
}
if (c == 0xf0 && (d & 0x30) == 0)
{
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR17;
}
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
{
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR13;
}
break;
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
rejected by the length test below. However, we do the appropriate tests
here so that overlong sequences get diagnosed, and also in case there is
ever an option for handling these larger code points. */
/* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
1111 1000, xx00 0xxx */
case 4:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR8;
}
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
{
*erroroffset = (PCRE2_SIZE)(p - string) - 4;
return PCRE2_ERROR_UTF8_ERR9;
}
if (c == 0xf8 && (d & 0x38) == 0)
{
*erroroffset = (PCRE2_SIZE)(p - string) - 4;
return PCRE2_ERROR_UTF8_ERR18;
}
break;
/* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
1111 1100, xx00 00xx. */
case 5:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR8;
}
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
{
*erroroffset = (PCRE2_SIZE)(p - string) - 4;
return PCRE2_ERROR_UTF8_ERR9;
}
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
{
*erroroffset = (PCRE2_SIZE)(p - string) - 5;
return PCRE2_ERROR_UTF8_ERR10;
}
if (c == 0xfc && (d & 0x3c) == 0)
{
*erroroffset = (PCRE2_SIZE)(p - string) - 5;
return PCRE2_ERROR_UTF8_ERR19;
}
break;
}
/* Character is valid under RFC 2279, but 4-byte and 5-byte characters are
excluded by RFC 3629. The pointer p is currently at the last byte of the
character. */
if (ab > 3)
{
*erroroffset = (PCRE2_SIZE)(p - string) - ab;
return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
}
}
return 0;
/* ----------------- Check a UTF-16 string ----------------- */
#elif PCRE2_CODE_UNIT_WIDTH == 16
/* There's not so much work, nor so many errors, for UTF-16.
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string
PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
*/
for (p = string; length > 0; p++)
{
c = *p;
length--;
if ((c & 0xf800) != 0xd800)
{
/* Normal UTF-16 code point. Neither high nor low surrogate. */
}
else if ((c & 0x0400) == 0)
{
/* High surrogate. Must be a followed by a low surrogate. */
if (length == 0)
{
*erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF16_ERR1;
}
p++;
length--;
if ((*p & 0xfc00) != 0xdc00)
{
*erroroffset = (PCRE2_SIZE)(p - string) - 1;
return PCRE2_ERROR_UTF16_ERR2;
}
}
else
{
/* Isolated low surrogate. Always an error. */
*erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF16_ERR3;
}
}
return 0;
/* ----------------- Check a UTF-32 string ----------------- */
#else
/* There is very little to do for a UTF-32 string.
PCRE2_ERROR_UTF32_ERR1 Surrogate character
PCRE2_ERROR_UTF32_ERR2 Character > 0x10ffff
*/
for (p = string; length > 0; length--, p++)
{
c = *p;
if ((c & 0xfffff800u) != 0xd800u)
{
/* Normal UTF-32 code point. Neither high nor low surrogate. */
if (c > 0x10ffffu)
{
*erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF32_ERR2;
}
}
else
{
/* A surrogate */
*erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF32_ERR1;
}
}
return 0;
#endif /* CODE_UNIT_WIDTH */
}
#endif /* SUPPORT_UNICODE */
/* End of pcre2_valid_utf.c */

547
deps/pcre2/pcre2_xclass.c vendored Normal file
View File

@@ -0,0 +1,547 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains two internal functions that are used to match
OP_XCLASS and OP_ECLASS. It is used by pcre2_auto_possessify() and by both
pcre2_match() and pcre2_dfa_match(). */
#include "pcre2_internal.h"
/*************************************************
* Match character against an XCLASS *
*************************************************/
/* This function is called to match a character against an extended class that
might contain codepoints above 255 and/or Unicode properties.
Arguments:
c the character
data points to the flag code unit of the XCLASS data
utf TRUE if in UTF mode
Returns: TRUE if character matches, else FALSE
*/
BOOL
PRIV(xclass)(uint32_t c, PCRE2_SPTR data, const uint8_t *char_lists_end, BOOL utf)
{
/* Update PRIV(update_classbits) when this function is changed. */
PCRE2_UCHAR t;
BOOL not_negated = (*data & XCL_NOT) == 0;
uint32_t type, max_index, min_index, value;
const uint8_t *next_char;
#if PCRE2_CODE_UNIT_WIDTH == 8
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
utf = TRUE;
#endif
/* Code points < 256 are matched against a bitmap, if one is present. */
if ((*data++ & XCL_MAP) != 0)
{
if (c < 256)
return (((const uint8_t *)data)[c/8] & (1u << (c&7))) != 0;
/* Skip bitmap. */
data += 32 / sizeof(PCRE2_UCHAR);
}
/* Match against the list of Unicode properties. We won't ever
encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. */
#ifdef SUPPORT_UNICODE
if (*data == XCL_PROP || *data == XCL_NOTPROP)
{
/* The UCD record is the same for all properties. */
const ucd_record *prop = GET_UCD(c);
do
{
int chartype;
BOOL isprop = (*data++) == XCL_PROP;
BOOL ok;
switch(*data)
{
case PT_LAMP:
chartype = prop->chartype;
if ((chartype == ucp_Lu || chartype == ucp_Ll ||
chartype == ucp_Lt) == isprop) return not_negated;
break;
case PT_GC:
if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
return not_negated;
break;
case PT_PC:
if ((data[1] == prop->chartype) == isprop) return not_negated;
break;
case PT_SC:
if ((data[1] == prop->script) == isprop) return not_negated;
break;
case PT_SCX:
ok = (data[1] == prop->script ||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0);
if (ok == isprop) return not_negated;
break;
case PT_ALNUM:
chartype = prop->chartype;
if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[chartype] == ucp_N) == isprop)
return not_negated;
break;
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
if (isprop) return not_negated;
break;
default:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
return not_negated;
break;
}
break;
case PT_WORD:
chartype = prop->chartype;
if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[chartype] == ucp_N ||
chartype == ucp_Mn || chartype == ucp_Pc) == isprop)
return not_negated;
break;
case PT_UCNC:
if (c < 0xa0)
{
if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT) == isprop)
return not_negated;
}
else
{
if ((c < 0xd800 || c > 0xdfff) == isprop)
return not_negated;
}
break;
case PT_BIDICL:
if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop)
return not_negated;
break;
case PT_BOOL:
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
UCD_BPROPS_PROP(prop), data[1]) != 0;
if (ok == isprop) return not_negated;
break;
/* The following three properties can occur only in an XCLASS, as there
is no \p or \P coding for them. */
/* Graphic character. Implement this as not Z (space or separator) and
not C (other), except for Cf (format) with a few exceptions. This seems
to be what Perl does. The exceptional characters are:
U+061C Arabic Letter Mark
U+180E Mongolian Vowel Separator
U+2066 - U+2069 Various "isolate"s
*/
case PT_PXGRAPH:
chartype = prop->chartype;
if ((PRIV(ucp_gentype)[chartype] != ucp_Z &&
(PRIV(ucp_gentype)[chartype] != ucp_C ||
(chartype == ucp_Cf &&
c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
)) == isprop)
return not_negated;
break;
/* Printable character: same as graphic, with the addition of Zs, i.e.
not Zl and not Zp, and U+180E. */
case PT_PXPRINT:
chartype = prop->chartype;
if ((chartype != ucp_Zl &&
chartype != ucp_Zp &&
(PRIV(ucp_gentype)[chartype] != ucp_C ||
(chartype == ucp_Cf &&
c != 0x061c && (c < 0x2066 || c > 0x2069))
)) == isprop)
return not_negated;
break;
/* Punctuation: all Unicode punctuation, plus ASCII characters that
Unicode treats as symbols rather than punctuation, for Perl
compatibility (these are $+<=>^`|~). */
case PT_PXPUNCT:
chartype = prop->chartype;
if ((PRIV(ucp_gentype)[chartype] == ucp_P ||
(c < 128 && PRIV(ucp_gentype)[chartype] == ucp_S)) == isprop)
return not_negated;
break;
/* Perl has two sets of hex digits */
case PT_PXXDIGIT:
if (((c >= CHAR_0 && c <= CHAR_9) ||
(c >= CHAR_A && c <= CHAR_F) ||
(c >= CHAR_a && c <= CHAR_f) ||
(c >= 0xff10 && c <= 0xff19) || /* Fullwidth digits */
(c >= 0xff21 && c <= 0xff26) || /* Fullwidth letters */
(c >= 0xff41 && c <= 0xff46)) == isprop)
return not_negated;
break;
/* This should never occur, but compilers may mutter if there is no
default. */
/* LCOV_EXCL_START */
default:
PCRE2_DEBUG_UNREACHABLE();
return FALSE;
/* LCOV_EXCL_STOP */
}
data += 2;
}
while (*data == XCL_PROP || *data == XCL_NOTPROP);
}
#else
(void)utf; /* Avoid compiler warning */
#endif /* SUPPORT_UNICODE */
/* Match against large chars or ranges that end with a large char. */
if (*data < XCL_LIST)
{
while ((t = *data++) != XCL_END)
{
uint32_t x, y;
#ifdef SUPPORT_UNICODE
if (utf)
{
GETCHARINC(x, data); /* macro generates multiple statements */
}
else
#endif
x = *data++;
if (t == XCL_SINGLE)
{
/* Since character ranges follow the properties, and they are
sorted, early return is possible for all characters <= x. */
if (c <= x) return (c == x) ? not_negated : !not_negated;
continue;
}
PCRE2_ASSERT(t == XCL_RANGE);
#ifdef SUPPORT_UNICODE
if (utf)
{
GETCHARINC(y, data); /* macro generates multiple statements */
}
else
#endif
y = *data++;
/* Since character ranges follow the properties, and they are
sorted, early return is possible for all characters <= y. */
if (c <= y) return (c >= x) ? not_negated : !not_negated;
}
return !not_negated; /* char did not match */
}
#if PCRE2_CODE_UNIT_WIDTH == 8
type = (uint32_t)(data[0] << 8) | data[1];
data += 2;
#else
type = data[0];
data++;
#endif /* CODE_UNIT_WIDTH */
/* Align characters. */
next_char = char_lists_end - (GET(data, 0) << 1);
type &= XCL_TYPE_MASK;
/* Alignment check. */
PCRE2_ASSERT(((uintptr_t)next_char & 0x1) == 0);
if (c >= XCL_CHAR_LIST_HIGH_16_START)
{
max_index = type & XCL_ITEM_COUNT_MASK;
if (max_index == XCL_ITEM_COUNT_MASK)
{
max_index = *(const uint16_t*)next_char;
PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
next_char += 2;
}
next_char += max_index << 1;
type >>= XCL_TYPE_BIT_LEN;
}
if (c < XCL_CHAR_LIST_LOW_32_START)
{
max_index = type & XCL_ITEM_COUNT_MASK;
c = (uint16_t)((c << XCL_CHAR_SHIFT) | XCL_CHAR_END);
if (max_index == XCL_ITEM_COUNT_MASK)
{
max_index = *(const uint16_t*)next_char;
PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
next_char += 2;
}
if (max_index == 0 || c < *(const uint16_t*)next_char)
return ((type & XCL_BEGIN_WITH_RANGE) != 0) == not_negated;
min_index = 0;
value = ((const uint16_t*)next_char)[--max_index];
if (c >= value)
return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
max_index--;
/* Binary search of a range. */
while (TRUE)
{
uint32_t mid_index = (min_index + max_index) >> 1;
value = ((const uint16_t*)next_char)[mid_index];
if (c < value)
max_index = mid_index - 1;
else if (((const uint16_t*)next_char)[mid_index + 1] <= c)
min_index = mid_index + 1;
else
return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
}
}
/* Skip the 16 bit ranges. */
max_index = type & XCL_ITEM_COUNT_MASK;
if (max_index == XCL_ITEM_COUNT_MASK)
{
max_index = *(const uint16_t*)next_char;
PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
next_char += 2;
}
next_char += (max_index << 1);
type >>= XCL_TYPE_BIT_LEN;
/* Alignment check. */
PCRE2_ASSERT(((uintptr_t)next_char & 0x3) == 0);
max_index = type & XCL_ITEM_COUNT_MASK;
#if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= XCL_CHAR_LIST_HIGH_32_START)
{
if (max_index == XCL_ITEM_COUNT_MASK)
{
max_index = *(const uint32_t*)next_char;
PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
next_char += 4;
}
next_char += max_index << 2;
type >>= XCL_TYPE_BIT_LEN;
max_index = type & XCL_ITEM_COUNT_MASK;
}
#endif
c = (uint32_t)((c << XCL_CHAR_SHIFT) | XCL_CHAR_END);
if (max_index == XCL_ITEM_COUNT_MASK)
{
max_index = *(const uint32_t*)next_char;
next_char += 4;
}
if (max_index == 0 || c < *(const uint32_t*)next_char)
return ((type & XCL_BEGIN_WITH_RANGE) != 0) == not_negated;
min_index = 0;
value = ((const uint32_t*)next_char)[--max_index];
if (c >= value)
return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
max_index--;
/* Binary search of a range. */
while (TRUE)
{
uint32_t mid_index = (min_index + max_index) >> 1;
value = ((const uint32_t*)next_char)[mid_index];
if (c < value)
max_index = mid_index - 1;
else if (((const uint32_t*)next_char)[mid_index + 1] <= c)
min_index = mid_index + 1;
else
return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
}
}
/*************************************************
* Match character against an ECLASS *
*************************************************/
/* This function is called to match a character against an extended class
used for describing characters using boolean operations on sets.
Arguments:
c the character
data_start points to the start of the ECLASS data
data_end points one-past-the-last of the ECLASS data
utf TRUE if in UTF mode
Returns: TRUE if character matches, else FALSE
*/
BOOL
PRIV(eclass)(uint32_t c, PCRE2_SPTR data_start, PCRE2_SPTR data_end,
const uint8_t *char_lists_end, BOOL utf)
{
PCRE2_SPTR ptr = data_start;
PCRE2_UCHAR flags;
uint32_t stack = 0;
int stack_depth = 0;
PCRE2_ASSERT(data_start < data_end);
flags = *ptr++;
PCRE2_ASSERT((flags & ECL_MAP) == 0 ||
(data_end - ptr) >= 32 / (int)sizeof(PCRE2_UCHAR));
/* Code points < 256 are matched against a bitmap, if one is present.
Otherwise all codepoints are checked later. */
if ((flags & ECL_MAP) != 0)
{
if (c < 256)
return (((const uint8_t *)ptr)[c/8] & (1u << (c&7))) != 0;
/* Skip the bitmap. */
ptr += 32 / sizeof(PCRE2_UCHAR);
}
/* Do a little loop, until we reach the end of the ECLASS. */
while (ptr < data_end)
{
switch (*ptr)
{
case ECL_AND:
++ptr;
stack = (stack >> 1) & (stack | ~(uint32_t)1u);
PCRE2_ASSERT(stack_depth >= 2);
--stack_depth;
break;
case ECL_OR:
++ptr;
stack = (stack >> 1) | (stack & (uint32_t)1u);
PCRE2_ASSERT(stack_depth >= 2);
--stack_depth;
break;
case ECL_XOR:
++ptr;
stack = (stack >> 1) ^ (stack & (uint32_t)1u);
PCRE2_ASSERT(stack_depth >= 2);
--stack_depth;
break;
case ECL_NOT:
++ptr;
stack ^= (uint32_t)1u;
PCRE2_ASSERT(stack_depth >= 1);
break;
case ECL_XCLASS:
{
uint32_t matched = PRIV(xclass)(c, ptr + 1 + LINK_SIZE, char_lists_end, utf);
ptr += GET(ptr, 1);
stack = (stack << 1) | matched;
++stack_depth;
break;
}
/* This should never occur, but compilers may mutter if there is no
default. */
/* LCOV_EXCL_START */
default:
PCRE2_DEBUG_UNREACHABLE();
return FALSE;
/* LCOV_EXCL_STOP */
}
}
PCRE2_ASSERT(stack_depth == 1);
(void)stack_depth; /* Ignore unused variable, if assertions are disabled. */
/* The final bit left on the stack now holds the match result. */
return (stack & 1u) != 0;
}
/* End of pcre2_xclass.c */