mirror of
https://github.com/libgit2/libgit2.git
synced 2026-06-22 06:26:26 +00:00
Ye olde PCRE (8.45) was end-of-lifed several years ago. For our bundled regular expression implementation, we want to include the new, still-maintained PCRE2 framework. Include PCRE2 v10.47.
172 lines
6.0 KiB
C
172 lines
6.0 KiB
C
/*************************************************
|
|
* Perl-Compatible Regular Expressions *
|
|
*************************************************/
|
|
|
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
|
and semantics are as close as possible to those of the Perl 5 language.
|
|
|
|
Written by Philip Hazel
|
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
|
|
|
-----------------------------------------------------------------------------
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
* Neither the name of the University of Cambridge nor the names of its
|
|
contributors may be used to endorse or promote products derived from
|
|
this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
POSSIBILITY OF SUCH DAMAGE.
|
|
-----------------------------------------------------------------------------
|
|
*/
|
|
|
|
|
|
#include "pcre2_internal.h"
|
|
|
|
|
|
|
|
/* Advance the offset by one code unit, and return the new value.
|
|
It is only called when the offset is not at the end of the subject. */
|
|
|
|
static PCRE2_SIZE do_bumpalong(pcre2_match_data *match_data,
|
|
PCRE2_SIZE offset)
|
|
{
|
|
PCRE2_SPTR subject = match_data->subject;
|
|
PCRE2_SIZE subject_length = match_data->subject_length;
|
|
#ifdef SUPPORT_UNICODE
|
|
BOOL utf = (match_data->code->overall_options & PCRE2_UTF) != 0;
|
|
#endif
|
|
|
|
/* Skip over CRLF as an atomic sequence, if CRLF is configured as a newline
|
|
sequence. */
|
|
|
|
if (subject[offset] == CHAR_CR && offset + 1 < subject_length &&
|
|
subject[offset + 1] == CHAR_LF)
|
|
{
|
|
switch(match_data->code->newline_convention)
|
|
{
|
|
case PCRE2_NEWLINE_CRLF:
|
|
case PCRE2_NEWLINE_ANY:
|
|
case PCRE2_NEWLINE_ANYCRLF:
|
|
return offset + 2;
|
|
}
|
|
}
|
|
|
|
/* Advance by one full character if in UTF mode. */
|
|
|
|
#ifdef SUPPORT_UNICODE
|
|
if (utf)
|
|
{
|
|
PCRE2_SPTR next = subject + offset + 1;
|
|
PCRE2_SPTR subject_end = subject + subject_length;
|
|
|
|
(void)subject_end; /* Suppress warning; 32-bit FORWARDCHARTEST ignores this */
|
|
FORWARDCHARTEST(next, subject_end);
|
|
return next - subject;
|
|
}
|
|
#endif
|
|
|
|
return offset + 1;
|
|
}
|
|
|
|
|
|
|
|
/*************************************************
|
|
* Advance the match *
|
|
*************************************************/
|
|
|
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
|
pcre2_next_match(pcre2_match_data *match_data, PCRE2_SIZE *pstart_offset,
|
|
uint32_t *poptions)
|
|
{
|
|
int rc = match_data->rc;
|
|
PCRE2_SIZE start_offset = match_data->start_offset;
|
|
PCRE2_SIZE *ovector = match_data->ovector;
|
|
|
|
/* Match error, or no match: no further iteration possible. In previous versions
|
|
of PCRE2, we recommended that clients use a strategy which involved retrying in
|
|
certain cases after PCRE2_ERROR_NOMATCH, but this is no longer required. */
|
|
|
|
if (rc < 0)
|
|
return FALSE;
|
|
|
|
/* Match succeeded: get the start offset for the next match */
|
|
|
|
/* Although \K can affect the position of ovector[0], there are no ways to do
|
|
anything surprising with ovector[1], which must always be >= start_offset. */
|
|
|
|
PCRE2_ASSERT(ovector[1] >= start_offset);
|
|
|
|
/* Special handling for patterns which contain \K in a lookaround, which enables
|
|
the match start to be pushed back to before the starting search offset
|
|
(ovector[0] < start_offset) or after the match ends (ovector[0] > ovector[1]).
|
|
This is not a problem if ovector[1] > start_offset, because in this case, we can
|
|
just attempt the next match at ovector[1]: we are making progress, which is all
|
|
that we require.
|
|
|
|
However, if we have ovector[1] == start_offset, then we have a very rare case
|
|
which must be handled specially, because it's a non-empty match which
|
|
nonetheless fails to make progress through the subject. */
|
|
|
|
if (ovector[0] != start_offset && ovector[1] == start_offset)
|
|
{
|
|
/* If the match end is at the end of the subject, we are done. */
|
|
|
|
if (start_offset >= match_data->subject_length)
|
|
return FALSE;
|
|
|
|
/* Otherwise, bump along by one code unit, and do a normal search. */
|
|
|
|
*pstart_offset = do_bumpalong(match_data, ovector[1]);
|
|
*poptions = 0;
|
|
return TRUE;
|
|
}
|
|
|
|
/* If the previous match was for an empty string, we are finished if we are at
|
|
the end of the subject. Otherwise, arrange to run another match at the same
|
|
point to see if a non-empty match can be found. */
|
|
|
|
if (ovector[0] == ovector[1])
|
|
{
|
|
/* If the match is at the end of the subject, we are done. */
|
|
|
|
if (ovector[0] >= match_data->subject_length)
|
|
return FALSE;
|
|
|
|
/* Otherwise, continue at this exact same point, but we must set the flag
|
|
which ensures that we don't return the exact same empty match again. */
|
|
|
|
*pstart_offset = ovector[1];
|
|
*poptions = PCRE2_NOTEMPTY_ATSTART;
|
|
return TRUE;
|
|
}
|
|
|
|
/* Finally, we must be in the happy state of a non-empty match, where the end of
|
|
the match is further on in the subject than start_offset, so we are easily able
|
|
to continue and make progress. */
|
|
|
|
*pstart_offset = ovector[1];
|
|
*poptions = 0;
|
|
return TRUE;
|
|
}
|
|
|
|
/* End of pcre2_match_next.c */
|