source/i18n/regexcmp.cpp - Issue 2440913002: Update ICU to 58.1

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: source/i18n/regexcmp.cpp

Issue 2440913002: Update ICU to 58.1

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/i18n/regexcmp.cpp

diff --git a/source/i18n/regexcmp.cpp b/source/i18n/regexcmp.cpp

index d56dce37ba24c9063bc117f7fa99cdeeb3bbe715..2657cf37cbecca17b4886c5960287e9fa10f2a99 100644

--- a/source/i18n/regexcmp.cpp

+++ b/source/i18n/regexcmp.cpp

@@ -1,7 +1,9 @@

+// License & terms of use: http://www.unicode.org/copyright.html

// file: regexcmp.cpp

// This file contains the ICU regular expression compiler, which is responsible

@@ -1753,8 +1755,6 @@ UBool RegexCompile::doParseActions(int32_t action)

case doSetNamedRange:

// We have scanned literal-\N{CHAR NAME}. Add the range to the set.

// The left character is already in the set, and is saved in fLastSetLiteral.

- // Nonetheless, check if |fLastSetLiteral| is indeed set because it's

- // not set in some edge cases.

// The right side needs to be picked up, the scan is at the 'N'.

// Lower Limit > Upper limit being an error matches both Java

// and ICU UnicodeSet behavior.

@@ -1825,12 +1825,11 @@ UBool RegexCompile::doParseActions(int32_t action)

case doSetRange:

// We have scanned literal-literal. Add the range to the set.

// The left character is already in the set, and is saved in fLastSetLiteral.

- // Nonetheless, check if |fLastSetLiteral| is indeed set because it's

- // not set in some edge cases.

// The right side is the current character.

// Lower Limit > Upper limit being an error matches both Java

// and ICU UnicodeSet behavior.

{

if (fLastSetLiteral == U_SENTINEL || fLastSetLiteral > fC.fChar) {

error(U_REGEX_INVALID_RANGE);

}

@@ -2606,7 +2605,11 @@ void RegexCompile::findCaseInsensitiveStarters(UChar32 c, UnicodeSet *starterCh

// End of machine generated data.

- if (u_hasBinaryProperty(c, UCHAR_CASE_SENSITIVE)) {

+ if (c < UCHAR_MIN_VALUE || c > UCHAR_MAX_VALUE) {

+ // This function should never be called with an invalid input character.

+ U_ASSERT(FALSE);

+ starterChars->clear();

+ } else if (u_hasBinaryProperty(c, UCHAR_CASE_SENSITIVE)) {

UChar32 caseFoldedC = u_foldCase(c, U_FOLD_CASE_DEFAULT);

starterChars->set(caseFoldedC, caseFoldedC);

@@ -2899,6 +2902,7 @@ void RegexCompile::matchStartType() {

case URX_JMPX:

loc++; // Except for extra operand on URX_JMPX, same as URX_JMP.

+ U_FALLTHROUGH;

case URX_JMP:

{

int32_t jmpDest = URX_VAL(op);

@@ -3261,6 +3265,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {

case URX_JMPX:

loc++; // URX_JMPX has an extra operand, ignored here,

// otherwise processed identically to URX_JMP.

+ U_FALLTHROUGH;

case URX_JMP:

{

int32_t jmpDest = URX_VAL(op);

« no previous file with comments | « source/i18n/regexcmp.h ('k') | source/i18n/regexcst.h » ('j') | no next file with comments »