Index: source/i18n/regexcmp.cpp |
diff --git a/source/i18n/regexcmp.cpp b/source/i18n/regexcmp.cpp |
index d56dce37ba24c9063bc117f7fa99cdeeb3bbe715..2657cf37cbecca17b4886c5960287e9fa10f2a99 100644 |
--- a/source/i18n/regexcmp.cpp |
+++ b/source/i18n/regexcmp.cpp |
@@ -1,7 +1,9 @@ |
+// Copyright (C) 2016 and later: Unicode, Inc. and others. |
+// License & terms of use: http://www.unicode.org/copyright.html |
// |
// file: regexcmp.cpp |
// |
-// Copyright (C) 2002-2015 International Business Machines Corporation and others. |
+// Copyright (C) 2002-2016 International Business Machines Corporation and others. |
// All Rights Reserved. |
// |
// This file contains the ICU regular expression compiler, which is responsible |
@@ -1753,8 +1755,6 @@ UBool RegexCompile::doParseActions(int32_t action) |
case doSetNamedRange: |
// We have scanned literal-\N{CHAR NAME}. Add the range to the set. |
// The left character is already in the set, and is saved in fLastSetLiteral. |
- // Nonetheless, check if |fLastSetLiteral| is indeed set because it's |
- // not set in some edge cases. |
// The right side needs to be picked up, the scan is at the 'N'. |
// Lower Limit > Upper limit being an error matches both Java |
// and ICU UnicodeSet behavior. |
@@ -1825,12 +1825,11 @@ UBool RegexCompile::doParseActions(int32_t action) |
case doSetRange: |
// We have scanned literal-literal. Add the range to the set. |
// The left character is already in the set, and is saved in fLastSetLiteral. |
- // Nonetheless, check if |fLastSetLiteral| is indeed set because it's |
- // not set in some edge cases. |
// The right side is the current character. |
// Lower Limit > Upper limit being an error matches both Java |
// and ICU UnicodeSet behavior. |
{ |
+ |
if (fLastSetLiteral == U_SENTINEL || fLastSetLiteral > fC.fChar) { |
error(U_REGEX_INVALID_RANGE); |
} |
@@ -2606,7 +2605,11 @@ void RegexCompile::findCaseInsensitiveStarters(UChar32 c, UnicodeSet *starterCh |
// End of machine generated data. |
- if (u_hasBinaryProperty(c, UCHAR_CASE_SENSITIVE)) { |
+ if (c < UCHAR_MIN_VALUE || c > UCHAR_MAX_VALUE) { |
+ // This function should never be called with an invalid input character. |
+ U_ASSERT(FALSE); |
+ starterChars->clear(); |
+ } else if (u_hasBinaryProperty(c, UCHAR_CASE_SENSITIVE)) { |
UChar32 caseFoldedC = u_foldCase(c, U_FOLD_CASE_DEFAULT); |
starterChars->set(caseFoldedC, caseFoldedC); |
@@ -2899,6 +2902,7 @@ void RegexCompile::matchStartType() { |
case URX_JMPX: |
loc++; // Except for extra operand on URX_JMPX, same as URX_JMP. |
+ U_FALLTHROUGH; |
case URX_JMP: |
{ |
int32_t jmpDest = URX_VAL(op); |
@@ -3261,6 +3265,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { |
case URX_JMPX: |
loc++; // URX_JMPX has an extra operand, ignored here, |
// otherwise processed identically to URX_JMP. |
+ U_FALLTHROUGH; |
case URX_JMP: |
{ |
int32_t jmpDest = URX_VAL(op); |