| Index: source/i18n/regexcmp.cpp
|
| diff --git a/source/i18n/regexcmp.cpp b/source/i18n/regexcmp.cpp
|
| index d56dce37ba24c9063bc117f7fa99cdeeb3bbe715..2657cf37cbecca17b4886c5960287e9fa10f2a99 100644
|
| --- a/source/i18n/regexcmp.cpp
|
| +++ b/source/i18n/regexcmp.cpp
|
| @@ -1,7 +1,9 @@
|
| +// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
| +// License & terms of use: http://www.unicode.org/copyright.html
|
| //
|
| // file: regexcmp.cpp
|
| //
|
| -// Copyright (C) 2002-2015 International Business Machines Corporation and others.
|
| +// Copyright (C) 2002-2016 International Business Machines Corporation and others.
|
| // All Rights Reserved.
|
| //
|
| // This file contains the ICU regular expression compiler, which is responsible
|
| @@ -1753,8 +1755,6 @@ UBool RegexCompile::doParseActions(int32_t action)
|
| case doSetNamedRange:
|
| // We have scanned literal-\N{CHAR NAME}. Add the range to the set.
|
| // The left character is already in the set, and is saved in fLastSetLiteral.
|
| - // Nonetheless, check if |fLastSetLiteral| is indeed set because it's
|
| - // not set in some edge cases.
|
| // The right side needs to be picked up, the scan is at the 'N'.
|
| // Lower Limit > Upper limit being an error matches both Java
|
| // and ICU UnicodeSet behavior.
|
| @@ -1825,12 +1825,11 @@ UBool RegexCompile::doParseActions(int32_t action)
|
| case doSetRange:
|
| // We have scanned literal-literal. Add the range to the set.
|
| // The left character is already in the set, and is saved in fLastSetLiteral.
|
| - // Nonetheless, check if |fLastSetLiteral| is indeed set because it's
|
| - // not set in some edge cases.
|
| // The right side is the current character.
|
| // Lower Limit > Upper limit being an error matches both Java
|
| // and ICU UnicodeSet behavior.
|
| {
|
| +
|
| if (fLastSetLiteral == U_SENTINEL || fLastSetLiteral > fC.fChar) {
|
| error(U_REGEX_INVALID_RANGE);
|
| }
|
| @@ -2606,7 +2605,11 @@ void RegexCompile::findCaseInsensitiveStarters(UChar32 c, UnicodeSet *starterCh
|
|
|
| // End of machine generated data.
|
|
|
| - if (u_hasBinaryProperty(c, UCHAR_CASE_SENSITIVE)) {
|
| + if (c < UCHAR_MIN_VALUE || c > UCHAR_MAX_VALUE) {
|
| + // This function should never be called with an invalid input character.
|
| + U_ASSERT(FALSE);
|
| + starterChars->clear();
|
| + } else if (u_hasBinaryProperty(c, UCHAR_CASE_SENSITIVE)) {
|
| UChar32 caseFoldedC = u_foldCase(c, U_FOLD_CASE_DEFAULT);
|
| starterChars->set(caseFoldedC, caseFoldedC);
|
|
|
| @@ -2899,6 +2902,7 @@ void RegexCompile::matchStartType() {
|
|
|
| case URX_JMPX:
|
| loc++; // Except for extra operand on URX_JMPX, same as URX_JMP.
|
| + U_FALLTHROUGH;
|
| case URX_JMP:
|
| {
|
| int32_t jmpDest = URX_VAL(op);
|
| @@ -3261,6 +3265,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
|
| case URX_JMPX:
|
| loc++; // URX_JMPX has an extra operand, ignored here,
|
| // otherwise processed identically to URX_JMP.
|
| + U_FALLTHROUGH;
|
| case URX_JMP:
|
| {
|
| int32_t jmpDest = URX_VAL(op);
|
|
|