Index: source/common/rbbiscan.cpp |
diff --git a/source/common/rbbiscan.cpp b/source/common/rbbiscan.cpp |
index 1dc6b704d01094c7233e9d3d5f3b19566b7d5b3e..df30f2c204acfbf5e1c43627d30a4b95459d8ef3 100644 |
--- a/source/common/rbbiscan.cpp |
+++ b/source/common/rbbiscan.cpp |
@@ -1,7 +1,9 @@ |
+// Copyright (C) 2016 and later: Unicode, Inc. and others. |
+// License & terms of use: http://www.unicode.org/copyright.html |
// |
// file: rbbiscan.cpp |
// |
-// Copyright (C) 2002-2015, International Business Machines Corporation and others. |
+// Copyright (C) 2002-2016, International Business Machines Corporation and others. |
// All Rights Reserved. |
// |
// This file contains the Rule Based Break Iterator Rule Builder functions for |
@@ -87,24 +89,27 @@ U_NAMESPACE_BEGIN |
RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb) |
{ |
fRB = rb; |
+ fScanIndex = 0; |
+ fNextIndex = 0; |
+ fQuoteMode = FALSE; |
+ fLineNum = 1; |
+ fCharNum = 0; |
+ fLastChar = 0; |
+ |
+ fStateTable = NULL; |
+ fStack[0] = 0; |
fStackPtr = 0; |
- fStack[fStackPtr] = 0; |
- fNodeStackPtr = 0; |
- fRuleNum = 0; |
fNodeStack[0] = NULL; |
- |
- fSymbolTable = NULL; |
- fSetTable = NULL; |
- |
- fScanIndex = 0; |
- fNextIndex = 0; |
+ fNodeStackPtr = 0; |
fReverseRule = FALSE; |
fLookAheadRule = FALSE; |
+ fNoChainInRule = FALSE; |
- fLineNum = 1; |
- fCharNum = 0; |
- fQuoteMode = FALSE; |
+ fSymbolTable = NULL; |
+ fSetTable = NULL; |
+ fRuleNum = 0; |
+ fOptionStart = 0; |
// Do not check status until after all critical fields are sufficiently initialized |
// that the destructor can run cleanly. |
@@ -205,6 +210,12 @@ UBool RBBIRuleScanner::doParseActions(int32_t action) |
break; |
+ case doNoChain: |
+ // Scanned a '^' while on the rule start state. |
+ fNoChainInRule = TRUE; |
+ break; |
+ |
+ |
case doExprOrOperator: |
{ |
fixOpStack(RBBINode::precOpCat); |
@@ -318,11 +329,11 @@ UBool RBBIRuleScanner::doParseActions(int32_t action) |
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeStack("end of rule");} |
#endif |
U_ASSERT(fNodeStackPtr == 1); |
+ RBBINode *thisRule = fNodeStack[fNodeStackPtr]; |
// If this rule includes a look-ahead '/', add a endMark node to the |
// expression tree. |
if (fLookAheadRule) { |
- RBBINode *thisRule = fNodeStack[fNodeStackPtr]; |
RBBINode *endNode = pushNewNode(RBBINode::endMark); |
RBBINode *catNode = pushNewNode(RBBINode::opCat); |
if (U_FAILURE(*fRB->fStatus)) { |
@@ -334,8 +345,24 @@ UBool RBBIRuleScanner::doParseActions(int32_t action) |
fNodeStack[fNodeStackPtr] = catNode; |
endNode->fVal = fRuleNum; |
endNode->fLookAheadEnd = TRUE; |
+ thisRule = catNode; |
+ |
+ // TODO: Disable chaining out of look-ahead (hard break) rules. |
+ // The break on rule match is forced, so there is no point in building up |
+ // the state table to chain into another rule for a longer match. |
} |
+ // Mark this node as being the root of a rule. |
+ thisRule->fRuleRoot = TRUE; |
+ |
+ // Flag if chaining into this rule is wanted. |
+ // |
+ if (fRB->fChainRules && // If rule chaining is enabled globally via !!chain |
+ !fNoChainInRule) { // and no '^' chain-in inhibit was on this rule |
+ thisRule->fChainIn = TRUE; |
+ } |
+ |
+ |
// All rule expressions are ORed together. |
// The ';' that terminates an expression really just functions as a '|' with |
// a low operator prededence. |
@@ -372,6 +399,7 @@ UBool RBBIRuleScanner::doParseActions(int32_t action) |
} |
fReverseRule = FALSE; // in preparation for the next rule. |
fLookAheadRule = FALSE; |
+ fNoChainInRule = FALSE; |
fNodeStackPtr = 0; |
} |
break; |
@@ -994,7 +1022,7 @@ void RBBIRuleScanner::parse() { |
for (;;) { |
#ifdef RBBI_DEBUG |
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf(".");} |
+ if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf("."); fflush(stdout);} |
#endif |
if (tableEl->fCharClass < 127 && fC.fEscaped == FALSE && tableEl->fCharClass == fC.fChar) { |
// Table row specified an individual character, not a set, and |
@@ -1077,6 +1105,17 @@ void RBBIRuleScanner::parse() { |
} |
+ if (U_FAILURE(*fRB->fStatus)) { |
+ return; |
+ } |
+ |
+ // If there are no forward rules set an error. |
+ // |
+ if (fRB->fForwardTree == NULL) { |
+ error(U_BRK_RULE_SYNTAX); |
+ return; |
+ } |
+ |
// |
// If there were NO user specified reverse rules, set up the equivalent of ".*;" |
// |
@@ -1100,16 +1139,15 @@ void RBBIRuleScanner::parse() { |
// |
#ifdef RBBI_DEBUG |
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "symbols")) {fSymbolTable->rbbiSymtablePrint();} |
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ptree")) |
- { |
+ if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ptree")) { |
RBBIDebugPrintf("Completed Forward Rules Parse Tree...\n"); |
- fRB->fForwardTree->printTree(TRUE); |
+ RBBINode::printTree(fRB->fForwardTree, TRUE); |
RBBIDebugPrintf("\nCompleted Reverse Rules Parse Tree...\n"); |
- fRB->fReverseTree->printTree(TRUE); |
+ RBBINode::printTree(fRB->fReverseTree, TRUE); |
RBBIDebugPrintf("\nCompleted Safe Point Forward Rules Parse Tree...\n"); |
- fRB->fSafeFwdTree->printTree(TRUE); |
+ RBBINode::printTree(fRB->fSafeFwdTree, TRUE); |
RBBIDebugPrintf("\nCompleted Safe Point Reverse Rules Parse Tree...\n"); |
- fRB->fSafeRevTree->printTree(TRUE); |
+ RBBINode::printTree(fRB->fSafeRevTree, TRUE); |
} |
#endif |
} |
@@ -1124,7 +1162,7 @@ void RBBIRuleScanner::parse() { |
void RBBIRuleScanner::printNodeStack(const char *title) { |
int i; |
RBBIDebugPrintf("%s. Dumping node stack...\n", title); |
- for (i=fNodeStackPtr; i>0; i--) {fNodeStack[i]->printTree(TRUE);} |
+ for (i=fNodeStackPtr; i>0; i--) {RBBINode::printTree(fNodeStack[i], TRUE);} |
} |
#endif |