| Index: source/common/rbbiscan.cpp
|
| diff --git a/source/common/rbbiscan.cpp b/source/common/rbbiscan.cpp
|
| index 1dc6b704d01094c7233e9d3d5f3b19566b7d5b3e..df30f2c204acfbf5e1c43627d30a4b95459d8ef3 100644
|
| --- a/source/common/rbbiscan.cpp
|
| +++ b/source/common/rbbiscan.cpp
|
| @@ -1,7 +1,9 @@
|
| +// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
| +// License & terms of use: http://www.unicode.org/copyright.html
|
| //
|
| // file: rbbiscan.cpp
|
| //
|
| -// Copyright (C) 2002-2015, International Business Machines Corporation and others.
|
| +// Copyright (C) 2002-2016, International Business Machines Corporation and others.
|
| // All Rights Reserved.
|
| //
|
| // This file contains the Rule Based Break Iterator Rule Builder functions for
|
| @@ -87,24 +89,27 @@ U_NAMESPACE_BEGIN
|
| RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
|
| {
|
| fRB = rb;
|
| + fScanIndex = 0;
|
| + fNextIndex = 0;
|
| + fQuoteMode = FALSE;
|
| + fLineNum = 1;
|
| + fCharNum = 0;
|
| + fLastChar = 0;
|
| +
|
| + fStateTable = NULL;
|
| + fStack[0] = 0;
|
| fStackPtr = 0;
|
| - fStack[fStackPtr] = 0;
|
| - fNodeStackPtr = 0;
|
| - fRuleNum = 0;
|
| fNodeStack[0] = NULL;
|
| -
|
| - fSymbolTable = NULL;
|
| - fSetTable = NULL;
|
| -
|
| - fScanIndex = 0;
|
| - fNextIndex = 0;
|
| + fNodeStackPtr = 0;
|
|
|
| fReverseRule = FALSE;
|
| fLookAheadRule = FALSE;
|
| + fNoChainInRule = FALSE;
|
|
|
| - fLineNum = 1;
|
| - fCharNum = 0;
|
| - fQuoteMode = FALSE;
|
| + fSymbolTable = NULL;
|
| + fSetTable = NULL;
|
| + fRuleNum = 0;
|
| + fOptionStart = 0;
|
|
|
| // Do not check status until after all critical fields are sufficiently initialized
|
| // that the destructor can run cleanly.
|
| @@ -205,6 +210,12 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
|
| break;
|
|
|
|
|
| + case doNoChain:
|
| + // Scanned a '^' while on the rule start state.
|
| + fNoChainInRule = TRUE;
|
| + break;
|
| +
|
| +
|
| case doExprOrOperator:
|
| {
|
| fixOpStack(RBBINode::precOpCat);
|
| @@ -318,11 +329,11 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
|
| if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeStack("end of rule");}
|
| #endif
|
| U_ASSERT(fNodeStackPtr == 1);
|
| + RBBINode *thisRule = fNodeStack[fNodeStackPtr];
|
|
|
| // If this rule includes a look-ahead '/', add a endMark node to the
|
| // expression tree.
|
| if (fLookAheadRule) {
|
| - RBBINode *thisRule = fNodeStack[fNodeStackPtr];
|
| RBBINode *endNode = pushNewNode(RBBINode::endMark);
|
| RBBINode *catNode = pushNewNode(RBBINode::opCat);
|
| if (U_FAILURE(*fRB->fStatus)) {
|
| @@ -334,8 +345,24 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
|
| fNodeStack[fNodeStackPtr] = catNode;
|
| endNode->fVal = fRuleNum;
|
| endNode->fLookAheadEnd = TRUE;
|
| + thisRule = catNode;
|
| +
|
| + // TODO: Disable chaining out of look-ahead (hard break) rules.
|
| + // The break on rule match is forced, so there is no point in building up
|
| + // the state table to chain into another rule for a longer match.
|
| }
|
|
|
| + // Mark this node as being the root of a rule.
|
| + thisRule->fRuleRoot = TRUE;
|
| +
|
| + // Flag if chaining into this rule is wanted.
|
| + //
|
| + if (fRB->fChainRules && // If rule chaining is enabled globally via !!chain
|
| + !fNoChainInRule) { // and no '^' chain-in inhibit was on this rule
|
| + thisRule->fChainIn = TRUE;
|
| + }
|
| +
|
| +
|
| // All rule expressions are ORed together.
|
| // The ';' that terminates an expression really just functions as a '|' with
|
| // a low operator prededence.
|
| @@ -372,6 +399,7 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
|
| }
|
| fReverseRule = FALSE; // in preparation for the next rule.
|
| fLookAheadRule = FALSE;
|
| + fNoChainInRule = FALSE;
|
| fNodeStackPtr = 0;
|
| }
|
| break;
|
| @@ -994,7 +1022,7 @@ void RBBIRuleScanner::parse() {
|
|
|
| for (;;) {
|
| #ifdef RBBI_DEBUG
|
| - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf(".");}
|
| + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf("."); fflush(stdout);}
|
| #endif
|
| if (tableEl->fCharClass < 127 && fC.fEscaped == FALSE && tableEl->fCharClass == fC.fChar) {
|
| // Table row specified an individual character, not a set, and
|
| @@ -1077,6 +1105,17 @@ void RBBIRuleScanner::parse() {
|
|
|
| }
|
|
|
| + if (U_FAILURE(*fRB->fStatus)) {
|
| + return;
|
| + }
|
| +
|
| +    // If there are no forward rules, set an error.
|
| + //
|
| + if (fRB->fForwardTree == NULL) {
|
| + error(U_BRK_RULE_SYNTAX);
|
| + return;
|
| + }
|
| +
|
| //
|
| // If there were NO user specified reverse rules, set up the equivalent of ".*;"
|
| //
|
| @@ -1100,16 +1139,15 @@ void RBBIRuleScanner::parse() {
|
| //
|
| #ifdef RBBI_DEBUG
|
| if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "symbols")) {fSymbolTable->rbbiSymtablePrint();}
|
| - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ptree"))
|
| - {
|
| + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ptree")) {
|
| RBBIDebugPrintf("Completed Forward Rules Parse Tree...\n");
|
| - fRB->fForwardTree->printTree(TRUE);
|
| + RBBINode::printTree(fRB->fForwardTree, TRUE);
|
| RBBIDebugPrintf("\nCompleted Reverse Rules Parse Tree...\n");
|
| - fRB->fReverseTree->printTree(TRUE);
|
| + RBBINode::printTree(fRB->fReverseTree, TRUE);
|
| RBBIDebugPrintf("\nCompleted Safe Point Forward Rules Parse Tree...\n");
|
| - fRB->fSafeFwdTree->printTree(TRUE);
|
| + RBBINode::printTree(fRB->fSafeFwdTree, TRUE);
|
| RBBIDebugPrintf("\nCompleted Safe Point Reverse Rules Parse Tree...\n");
|
| - fRB->fSafeRevTree->printTree(TRUE);
|
| + RBBINode::printTree(fRB->fSafeRevTree, TRUE);
|
| }
|
| #endif
|
| }
|
| @@ -1124,7 +1162,7 @@ void RBBIRuleScanner::parse() {
|
| void RBBIRuleScanner::printNodeStack(const char *title) {
|
| int i;
|
| RBBIDebugPrintf("%s. Dumping node stack...\n", title);
|
| - for (i=fNodeStackPtr; i>0; i--) {fNodeStack[i]->printTree(TRUE);}
|
| + for (i=fNodeStackPtr; i>0; i--) {RBBINode::printTree(fNodeStack[i], TRUE);}
|
| }
|
| #endif
|
|
|
|
|