| Index: source/common/rbbi.cpp
 | 
| diff --git a/source/common/rbbi.cpp b/source/common/rbbi.cpp
 | 
| index 19494af26a564a38909aac4af915be6d459f8b9c..2680bf216c789e9f9ee761b7fc939c6b847b6933 100644
 | 
| --- a/source/common/rbbi.cpp
 | 
| +++ b/source/common/rbbi.cpp
 | 
| @@ -1,6 +1,8 @@
 | 
| +// Copyright (C) 2016 and later: Unicode, Inc. and others.
 | 
| +// License & terms of use: http://www.unicode.org/copyright.html
 | 
|  /*
 | 
|  ***************************************************************************
 | 
| -*   Copyright (C) 1999-2014 International Business Machines Corporation
 | 
| +*   Copyright (C) 1999-2016 International Business Machines Corporation
 | 
|  *   and others. All rights reserved.
 | 
|  ***************************************************************************
 | 
|  */
 | 
| @@ -72,21 +74,6 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode
 | 
|      }
 | 
|  }
 | 
|  
 | 
| -/**
 | 
| - * Same as above but does not adopt memory
 | 
| - */
 | 
| -RuleBasedBreakIterator::RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
 | 
| -{
 | 
| -    init();
 | 
| -    fData = new RBBIDataWrapper(data, RBBIDataWrapper::kDontAdopt, status); // status checked in constructor
 | 
| -    if (U_FAILURE(status)) {return;}
 | 
| -    if(fData == 0) {
 | 
| -        status = U_MEMORY_ALLOCATION_ERROR;
 | 
| -        return;
 | 
| -    }
 | 
| -}
 | 
| -
 | 
| -
 | 
|  //
 | 
|  //  Construct from precompiled binary rules (tables).  This constructor is public API,
 | 
|  //  taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules().
 | 
| @@ -715,7 +702,7 @@ int32_t RuleBasedBreakIterator::following(int32_t offset) {
 | 
|      // Move requested offset to a code point start. It might be on a trail surrogate,
 | 
|      // or on a trail byte if the input is UTF-8.
 | 
|      utext_setNativeIndex(fText, offset);
 | 
| -    offset = utext_getNativeIndex(fText);
 | 
| +    offset = (int32_t)utext_getNativeIndex(fText);
 | 
|  
 | 
|      // if we have cached break positions and offset is in the range
 | 
|      // covered by them, use them
 | 
| @@ -826,7 +813,7 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
 | 
|      // Move requested offset to a code point start. It might be on a trail surrogate,
 | 
|      // or on a trail byte if the input is UTF-8.
 | 
|      utext_setNativeIndex(fText, offset);
 | 
| -    offset = utext_getNativeIndex(fText);
 | 
| +    offset = (int32_t)utext_getNativeIndex(fText);
 | 
|  
 | 
|      // if we have cached break positions and offset is in the range
 | 
|      // covered by them, use them
 | 
| @@ -983,6 +970,54 @@ enum RBBIRunMode {
 | 
|  };
 | 
|  
 | 
|  
 | 
| +// Map from look-ahead break states (corresponds to rules) to boundary positions.
 | 
| +// Allows multiple lookahead break rules to be in flight at the same time.
 | 
| +//
 | 
| +// This is a temporary approach for ICU 57. A better fix is to make the look-ahead numbers
 | 
| +// in the state table be sequential, then we can just index an array. And the
 | 
| +// table could also tell us in advance how big that array needs to be.
 | 
| +//
 | 
| +// Before ICU 57 there was just a single simple variable for a look-ahead match that
 | 
| +// was in progress. Two rules at once did not work.
 | 
| +static const int32_t kMaxLookaheads = 8;    // Capacity of LookAheadResults; sizes both arrays.
 | 
| +struct LookAheadResults {
 | 
| +    int32_t    fUsedSlotLimit;              // Slots [0, fUsedSlotLimit) hold valid entries.
 | 
| +    int32_t    fPositions[kMaxLookaheads];  // Position recorded for the key in the same slot.
 | 
| +    int16_t    fKeys[kMaxLookaheads];       // Look-ahead rule number stored in each slot.
 | 
| +    LookAheadResults() : fUsedSlotLimit(0), fPositions(), fKeys() {}
 | 
| +
 | 
| +    // Return the position recorded for key. Trips U_ASSERT and returns -1 if there is none.
 | 
| +    int32_t getPosition(int16_t key) const {
 | 
| +        for (int32_t i=0; i<fUsedSlotLimit; ++i) {
 | 
| +            if (fKeys[i] == key) {
 | 
| +                return fPositions[i];
 | 
| +            }
 | 
| +        }
 | 
| +        U_ASSERT(FALSE);
 | 
| +        return -1;
 | 
| +    }
 | 
| +
 | 
| +    // Record position for key, overwriting any entry already stored for the same key.
 | 
| +    void setPosition(int16_t key, int32_t position) {
 | 
| +        int32_t i;
 | 
| +        for (i=0; i<fUsedSlotLimit; ++i) {
 | 
| +            if (fKeys[i] == key) {
 | 
| +                fPositions[i] = position;
 | 
| +                return;
 | 
| +            }
 | 
| +        }
 | 
| +        if (i >= kMaxLookaheads) {
 | 
| +            U_ASSERT(FALSE);
 | 
| +            i = kMaxLookaheads - 1;         // Table is full: reuse the last slot, stay in bounds.
 | 
| +        }
 | 
| +        fKeys[i] = key;
 | 
| +        fPositions[i] = position;
 | 
| +        U_ASSERT(fUsedSlotLimit == i);
 | 
| +        fUsedSlotLimit = i + 1;
 | 
| +    }
 | 
| +};
 | 
| +
 | 
| +
 | 
|  //-----------------------------------------------------------------------------------
 | 
|  //
 | 
|  //  handleNext(stateTable)
 | 
| @@ -1000,14 +1035,11 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
 | 
|      
 | 
|      RBBIStateTableRow  *row;
 | 
|      UChar32             c;
 | 
| -    int32_t             lookaheadStatus = 0;
 | 
| -    int32_t             lookaheadTagIdx = 0;
 | 
| -    int32_t             result          = 0;
 | 
| -    int32_t             initialPosition = 0;
 | 
| -    int32_t             lookaheadResult = 0;
 | 
| -    UBool               lookAheadHardBreak = (statetable->fFlags & RBBI_LOOKAHEAD_HARD_BREAK) != 0;
 | 
| -    const char         *tableData       = statetable->fTableData;
 | 
| -    uint32_t            tableRowLen     = statetable->fRowLen;
 | 
| +    LookAheadResults    lookAheadMatches;
 | 
| +    int32_t             result             = 0;
 | 
| +    int32_t             initialPosition    = 0;
 | 
| +    const char         *tableData          = statetable->fTableData;
 | 
| +    uint32_t            tableRowLen        = statetable->fRowLen;
 | 
|  
 | 
|      #ifdef RBBI_DEBUG
 | 
|          if (fTrace) {
 | 
| @@ -1050,14 +1082,6 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
 | 
|                  // We have already run the loop one last time with the 
 | 
|                  //   character set to the psueudo {eof} value.  Now it is time
 | 
|                  //   to unconditionally bail out.
 | 
| -                if (lookaheadResult > result) {
 | 
| -                    // We ran off the end of the string with a pending look-ahead match.
 | 
| -                    // Treat this as if the look-ahead condition had been met, and return
 | 
| -                    //  the match at the / position from the look-ahead rule.
 | 
| -                    result               = lookaheadResult;
 | 
| -                    fLastRuleStatusIndex = lookaheadTagIdx;
 | 
| -                    lookaheadStatus = 0;
 | 
| -                } 
 | 
|                  break;
 | 
|              }
 | 
|              // Run the loop one last time with the fake end-of-input character category.
 | 
| @@ -1123,38 +1147,23 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
 | 
|              fLastRuleStatusIndex = row->fTagIdx;   // Remember the break status (tag) values.
 | 
|          }
 | 
|  
 | 
| -        if (row->fLookAhead != 0) {
 | 
| -            if (lookaheadStatus != 0
 | 
| -                && row->fAccepting == lookaheadStatus) {
 | 
| -                // Lookahead match is completed.  
 | 
| -                result               = lookaheadResult;
 | 
| -                fLastRuleStatusIndex = lookaheadTagIdx;
 | 
| -                lookaheadStatus      = 0;
 | 
| -                // TODO:  make a standalone hard break in a rule work.
 | 
| -                if (lookAheadHardBreak) {
 | 
| -                    UTEXT_SETNATIVEINDEX(fText, result);
 | 
| -                    return result;
 | 
| -                }
 | 
| -                // Look-ahead completed, but other rules may match further.  Continue on
 | 
| -                //  TODO:  junk this feature?  I don't think it's used anywhwere.
 | 
| -                goto continueOn;
 | 
| +        int16_t completedRule = row->fAccepting;
 | 
| +        if (completedRule > 0) {
 | 
| +            // Lookahead match is completed.  
 | 
| +            int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule);
 | 
| +            if (lookaheadResult >= 0) {
 | 
| +                fLastRuleStatusIndex = row->fTagIdx;
 | 
| +                UTEXT_SETNATIVEINDEX(fText, lookaheadResult);
 | 
| +                return lookaheadResult;
 | 
|              }
 | 
| -
 | 
| -            int32_t  r = (int32_t)UTEXT_GETNATIVEINDEX(fText);
 | 
| -            lookaheadResult = r;
 | 
| -            lookaheadStatus = row->fLookAhead;
 | 
| -            lookaheadTagIdx = row->fTagIdx;
 | 
| -            goto continueOn;
 | 
|          }
 | 
| -
 | 
| -
 | 
| -        if (row->fAccepting != 0) {
 | 
| -            // Because this is an accepting state, any in-progress look-ahead match
 | 
| -            //   is no longer relavant.  Clear out the pending lookahead status.
 | 
| -            lookaheadStatus = 0;           // clear out any pending look-ahead match.
 | 
| +        int16_t rule = row->fLookAhead;
 | 
| +        if (rule != 0) {
 | 
| +            // At the position of a '/' in a look-ahead match. Record it.
 | 
| +            int32_t  pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
 | 
| +            lookAheadMatches.setPosition(rule, pos);
 | 
|          }
 | 
|  
 | 
| -continueOn:
 | 
|          if (state == STOP_STATE) {
 | 
|              // This is the normal exit from the lookup state machine.
 | 
|              // We have advanced through the string until it is certain that no
 | 
| @@ -1216,11 +1225,9 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
 | 
|      RBBIRunMode         mode;
 | 
|      RBBIStateTableRow  *row;
 | 
|      UChar32             c;
 | 
| -    int32_t             lookaheadStatus = 0;
 | 
| +    LookAheadResults    lookAheadMatches;
 | 
|      int32_t             result          = 0;
 | 
|      int32_t             initialPosition = 0;
 | 
| -    int32_t             lookaheadResult = 0;
 | 
| -    UBool               lookAheadHardBreak = (statetable->fFlags & RBBI_LOOKAHEAD_HARD_BREAK) != 0;
 | 
|  
 | 
|      #ifdef RBBI_DEBUG
 | 
|          if (fTrace) {
 | 
| @@ -1266,13 +1273,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
 | 
|                  // We have already run the loop one last time with the 
 | 
|                  //   character set to the psueudo {eof} value.  Now it is time
 | 
|                  //   to unconditionally bail out.
 | 
| -                if (lookaheadResult < result) {
 | 
| -                    // We ran off the end of the string with a pending look-ahead match.
 | 
| -                    // Treat this as if the look-ahead condition had been met, and return
 | 
| -                    //  the match at the / position from the look-ahead rule.
 | 
| -                    result               = lookaheadResult;
 | 
| -                    lookaheadStatus = 0;
 | 
| -                } else if (result == initialPosition) {
 | 
| +                if (result == initialPosition) {
 | 
|                      // Ran off start, no match found.
 | 
|                      // move one index one (towards the start, since we are doing a previous())
 | 
|                      UTEXT_SETNATIVEINDEX(fText, initialPosition);
 | 
| @@ -1338,36 +1339,22 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
 | 
|              result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
 | 
|          }
 | 
|  
 | 
| -        if (row->fLookAhead != 0) {
 | 
| -            if (lookaheadStatus != 0
 | 
| -                && row->fAccepting == lookaheadStatus) {
 | 
| -                // Lookahead match is completed.  
 | 
| -                result               = lookaheadResult;
 | 
| -                lookaheadStatus      = 0;
 | 
| -                // TODO:  make a standalone hard break in a rule work.
 | 
| -                if (lookAheadHardBreak) {
 | 
| -                    UTEXT_SETNATIVEINDEX(fText, result);
 | 
| -                    return result;
 | 
| -                }
 | 
| -                // Look-ahead completed, but other rules may match further.  Continue on
 | 
| -                //  TODO:  junk this feature?  I don't think it's used anywhwere.
 | 
| -                goto continueOn;
 | 
| +        int16_t completedRule = row->fAccepting;
 | 
| +        if (completedRule > 0) {
 | 
| +            // Lookahead match is completed.  
 | 
| +            int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule);
 | 
| +            if (lookaheadResult >= 0) {
 | 
| +                UTEXT_SETNATIVEINDEX(fText, lookaheadResult);
 | 
| +                return lookaheadResult;
 | 
|              }
 | 
| -
 | 
| -            int32_t  r = (int32_t)UTEXT_GETNATIVEINDEX(fText);
 | 
| -            lookaheadResult = r;
 | 
| -            lookaheadStatus = row->fLookAhead;
 | 
| -            goto continueOn;
 | 
|          }
 | 
| -
 | 
| -
 | 
| -        if (row->fAccepting != 0) {
 | 
| -            // Because this is an accepting state, any in-progress look-ahead match
 | 
| -            //   is no longer relavant.  Clear out the pending lookahead status.
 | 
| -            lookaheadStatus = 0;    
 | 
| +        int16_t rule = row->fLookAhead;
 | 
| +        if (rule != 0) {
 | 
| +            // At the position of a '/' in a look-ahead match. Record it.
 | 
| +            int32_t  pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
 | 
| +            lookAheadMatches.setPosition(rule, pos);
 | 
|          }
 | 
|  
 | 
| -continueOn:
 | 
|          if (state == STOP_STATE) {
 | 
|              // This is the normal exit from the lookup state machine.
 | 
|              // We have advanced through the string until it is certain that no
 | 
| 
 |