| Index: source/common/unistr.cpp
|
| diff --git a/source/common/unistr.cpp b/source/common/unistr.cpp
|
| index f957c536e187ea388875d161e226ee438fe24fde..bdd58ecd267c2eea3ea18cca12f2972b2325a555 100644
|
| --- a/source/common/unistr.cpp
|
| +++ b/source/common/unistr.cpp
|
| @@ -1,6 +1,8 @@
|
| +// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
| +// License & terms of use: http://www.unicode.org/copyright.html
|
| /*
|
| ******************************************************************************
|
| -* Copyright (C) 1999-2015, International Business Machines Corporation and
|
| +* Copyright (C) 1999-2016, International Business Machines Corporation and
|
| * others. All Rights Reserved.
|
| ******************************************************************************
|
| *
|
| @@ -82,7 +84,7 @@ us_arrayCopy(const UChar *src, int32_t srcStart,
|
| UChar *dst, int32_t dstStart, int32_t count)
|
| {
|
| if(count>0) {
|
| - uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
|
| + uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
|
| }
|
| }
|
|
|
| @@ -151,41 +153,39 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
|
| if(count <= 0 || (uint32_t)c > 0x10ffff) {
|
| // just allocate and do not do anything else
|
| allocate(capacity);
|
| - } else {
|
| - // count > 0, allocate and fill the new string with count c's
|
| - int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
|
| + } else if(c <= 0xffff) {
|
| + int32_t length = count;
|
| if(capacity < length) {
|
| capacity = length;
|
| }
|
| if(allocate(capacity)) {
|
| UChar *array = getArrayStart();
|
| - int32_t i = 0;
|
| -
|
| - // fill the new string with c
|
| - if(unitCount == 1) {
|
| - // fill with length UChars
|
| - while(i < length) {
|
| - array[i++] = (UChar)c;
|
| - }
|
| - } else {
|
| - // get the code units for c
|
| - UChar units[U16_MAX_LENGTH];
|
| - U16_APPEND_UNSAFE(units, i, c);
|
| -
|
| - // now it must be i==unitCount
|
| - i = 0;
|
| -
|
| - // for Unicode, unitCount can only be 1, 2, 3, or 4
|
| - // 1 is handled above
|
| - while(i < length) {
|
| - int32_t unitIdx = 0;
|
| - while(unitIdx < unitCount) {
|
| - array[i++]=units[unitIdx++];
|
| - }
|
| - }
|
| + UChar unit = (UChar)c;
|
| + for(int32_t i = 0; i < length; ++i) {
|
| + array[i] = unit;
|
| + }
|
| + setLength(length);
|
| + }
|
| + } else { // supplementary code point, write surrogate pairs
|
| + if(count > (INT32_MAX / 2)) {
|
| + // We would get more than 2G UChars.
|
| + allocate(capacity);
|
| + return;
|
| + }
|
| + int32_t length = count * 2;
|
| + if(capacity < length) {
|
| + capacity = length;
|
| + }
|
| + if(allocate(capacity)) {
|
| + UChar *array = getArrayStart();
|
| + UChar lead = U16_LEAD(c);
|
| + UChar trail = U16_TRAIL(c);
|
| + for(int32_t i = 0; i < length; i += 2) {
|
| + array[i] = lead;
|
| + array[i + 1] = trail;
|
| }
|
| + setLength(length);
|
| }
|
| - setLength(length);
|
| }
|
| }
|
|
|
| @@ -342,33 +342,60 @@ UnicodeString::clone() const {
|
| // array allocation
|
| //========================================
|
|
|
| +namespace {
|
| +
|
| +const int32_t kGrowSize = 128;  // fixed slack that getGrowCapacity() adds on top of the proportional growth
|
| +
|
| +// The number of bytes for one int32_t reference counter and capacity UChars
|
| +// must fit into a 32-bit size_t (at least when on a 32-bit platform).
|
| +// We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
|
| +// and round up to a multiple of 16 bytes.
|
| +// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
|
| +// (With more complicated checks we could go up to 0x7ffffffd without rounding up,
|
| +// but that does not seem worth it.)
|
| +const int32_t kMaxCapacity = 0x7ffffff5;
|
| +
|
| +int32_t getGrowCapacity(int32_t newLength) {  // returns newLength + newLength/4 + kGrowSize, capped at kMaxCapacity
|
| + int32_t growSize = (newLength >> 2) + kGrowSize;  // a quarter of the new length plus the fixed slack
|
| + if(growSize <= (kMaxCapacity - newLength)) {  // tested by subtraction so the sum is only formed when it stays within kMaxCapacity
|
| + return newLength + growSize;
|
| + } else {
|
| + return kMaxCapacity;  // the grown size would exceed the limit: clamp to the limit
|
| + }
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| UBool
|
| UnicodeString::allocate(int32_t capacity) {
|
| if(capacity <= US_STACKBUF_SIZE) {
|
| fUnion.fFields.fLengthAndFlags = kShortString;
|
| - } else {
|
| - // count bytes for the refCounter and the string capacity, and
|
| - // round up to a multiple of 16; then divide by 4 and allocate int32_t's
|
| - // to be safely aligned for the refCount
|
| - // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
|
| - int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
|
| - int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
|
| - if(array != 0) {
|
| + return TRUE;  // short-string path: returns before any uprv_malloc call
|
| + }
|
| + if(capacity <= kMaxCapacity) {  // larger requests skip this block and take the bogus/FALSE path at the end
|
| + ++capacity; // for the NUL
|
| + // Switch to size_t which is unsigned so that we can allocate up to 4GB.
|
| + // Reference counter + UChars.
|
| + size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
|
| + // Round up to a multiple of 16.
|
| + numBytes = (numBytes + 15) & ~15;  // ~15 is the int -16; converted to size_t it keeps every higher bit set, so only the low 4 bits are cleared
|
| + int32_t *array = (int32_t *) uprv_malloc(numBytes);
|
| + if(array != NULL) {
|
| // set initial refCount and point behind the refCount
|
| *array++ = 1;
|
| + numBytes -= sizeof(int32_t);  // drop the refCount word so the capacity computed below counts only UChar storage
|
|

| // have fArray point to the first UChar
|
| fUnion.fFields.fArray = (UChar *)array;
|
| - fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
|
| + fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);  // counts the NUL slot and any round-up padding as usable capacity
|
| fUnion.fFields.fLengthAndFlags = kLongString;
|
| - } else {
|
| - fUnion.fFields.fLengthAndFlags = kIsBogus;
|
| - fUnion.fFields.fArray = 0;
|
| - fUnion.fFields.fCapacity = 0;
|
| - return FALSE;
|
| + return TRUE;  // heap buffer allocated and fields initialized
|
| }
|
| }
|
| - return TRUE;
|
| + fUnion.fFields.fLengthAndFlags = kIsBogus;  // reached when capacity > kMaxCapacity or uprv_malloc returned NULL
|
| + fUnion.fFields.fArray = 0;
|
| + fUnion.fFields.fCapacity = 0;
|
| + return FALSE;
|
| }
|
|
|
| //========================================
|
| @@ -415,7 +442,7 @@ UnicodeString::~UnicodeString()
|
| // Factory methods
|
| //========================================
|
|
|
| -UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
|
| +UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
|
| UnicodeString result;
|
| result.setToUTF8(utf8);
|
| return result;
|
| @@ -521,15 +548,17 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
|
| }
|
| // else if(!fastCopy) fall through to case kWritableAlias
|
| // -> allocate a new buffer and copy the contents
|
| + U_FALLTHROUGH;
|
| case kWritableAlias: {
|
| // src is a writable alias; we make a copy of that instead
|
| int32_t srcLength = src.length();
|
| if(allocate(srcLength)) {
|
| - uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
|
| + u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
|
| setLength(srcLength);
|
| break;
|
| }
|
| // if there is not enough memory, then fall through to setting to bogus
|
| + U_FALLTHROUGH;
|
| }
|
| default:
|
| // if src is bogus, set ourselves to bogus
|
| @@ -853,7 +882,7 @@ UnicodeString::extract(UChar *dest, int32_t destCapacity,
|
| } else {
|
| const UChar *array = getArrayStart();
|
| if(len>0 && len<=destCapacity && array!=dest) {
|
| - uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
|
| + u_memcpy(dest, array, len);
|
| }
|
| return u_terminateUChars(dest, destCapacity, len, &errorCode);
|
| }
|
| @@ -1215,7 +1244,7 @@ UnicodeString::getTerminatedBuffer() {
|
| return array;
|
| }
|
| }
|
| - if(cloneArrayIfNeeded(len+1)) {
|
| + if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
|
| array = getArrayStart();
|
| array[len] = 0;
|
| return array;
|
| @@ -1297,7 +1326,7 @@ UnicodeString::setTo(UChar *buffer,
|
| return *this;
|
| }
|
|
|
| -UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
|
| +UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
|
| unBogus();
|
| int32_t length = utf8.length();
|
| int32_t capacity;
|
| @@ -1426,8 +1455,14 @@ UnicodeString::doReplace(int32_t start,
|
| // pin the indices to legal values
|
| pinIndices(start, length);
|
|
|
| - // calculate the size of the string after the replace
|
| - int32_t newLength = oldLength - length + srcLength;
|
| + // Calculate the size of the string after the replace.
|
| + // Avoid int32_t overflow.
|
| + int32_t newLength = oldLength - length;
|
| + if(srcLength > (INT32_MAX - newLength)) {
|
| + setToBogus();
|
| + return *this;
|
| + }
|
| + newLength += srcLength;
|
|
|
| // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
|
| // therefore we need to keep the current fArray
|
| @@ -1444,7 +1479,7 @@ UnicodeString::doReplace(int32_t start,
|
|
|
| // clone our array and allocate a bigger array if needed
|
| int32_t *bufferToDelete = 0;
|
| - if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
|
| + if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
|
| FALSE, &bufferToDelete)
|
| ) {
|
| return *this;
|
| @@ -1511,7 +1546,7 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng
|
| int32_t newLength = oldLength + srcLength;
|
| // optimize append() onto a large-enough, owned string
|
| if((newLength <= getCapacity() && isBufferWritable()) ||
|
| - cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize)) {
|
| + cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
|
| UChar *newArray = getArrayStart();
|
| // Do not copy characters when
|
| // UChar *buffer=str.getAppendBuffer(...);
|
| @@ -1859,7 +1894,9 @@ UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
|
| return NULL;
|
| }
|
| int32_t oldLength = str.length();
|
| - if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
|
| + if(minCapacity <= (kMaxCapacity - oldLength) &&
|
| + desiredCapacityHint <= (kMaxCapacity - oldLength) &&
|
| + str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
|
| *resultCapacity = str.getCapacity() - oldLength;
|
| return str.getArrayStart() + oldLength;
|
| }
|
|
|