Index: source/common/unistr.cpp |
diff --git a/source/common/unistr.cpp b/source/common/unistr.cpp |
index f957c536e187ea388875d161e226ee438fe24fde..bdd58ecd267c2eea3ea18cca12f2972b2325a555 100644 |
--- a/source/common/unistr.cpp |
+++ b/source/common/unistr.cpp |
@@ -1,6 +1,8 @@ |
+// Copyright (C) 2016 and later: Unicode, Inc. and others. |
+// License & terms of use: http://www.unicode.org/copyright.html |
/* |
****************************************************************************** |
-* Copyright (C) 1999-2015, International Business Machines Corporation and |
+* Copyright (C) 1999-2016, International Business Machines Corporation and |
* others. All Rights Reserved. |
****************************************************************************** |
* |
@@ -82,7 +84,7 @@ us_arrayCopy(const UChar *src, int32_t srcStart, |
UChar *dst, int32_t dstStart, int32_t count) |
{ |
if(count>0) { |
- uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src))); |
+ uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src)); |
} |
} |
@@ -151,41 +153,39 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) { |
if(count <= 0 || (uint32_t)c > 0x10ffff) { |
// just allocate and do not do anything else |
allocate(capacity); |
- } else { |
- // count > 0, allocate and fill the new string with count c's |
- int32_t unitCount = U16_LENGTH(c), length = count * unitCount; |
+ } else if(c <= 0xffff) { |
+ int32_t length = count; |
if(capacity < length) { |
capacity = length; |
} |
if(allocate(capacity)) { |
UChar *array = getArrayStart(); |
- int32_t i = 0; |
- |
- // fill the new string with c |
- if(unitCount == 1) { |
- // fill with length UChars |
- while(i < length) { |
- array[i++] = (UChar)c; |
- } |
- } else { |
- // get the code units for c |
- UChar units[U16_MAX_LENGTH]; |
- U16_APPEND_UNSAFE(units, i, c); |
- |
- // now it must be i==unitCount |
- i = 0; |
- |
- // for Unicode, unitCount can only be 1, 2, 3, or 4 |
- // 1 is handled above |
- while(i < length) { |
- int32_t unitIdx = 0; |
- while(unitIdx < unitCount) { |
- array[i++]=units[unitIdx++]; |
- } |
- } |
+ UChar unit = (UChar)c; |
+ for(int32_t i = 0; i < length; ++i) { |
+ array[i] = unit; |
+ } |
+ setLength(length); |
+ } |
+ } else { // supplementary code point, write surrogate pairs |
+ if(count > (INT32_MAX / 2)) { |
+ // count * 2 would overflow int32_t (more than 2G UChars): allocate only, do not fill. |
+ allocate(capacity); |
+ return; |
+ } |
+ int32_t length = count * 2; |
+ if(capacity < length) { |
+ capacity = length; |
+ } |
+ if(allocate(capacity)) { |
+ UChar *array = getArrayStart(); |
+ UChar lead = U16_LEAD(c); |
+ UChar trail = U16_TRAIL(c); |
+ for(int32_t i = 0; i < length; i += 2) { |
+ array[i] = lead; |
+ array[i + 1] = trail; |
} |
+ setLength(length); |
} |
- setLength(length); |
} |
} |
@@ -342,33 +342,60 @@ UnicodeString::clone() const { |
// array allocation |
//======================================== |
+namespace { |
+ |
+// Fixed number of UChars that getGrowCapacity() adds on top of the proportional growth. |
+const int32_t kGrowSize = 128; |
+ |
+// Largest capacity (in UChars) that allocate() accepts. |
+// One heap block holds an int32_t reference counter, capacity UChars, and one more |
+// UChar for the NUL terminator (to avoid reallocation in getTerminatedBuffer()). |
+// Its byte count, rounded up to a multiple of 16, must fit into a 32-bit size_t |
+// (at least when on a 32-bit platform): (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5. |
+// (More complicated checks could allow up to 0x7ffffffd without rounding up; not worth it.) |
+const int32_t kMaxCapacity = 0x7ffffff5; |
+ |
+// Returns the capacity to request when growing to newLength UChars: |
+// newLength + (newLength >> 2) + kGrowSize, capped at kMaxCapacity. |
+int32_t getGrowCapacity(int32_t newLength) { |
+  const int32_t headroom = (newLength >> 2) + kGrowSize; |
+  // Compare against the room that is left rather than adding first, |
+  // so that the sum cannot overflow int32_t. |
+  return headroom > (kMaxCapacity - newLength) ? kMaxCapacity : newLength + headroom; |
+} |
+ |
+} // namespace |
+ |
UBool |
UnicodeString::allocate(int32_t capacity) { |
if(capacity <= US_STACKBUF_SIZE) { |
fUnion.fFields.fLengthAndFlags = kShortString; |
- } else { |
- // count bytes for the refCounter and the string capacity, and |
- // round up to a multiple of 16; then divide by 4 and allocate int32_t's |
- // to be safely aligned for the refCount |
- // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer() |
- int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2); |
- int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words ); |
- if(array != 0) { |
+ return TRUE; |
+ } |
+ if(capacity <= kMaxCapacity) { |
+ ++capacity; // for the NUL |
+ // Compute the size in size_t, which is unsigned, so that we can allocate up to 4GB. |
+ // Bytes for the int32_t reference counter plus capacity UChars (including the NUL). |
+ size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR; |
+ // Round up to a multiple of 16. |
+ numBytes = (numBytes + 15) & ~15; |
+ int32_t *array = (int32_t *) uprv_malloc(numBytes); |
+ if(array != NULL) { |
// set initial refCount and point behind the refCount |
*array++ = 1; |
+ numBytes -= sizeof(int32_t); |
// have fArray point to the first UChar |
fUnion.fFields.fArray = (UChar *)array; |
- fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR)); |
+ fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR); |
fUnion.fFields.fLengthAndFlags = kLongString; |
- } else { |
- fUnion.fFields.fLengthAndFlags = kIsBogus; |
- fUnion.fFields.fArray = 0; |
- fUnion.fFields.fCapacity = 0; |
- return FALSE; |
+ return TRUE; |
} |
} |
- return TRUE; |
+ fUnion.fFields.fLengthAndFlags = kIsBogus; |
+ fUnion.fFields.fArray = 0; |
+ fUnion.fFields.fCapacity = 0; |
+ return FALSE; |
} |
//======================================== |
@@ -415,7 +442,7 @@ UnicodeString::~UnicodeString() |
// Factory methods |
//======================================== |
-UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) { |
+UnicodeString UnicodeString::fromUTF8(StringPiece utf8) { |
UnicodeString result; |
result.setToUTF8(utf8); |
return result; |
@@ -521,15 +548,17 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { |
} |
// else if(!fastCopy) fall through to case kWritableAlias |
// -> allocate a new buffer and copy the contents |
+ U_FALLTHROUGH; |
case kWritableAlias: { |
// src is a writable alias; we make a copy of that instead |
int32_t srcLength = src.length(); |
if(allocate(srcLength)) { |
- uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR); |
+ u_memcpy(getArrayStart(), src.getArrayStart(), srcLength); |
setLength(srcLength); |
break; |
} |
// if there is not enough memory, then fall through to setting to bogus |
+ U_FALLTHROUGH; |
} |
default: |
// if src is bogus, set ourselves to bogus |
@@ -853,7 +882,7 @@ UnicodeString::extract(UChar *dest, int32_t destCapacity, |
} else { |
const UChar *array = getArrayStart(); |
if(len>0 && len<=destCapacity && array!=dest) { |
- uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR); |
+ u_memcpy(dest, array, len); |
} |
return u_terminateUChars(dest, destCapacity, len, &errorCode); |
} |
@@ -1215,7 +1244,7 @@ UnicodeString::getTerminatedBuffer() { |
return array; |
} |
} |
- if(cloneArrayIfNeeded(len+1)) { |
+ if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) { |
array = getArrayStart(); |
array[len] = 0; |
return array; |
@@ -1297,7 +1326,7 @@ UnicodeString::setTo(UChar *buffer, |
return *this; |
} |
-UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) { |
+UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) { |
unBogus(); |
int32_t length = utf8.length(); |
int32_t capacity; |
@@ -1426,8 +1455,14 @@ UnicodeString::doReplace(int32_t start, |
// pin the indices to legal values |
pinIndices(start, length); |
- // calculate the size of the string after the replace |
- int32_t newLength = oldLength - length + srcLength; |
+ // Calculate the size of the string after the replace. |
+ // Avoid int32_t overflow. |
+ int32_t newLength = oldLength - length; |
+ if(srcLength > (INT32_MAX - newLength)) { |
+ setToBogus(); |
+ return *this; |
+ } |
+ newLength += srcLength; |
// cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents; |
// therefore we need to keep the current fArray |
@@ -1444,7 +1479,7 @@ UnicodeString::doReplace(int32_t start, |
// clone our array and allocate a bigger array if needed |
int32_t *bufferToDelete = 0; |
- if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize, |
+ if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength), |
FALSE, &bufferToDelete) |
) { |
return *this; |
@@ -1511,7 +1546,7 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng |
int32_t newLength = oldLength + srcLength; |
// optimize append() onto a large-enough, owned string |
if((newLength <= getCapacity() && isBufferWritable()) || |
- cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize)) { |
+ cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) { |
UChar *newArray = getArrayStart(); |
// Do not copy characters when |
// UChar *buffer=str.getAppendBuffer(...); |
@@ -1859,7 +1894,9 @@ UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity, |
return NULL; |
} |
int32_t oldLength = str.length(); |
- if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) { |
+ if(minCapacity <= (kMaxCapacity - oldLength) && |
+ desiredCapacityHint <= (kMaxCapacity - oldLength) && |
+ str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) { |
*resultCapacity = str.getCapacity() - oldLength; |
return str.getArrayStart() + oldLength; |
} |