Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(814)

Unified Diff: source/common/unistr.cpp

Issue 2440913002: Update ICU to 58.1
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/common/unisetspan.cpp ('k') | source/common/unistr_case.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/common/unistr.cpp
diff --git a/source/common/unistr.cpp b/source/common/unistr.cpp
index f957c536e187ea388875d161e226ee438fe24fde..bdd58ecd267c2eea3ea18cca12f2972b2325a555 100644
--- a/source/common/unistr.cpp
+++ b/source/common/unistr.cpp
@@ -1,6 +1,8 @@
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
-* Copyright (C) 1999-2015, International Business Machines Corporation and
+* Copyright (C) 1999-2016, International Business Machines Corporation and
* others. All Rights Reserved.
******************************************************************************
*
@@ -82,7 +84,7 @@ us_arrayCopy(const UChar *src, int32_t srcStart,
UChar *dst, int32_t dstStart, int32_t count)
{
if(count>0) {
- uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
+ uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
}
}
@@ -151,41 +153,39 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
if(count <= 0 || (uint32_t)c > 0x10ffff) {
// just allocate and do not do anything else
allocate(capacity);
- } else {
- // count > 0, allocate and fill the new string with count c's
- int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
+ } else if(c <= 0xffff) {
+ int32_t length = count;
if(capacity < length) {
capacity = length;
}
if(allocate(capacity)) {
UChar *array = getArrayStart();
- int32_t i = 0;
-
- // fill the new string with c
- if(unitCount == 1) {
- // fill with length UChars
- while(i < length) {
- array[i++] = (UChar)c;
- }
- } else {
- // get the code units for c
- UChar units[U16_MAX_LENGTH];
- U16_APPEND_UNSAFE(units, i, c);
-
- // now it must be i==unitCount
- i = 0;
-
- // for Unicode, unitCount can only be 1, 2, 3, or 4
- // 1 is handled above
- while(i < length) {
- int32_t unitIdx = 0;
- while(unitIdx < unitCount) {
- array[i++]=units[unitIdx++];
- }
- }
+ UChar unit = (UChar)c;
+ for(int32_t i = 0; i < length; ++i) {
+ array[i] = unit;
+ }
+ setLength(length);
+ }
+ } else { // supplementary code point, write surrogate pairs
+ if(count > (INT32_MAX / 2)) {
+ // We would get more than 2G UChars.
+ allocate(capacity);
+ return;
+ }
+ int32_t length = count * 2;
+ if(capacity < length) {
+ capacity = length;
+ }
+ if(allocate(capacity)) {
+ UChar *array = getArrayStart();
+ UChar lead = U16_LEAD(c);
+ UChar trail = U16_TRAIL(c);
+ for(int32_t i = 0; i < length; i += 2) {
+ array[i] = lead;
+ array[i + 1] = trail;
}
+ setLength(length);
}
- setLength(length);
}
}
@@ -342,33 +342,60 @@ UnicodeString::clone() const {
// array allocation
//========================================
+namespace {
+
+const int32_t kGrowSize = 128;
+
+// The number of bytes for one int32_t reference counter and capacity UChars
+// must fit into a 32-bit size_t (at least when on a 32-bit platform).
+// We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
+// and round up to a multiple of 16 bytes.
+// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
+// (With more complicated checks we could go up to 0x7ffffffd without rounding up,
+// but that does not seem worth it.)
+const int32_t kMaxCapacity = 0x7ffffff5;
+
+int32_t getGrowCapacity(int32_t newLength) {
+ int32_t growSize = (newLength >> 2) + kGrowSize;
+ if(growSize <= (kMaxCapacity - newLength)) {
+ return newLength + growSize;
+ } else {
+ return kMaxCapacity;
+ }
+}
+
+} // namespace
+
UBool
UnicodeString::allocate(int32_t capacity) {
if(capacity <= US_STACKBUF_SIZE) {
fUnion.fFields.fLengthAndFlags = kShortString;
- } else {
- // count bytes for the refCounter and the string capacity, and
- // round up to a multiple of 16; then divide by 4 and allocate int32_t's
- // to be safely aligned for the refCount
- // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
- int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
- int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
- if(array != 0) {
+ return TRUE;
+ }
+ if(capacity <= kMaxCapacity) {
+ ++capacity; // for the NUL
+ // Switch to size_t which is unsigned so that we can allocate up to 4GB.
+ // Reference counter + UChars.
+ size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
+ // Round up to a multiple of 16.
+ numBytes = (numBytes + 15) & ~15;
+ int32_t *array = (int32_t *) uprv_malloc(numBytes);
+ if(array != NULL) {
// set initial refCount and point behind the refCount
*array++ = 1;
+ numBytes -= sizeof(int32_t);
// have fArray point to the first UChar
fUnion.fFields.fArray = (UChar *)array;
- fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
+ fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
fUnion.fFields.fLengthAndFlags = kLongString;
- } else {
- fUnion.fFields.fLengthAndFlags = kIsBogus;
- fUnion.fFields.fArray = 0;
- fUnion.fFields.fCapacity = 0;
- return FALSE;
+ return TRUE;
}
}
- return TRUE;
+ fUnion.fFields.fLengthAndFlags = kIsBogus;
+ fUnion.fFields.fArray = 0;
+ fUnion.fFields.fCapacity = 0;
+ return FALSE;
}
//========================================
@@ -415,7 +442,7 @@ UnicodeString::~UnicodeString()
// Factory methods
//========================================
-UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
+UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
UnicodeString result;
result.setToUTF8(utf8);
return result;
@@ -521,15 +548,17 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
}
// else if(!fastCopy) fall through to case kWritableAlias
// -> allocate a new buffer and copy the contents
+ U_FALLTHROUGH;
case kWritableAlias: {
// src is a writable alias; we make a copy of that instead
int32_t srcLength = src.length();
if(allocate(srcLength)) {
- uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
+ u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
setLength(srcLength);
break;
}
// if there is not enough memory, then fall through to setting to bogus
+ U_FALLTHROUGH;
}
default:
// if src is bogus, set ourselves to bogus
@@ -853,7 +882,7 @@ UnicodeString::extract(UChar *dest, int32_t destCapacity,
} else {
const UChar *array = getArrayStart();
if(len>0 && len<=destCapacity && array!=dest) {
- uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
+ u_memcpy(dest, array, len);
}
return u_terminateUChars(dest, destCapacity, len, &errorCode);
}
@@ -1215,7 +1244,7 @@ UnicodeString::getTerminatedBuffer() {
return array;
}
}
- if(cloneArrayIfNeeded(len+1)) {
+ if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
array = getArrayStart();
array[len] = 0;
return array;
@@ -1297,7 +1326,7 @@ UnicodeString::setTo(UChar *buffer,
return *this;
}
-UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
+UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
unBogus();
int32_t length = utf8.length();
int32_t capacity;
@@ -1426,8 +1455,14 @@ UnicodeString::doReplace(int32_t start,
// pin the indices to legal values
pinIndices(start, length);
- // calculate the size of the string after the replace
- int32_t newLength = oldLength - length + srcLength;
+ // Calculate the size of the string after the replace.
+ // Avoid int32_t overflow.
+ int32_t newLength = oldLength - length;
+ if(srcLength > (INT32_MAX - newLength)) {
+ setToBogus();
+ return *this;
+ }
+ newLength += srcLength;
// cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
// therefore we need to keep the current fArray
@@ -1444,7 +1479,7 @@ UnicodeString::doReplace(int32_t start,
// clone our array and allocate a bigger array if needed
int32_t *bufferToDelete = 0;
- if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
+ if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
FALSE, &bufferToDelete)
) {
return *this;
@@ -1511,7 +1546,7 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng
int32_t newLength = oldLength + srcLength;
// optimize append() onto a large-enough, owned string
if((newLength <= getCapacity() && isBufferWritable()) ||
- cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize)) {
+ cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
UChar *newArray = getArrayStart();
// Do not copy characters when
// UChar *buffer=str.getAppendBuffer(...);
@@ -1859,7 +1894,9 @@ UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
return NULL;
}
int32_t oldLength = str.length();
- if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
+ if(minCapacity <= (kMaxCapacity - oldLength) &&
+ desiredCapacityHint <= (kMaxCapacity - oldLength) &&
+ str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
*resultCapacity = str.getCapacity() - oldLength;
return str.getArrayStart() + oldLength;
}
« no previous file with comments | « source/common/unisetspan.cpp ('k') | source/common/unistr_case.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698