source/i18n/identifier_info.cpp - Issue 2440913002: Update ICU to 58.1

Unified Diff: source/i18n/identifier_info.cpp

Issue 2440913002: Update ICU to 58.1

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: source/i18n/identifier_info.cpp

diff --git a/source/i18n/identifier_info.cpp b/source/i18n/identifier_info.cpp

deleted file mode 100644

index 0c0706f1ccdb1273f1ce84ea182b77e8444e065b..0000000000000000000000000000000000000000

--- a/source/i18n/identifier_info.cpp

+++ /dev/null

@@ -1,311 +0,0 @@

-/*

-**********************************************************************

-*/

-#include "unicode/utypes.h"

-#include "unicode/uchar.h"

-#include "unicode/utf16.h"

-#include "identifier_info.h"

-#include "mutex.h"

-#include "scriptset.h"

-#include "ucln_in.h"

-#include "uvector.h"

-U_NAMESPACE_BEGIN

-static UnicodeSet *ASCII;

-static ScriptSet *JAPANESE;

-static ScriptSet *CHINESE;

-static ScriptSet *KOREAN;

-static ScriptSet *CONFUSABLE_WITH_LATIN;

-static UInitOnce gIdentifierInfoInitOnce = U_INITONCE_INITIALIZER;

-U_CDECL_BEGIN

-static UBool U_CALLCONV

-IdentifierInfo_cleanup(void) {

- delete ASCII;

- ASCII = NULL;

- delete JAPANESE;

- JAPANESE = NULL;

- delete CHINESE;

- CHINESE = NULL;

- delete KOREAN;

- KOREAN = NULL;

- delete CONFUSABLE_WITH_LATIN;

- CONFUSABLE_WITH_LATIN = NULL;

- gIdentifierInfoInitOnce.reset();

- return TRUE;

-static void U_CALLCONV

-IdentifierInfo_init(UErrorCode &status) {

- ASCII = new UnicodeSet(0, 0x7f);

- JAPANESE = new ScriptSet();

- CHINESE = new ScriptSet();

- KOREAN = new ScriptSet();

- CONFUSABLE_WITH_LATIN = new ScriptSet();

- if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL

- || CONFUSABLE_WITH_LATIN == NULL) {

- status = U_MEMORY_ALLOCATION_ERROR;

- return;

- }

- ASCII->freeze();

- JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status)

- .set(USCRIPT_KATAKANA, status);

- CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status);

- KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status);

- CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status)

- .set(USCRIPT_CHEROKEE, status);

- ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup);

-U_CDECL_END

-IdentifierInfo::IdentifierInfo(UErrorCode &status):

- fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL),

- fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) {

- umtx_initOnce(gIdentifierInfoInitOnce, &IdentifierInfo_init, status);

- if (U_FAILURE(status)) {

- return;

- }

- fIdentifier = new UnicodeString();

- fRequiredScripts = new ScriptSet();

- fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status);

- uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet);

- fCommonAmongAlternates = new ScriptSet();

- fNumerics = new UnicodeSet();

- fIdentifierProfile = new UnicodeSet(0, 0x10FFFF);

- if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL ||

- fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) {

- status = U_MEMORY_ALLOCATION_ERROR;

- }

-IdentifierInfo::~IdentifierInfo() {

- delete fIdentifier;

- delete fRequiredScripts;

- uhash_close(fScriptSetSet);

- delete fCommonAmongAlternates;

- delete fNumerics;

- delete fIdentifierProfile;

-IdentifierInfo &IdentifierInfo::clear() {

- fRequiredScripts->resetAll();

- uhash_removeAll(fScriptSetSet);

- fNumerics->clear();

- fCommonAmongAlternates->resetAll();

- return *this;

-IdentifierInfo &IdentifierInfo::setIdentifierProfile(const UnicodeSet &identifierProfile) {

- *fIdentifierProfile = identifierProfile;

- return *this;

-const UnicodeSet &IdentifierInfo::getIdentifierProfile() const {

- return *fIdentifierProfile;

-IdentifierInfo &IdentifierInfo::setIdentifier(const UnicodeString &identifier, UErrorCode &status) {

- if (U_FAILURE(status)) {

- return *this;

- }

- *fIdentifier = identifier;

- clear();

- ScriptSet scriptsForCP;

- UChar32 cp;

- for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) {

- cp = identifier.char32At(i);

- // Store a representative character for each kind of decimal digit

- if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) {

- // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value

- fNumerics->add(cp - (UChar32)u_getNumericValue(cp));

- }

- UScriptCode extensions[500];

- int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, UPRV_LENGTHOF(extensions), &status);

- if (U_FAILURE(status)) {

- return *this;

- }

- scriptsForCP.resetAll();

- for (int32_t j=0; j<extensionsCount; j++) {

- scriptsForCP.set(extensions[j], status);

- }

- scriptsForCP.reset(USCRIPT_COMMON, status);

- scriptsForCP.reset(USCRIPT_INHERITED, status);

- switch (scriptsForCP.countMembers()) {

- case 0: break;

- case 1:

- // Single script, record it.

- fRequiredScripts->Union(scriptsForCP);

- break;

- default:

- if (!fRequiredScripts->intersects(scriptsForCP)

- && !uhash_geti(fScriptSetSet, &scriptsForCP)) {

- // If the set hasn't been added already, add it

- // (Add a copy, fScriptSetSet takes ownership of the copy.)

- uhash_puti(fScriptSetSet, new ScriptSet(scriptsForCP), 1, &status);

- }

- break;

- }

- // Now make a final pass through ScriptSetSet to remove alternates that came before singles.

- // [Kana], [Kana Hira] => [Kana]

- // This is relatively infrequent, so doesn't have to be optimized.

- // We also compute any commonalities among the alternates.

- if (uhash_count(fScriptSetSet) > 0) {

- fCommonAmongAlternates->setAll();

- for (int32_t it = UHASH_FIRST;;) {

- const UHashElement *nextHashEl = uhash_nextElement(fScriptSetSet, &it);

- if (nextHashEl == NULL) {

- break;

- }

- ScriptSet *next = static_cast<ScriptSet *>(nextHashEl->key.pointer);

- // [Kana], [Kana Hira] => [Kana]

- if (fRequiredScripts->intersects(*next)) {

- uhash_removeElement(fScriptSetSet, nextHashEl);

- } else {

- fCommonAmongAlternates->intersect(*next);

- // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]]

- for (int32_t otherIt = UHASH_FIRST;;) {

- const UHashElement *otherHashEl = uhash_nextElement(fScriptSetSet, &otherIt);

- if (otherHashEl == NULL) {

- break;

- }

- ScriptSet *other = static_cast<ScriptSet *>(otherHashEl->key.pointer);

- if (next != other && next->contains(*other)) {

- uhash_removeElement(fScriptSetSet, nextHashEl);

- break;

- }

- if (uhash_count(fScriptSetSet) == 0) {

- fCommonAmongAlternates->resetAll();

- }

- return *this;

-const UnicodeString *IdentifierInfo::getIdentifier() const {

- return fIdentifier;

-const ScriptSet *IdentifierInfo::getScripts() const {

- return fRequiredScripts;

-const UHashtable *IdentifierInfo::getAlternates() const {

- return fScriptSetSet;

-const UnicodeSet *IdentifierInfo::getNumerics() const {

- return fNumerics;

-const ScriptSet *IdentifierInfo::getCommonAmongAlternates() const {

- return fCommonAmongAlternates;

-#if !UCONFIG_NO_NORMALIZATION

-URestrictionLevel IdentifierInfo::getRestrictionLevel(UErrorCode &status) const {

- if (!fIdentifierProfile->containsAll(*fIdentifier) || getNumerics()->size() > 1) {

- return USPOOF_UNRESTRICTIVE;

- }

- if (ASCII->containsAll(*fIdentifier)) {

- return USPOOF_ASCII;

- }

- // This is a bit tricky. We look at a number of factors.

- // The number of scripts in the text.

- // Plus 1 if there is some commonality among the alternates (eg [Arab Thaa]; [Arab Syrc])

- // Plus number of alternates otherwise (this only works because we only test cardinality up to 2.)

- // Note: the requiredScripts set omits COMMON and INHERITED; they are taken out at the

- // time it is created, in setIdentifier().

- int32_t cardinalityPlus = fRequiredScripts->countMembers() +

- (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1);

- if (cardinalityPlus < 2) {

- return USPOOF_SINGLE_SCRIPT_RESTRICTIVE;

- }

- if (containsWithAlternates(*JAPANESE, *fRequiredScripts) || containsWithAlternates(*CHINESE, *fRequiredScripts)

- || containsWithAlternates(*KOREAN, *fRequiredScripts)) {

- return USPOOF_HIGHLY_RESTRICTIVE;

- }

- if (cardinalityPlus == 2 &&

- fRequiredScripts->test(USCRIPT_LATIN, status) &&

- !fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) {

- return USPOOF_MODERATELY_RESTRICTIVE;

- }

- return USPOOF_MINIMALLY_RESTRICTIVE;

-#endif /* !UCONFIG_NO_NORMALIZATION */

-int32_t IdentifierInfo::getScriptCount() const {

- // Note: Common and Inherited scripts were removed by setIdentifier(), and do not appear in fRequiredScripts.

- int32_t count = fRequiredScripts->countMembers() +

- (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1);

- return count;

-UBool IdentifierInfo::containsWithAlternates(const ScriptSet &container, const ScriptSet &containee) const {

- if (!container.contains(containee)) {

- return FALSE;

- }

- for (int32_t iter = UHASH_FIRST; ;) {

- const UHashElement *hashEl = uhash_nextElement(fScriptSetSet, &iter);

- if (hashEl == NULL) {

- break;

- }

- ScriptSet *alternatives = static_cast<ScriptSet *>(hashEl->key.pointer);

- if (!container.intersects(*alternatives)) {

- return false;

- }

- return true;

-UnicodeString &IdentifierInfo::displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status) {

- UVector sorted(status);

- if (U_FAILURE(status)) {

- return dest;

- }

- for (int32_t pos = UHASH_FIRST; ;) {

- const UHashElement *el = uhash_nextElement(alternates, &pos);

- if (el == NULL) {

- break;

- }

- ScriptSet *ss = static_cast<ScriptSet *>(el->key.pointer);

- sorted.addElement(ss, status);

- }

- sorted.sort(uhash_compareScriptSet, status);

- UnicodeString separator = UNICODE_STRING_SIMPLE("; ");

- for (int32_t i=0; i<sorted.size(); i++) {

- if (i>0) {

- dest.append(separator);

- }

- ScriptSet *ss = static_cast<ScriptSet *>(sorted.elementAt(i));

- ss->displayScripts(dest);

- }

- return dest;

-U_NAMESPACE_END

« no previous file with comments | « source/i18n/identifier_info.h ('k') | source/i18n/indiancal.cpp » ('j') | no next file with comments »