Chromium Code Reviews

Unified Diff: source/i18n/identifier_info.cpp

Issue 2440913002: Update ICU to 58.1
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
« no previous file with comments | « source/i18n/identifier_info.h ('k') | source/i18n/indiancal.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/i18n/identifier_info.cpp
diff --git a/source/i18n/identifier_info.cpp b/source/i18n/identifier_info.cpp
deleted file mode 100644
index 0c0706f1ccdb1273f1ce84ea182b77e8444e065b..0000000000000000000000000000000000000000
--- a/source/i18n/identifier_info.cpp
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
-**********************************************************************
-* Copyright (C) 2012-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#include "unicode/utypes.h"
-
-#include "unicode/uchar.h"
-#include "unicode/utf16.h"
-
-#include "identifier_info.h"
-#include "mutex.h"
-#include "scriptset.h"
-#include "ucln_in.h"
-#include "uvector.h"
-
-U_NAMESPACE_BEGIN
-
-// File-level constants used by IdentifierInfo. They are allocated in
-// IdentifierInfo_init() and released in IdentifierInfo_cleanup().
-// Code points 0..0x7f; frozen after construction.
-static UnicodeSet *ASCII;
-// Latin, Han, Hiragana and Katakana.
-static ScriptSet *JAPANESE;
-// Latin, Han and Bopomofo.
-static ScriptSet *CHINESE;
-// Latin, Han and Hangul.
-static ScriptSet *KOREAN;
-// Cyrillic, Greek and Cherokee.
-static ScriptSet *CONFUSABLE_WITH_LATIN;
-// Init-once guard passed to umtx_initOnce() together with IdentifierInfo_init().
-static UInitOnce gIdentifierInfoInitOnce = U_INITONCE_INITIALIZER;
-
-
-U_CDECL_BEGIN
-// Cleanup callback registered with ucln_i18n_registerCleanup().
-// Frees the file-level sets, nulls the pointers and resets the init-once guard.
-// Always returns TRUE.
-static UBool U_CALLCONV
-IdentifierInfo_cleanup(void) {
-    // Deleting a NULL pointer is a no-op, so sets that were never allocated are harmless.
-    delete ASCII;
-    delete JAPANESE;
-    delete CHINESE;
-    delete KOREAN;
-    delete CONFUSABLE_WITH_LATIN;
-
-    ASCII = NULL;
-    JAPANESE = CHINESE = KOREAN = CONFUSABLE_WITH_LATIN = NULL;
-
-    gIdentifierInfoInitOnce.reset();
-    return TRUE;
-}
-
-// One-time initializer for the file-level sets; invoked through umtx_initOnce()
-// from the IdentifierInfo constructor.
-//
-// status is set to U_MEMORY_ALLOCATION_ERROR if any of the sets cannot be allocated,
-// and receives any error reported by ScriptSet::set().
-static void U_CALLCONV
-IdentifierInfo_init(UErrorCode &status) {
-    // Register the cleanup hook before allocating anything. If an allocation below
-    // fails we return early, and the sets that were allocated must still be released
-    // by IdentifierInfo_cleanup() instead of being leaked.
-    ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup);
-
-    ASCII = new UnicodeSet(0, 0x7f);
-    JAPANESE = new ScriptSet();
-    CHINESE = new ScriptSet();
-    KOREAN = new ScriptSet();
-    CONFUSABLE_WITH_LATIN = new ScriptSet();
-    if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL
-            || CONFUSABLE_WITH_LATIN == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-        return;
-    }
-    ASCII->freeze();
-    JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status)
-             .set(USCRIPT_KATAKANA, status);
-    CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status);
-    KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status);
-    CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status)
-                          .set(USCRIPT_CHEROKEE, status);
-}
-U_CDECL_END
-
-
-// Constructs an empty IdentifierInfo whose identifier profile covers 0..0x10FFFF.
-//
-// status: set to a failure code if the shared constants cannot be initialized, if the
-// hashtable cannot be opened, or (U_MEMORY_ALLOCATION_ERROR) if a member allocation
-// fails. On failure some members may be NULL; the destructor copes with that.
-IdentifierInfo::IdentifierInfo(UErrorCode &status):
-         fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL),
-         fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) {
-    umtx_initOnce(gIdentifierInfoInitOnce, &IdentifierInfo_init, status);
-    if (U_FAILURE(status)) {
-        return;
-    }
-
-    fIdentifier = new UnicodeString();
-    fRequiredScripts = new ScriptSet();
-    fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status);
-    // uhash_open() can fail and leave fScriptSetSet NULL; only install the key deleter
-    // on a hashtable that actually exists.
-    if (fScriptSetSet != NULL) {
-        uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet);
-    }
-    fCommonAmongAlternates = new ScriptSet();
-    fNumerics = new UnicodeSet();
-    fIdentifierProfile = new UnicodeSet(0, 0x10FFFF);
-
-    // Do not overwrite an error already reported by uhash_open().
-    if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL ||
-                              fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-    }
-}
-
-// Releases everything allocated by the constructor.
-IdentifierInfo::~IdentifierInfo() {
-    // The hashtable owns its ScriptSet keys (a key deleter is installed in the
-    // constructor), so closing it is all that is needed for the alternates.
-    uhash_close(fScriptSetSet);
-
-    delete fIdentifierProfile;
-    delete fNumerics;
-    delete fCommonAmongAlternates;
-    delete fRequiredScripts;
-    delete fIdentifier;
-}
-
-
-// Empties the data derived from the identifier: required scripts, alternates,
-// numerics and the common-among-alternates set. The stored identifier string and
-// the identifier profile are left untouched. Returns *this.
-IdentifierInfo &IdentifierInfo::clear() {
-    uhash_removeAll(fScriptSetSet);
-    fRequiredScripts->resetAll();
-    fCommonAmongAlternates->resetAll();
-    fNumerics->clear();
-    return *this;
-}
-
-
-// Replaces the identifier profile with a copy of identifierProfile.
-// getRestrictionLevel() checks the identifier against this set.
-// Returns *this.
-IdentifierInfo &IdentifierInfo::setIdentifierProfile(const UnicodeSet &identifierProfile) {
- *fIdentifierProfile = identifierProfile;
- return *this;
-}
-
-
-// Returns a reference to the identifier profile owned by this object.
-const UnicodeSet &IdentifierInfo::getIdentifierProfile() const {
- return *fIdentifierProfile;
-}
-
-
-// Stores a copy of identifier and recomputes the data derived from it:
-//   - fNumerics: the zero digit of each kind of decimal digit that occurs;
-//   - fRequiredScripts: union of the scripts of every character that has exactly one
-//     script once Common and Inherited are removed;
-//   - fScriptSetSet: script sets of characters with more than one possible script,
-//     pruned of sets that intersect the required scripts or that contain another
-//     alternate;
-//   - fCommonAmongAlternates: intersection of the alternates that do not intersect
-//     the required scripts (empty when no alternates remain).
-//
-// Does nothing if status is already a failure. status is set on a script-lookup
-// error or on memory allocation failure; in that case the function returns early
-// and the derived data is incomplete. Returns *this.
-IdentifierInfo &IdentifierInfo::setIdentifier(const UnicodeString &identifier, UErrorCode &status) {
-    if (U_FAILURE(status)) {
-        return *this;
-    }
-    *fIdentifier = identifier;
-    clear();
-    ScriptSet scriptsForCP;
-    UChar32 cp;
-    // Walk the string by code point; U16_LENGTH(cp) is 1 or 2 UTF-16 code units.
-    for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) {
-        cp = identifier.char32At(i);
-        // Store a representative character for each kind of decimal digit
-        if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) {
-            // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value
-            fNumerics->add(cp - (UChar32)u_getNumericValue(cp));
-        }
-        UScriptCode extensions[500];
-        int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, UPRV_LENGTHOF(extensions), &status);
-        if (U_FAILURE(status)) {
-            return *this;
-        }
-        scriptsForCP.resetAll();
-        for (int32_t j=0; j<extensionsCount; j++) {
-            scriptsForCP.set(extensions[j], status);
-        }
-        // Common and Inherited never count as a script of their own.
-        scriptsForCP.reset(USCRIPT_COMMON, status);
-        scriptsForCP.reset(USCRIPT_INHERITED, status);
-        switch (scriptsForCP.countMembers()) {
-          case 0: break;
-          case 1:
-            // Single script, record it.
-            fRequiredScripts->Union(scriptsForCP);
-            break;
-          default:
-            if (!fRequiredScripts->intersects(scriptsForCP)
-                    && !uhash_geti(fScriptSetSet, &scriptsForCP)) {
-                // If the set hasn't been added already, add it
-                // (Add a copy, fScriptSetSet takes ownership of the copy.)
-                ScriptSet *alternate = new ScriptSet(scriptsForCP);
-                if (alternate == NULL) {
-                    // Never hand a NULL key to the hashtable.
-                    status = U_MEMORY_ALLOCATION_ERROR;
-                    return *this;
-                }
-                uhash_puti(fScriptSetSet, alternate, 1, &status);
-            }
-            break;
-        }
-    }
-    // Now make a final pass through ScriptSetSet to remove alternates that came before singles.
-    // [Kana], [Kana Hira] => [Kana]
-    // This is relatively infrequent, so doesn't have to be optimized.
-    // We also compute any commonalities among the alternates.
-    if (uhash_count(fScriptSetSet) > 0) {
-        fCommonAmongAlternates->setAll();
-        for (int32_t it = UHASH_FIRST;;) {
-            const UHashElement *nextHashEl = uhash_nextElement(fScriptSetSet, &it);
-            if (nextHashEl == NULL) {
-                break;
-            }
-            ScriptSet *next = static_cast<ScriptSet *>(nextHashEl->key.pointer);
-            // [Kana], [Kana Hira] => [Kana]
-            if (fRequiredScripts->intersects(*next)) {
-                uhash_removeElement(fScriptSetSet, nextHashEl);
-            } else {
-                fCommonAmongAlternates->intersect(*next);
-                // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]]
-                for (int32_t otherIt = UHASH_FIRST;;) {
-                    const UHashElement *otherHashEl = uhash_nextElement(fScriptSetSet, &otherIt);
-                    if (otherHashEl == NULL) {
-                        break;
-                    }
-                    ScriptSet *other = static_cast<ScriptSet *>(otherHashEl->key.pointer);
-                    if (next != other && next->contains(*other)) {
-                        // next is a superset of another alternate, so it is redundant.
-                        uhash_removeElement(fScriptSetSet, nextHashEl);
-                        break;
-                    }
-                }
-            }
-        }
-    }
-    // setAll() above is only meaningful while at least one alternate survives.
-    if (uhash_count(fScriptSetSet) == 0) {
-        fCommonAmongAlternates->resetAll();
-    }
-    return *this;
-}
-
-
-// Returns the identifier string stored by setIdentifier(); the object remains owned
-// by this IdentifierInfo.
-const UnicodeString *IdentifierInfo::getIdentifier() const {
- return fIdentifier;
-}
-
-// Returns the required scripts computed by setIdentifier() (Common and Inherited
-// are never included); the set remains owned by this IdentifierInfo.
-const ScriptSet *IdentifierInfo::getScripts() const {
- return fRequiredScripts;
-}
-
-// Returns the hashtable of alternate script sets; its keys are ScriptSet pointers.
-// The table remains owned by this IdentifierInfo.
-const UHashtable *IdentifierInfo::getAlternates() const {
- return fScriptSetSet;
-}
-
-
-// Returns the set of representative zero digits, one per kind of decimal digit
-// found by setIdentifier(); the set remains owned by this IdentifierInfo.
-const UnicodeSet *IdentifierInfo::getNumerics() const {
- return fNumerics;
-}
-
-// Returns the scripts shared by the alternates, as computed by setIdentifier();
-// the set is empty when there are no alternates. It remains owned by this
-// IdentifierInfo.
-const ScriptSet *IdentifierInfo::getCommonAmongAlternates() const {
- return fCommonAmongAlternates;
-}
-
-#if !UCONFIG_NO_NORMALIZATION
-
-// Determines the restriction level of the identifier last given to setIdentifier().
-//
-// status is only passed on to ScriptSet::test(); it is not examined on entry.
-// Returns one of the USPOOF_* restriction levels, checked from most to least strict.
-URestrictionLevel IdentifierInfo::getRestrictionLevel(UErrorCode &status) const {
-    // A character outside the profile, or more than one kind of decimal digit,
-    // rules out every restricted level.
-    if (!fIdentifierProfile->containsAll(*fIdentifier)) {
-        return USPOOF_UNRESTRICTIVE;
-    }
-    if (getNumerics()->size() > 1) {
-        return USPOOF_UNRESTRICTIVE;
-    }
-    if (ASCII->containsAll(*fIdentifier)) {
-        return USPOOF_ASCII;
-    }
-
-    // Script cardinality is the number of required scripts, plus:
-    //   1 when the alternates share some script (eg [Arab Thaa]; [Arab Syrc]),
-    //   the number of alternates otherwise (adequate because the cardinality is
-    //   only ever compared against 2).
-    // The required scripts never contain COMMON or INHERITED; setIdentifier()
-    // strips them before recording anything.
-    int32_t alternatesWeight = 1;
-    if (fCommonAmongAlternates->countMembers() == 0) {
-        alternatesWeight = uhash_count(fScriptSetSet);
-    }
-    const int32_t scriptCardinality = fRequiredScripts->countMembers() + alternatesWeight;
-
-    if (scriptCardinality < 2) {
-        return USPOOF_SINGLE_SCRIPT_RESTRICTIVE;
-    }
-
-    // Covered by one of the Japanese, Chinese or Korean script combinations.
-    if (containsWithAlternates(*JAPANESE, *fRequiredScripts)) {
-        return USPOOF_HIGHLY_RESTRICTIVE;
-    }
-    if (containsWithAlternates(*CHINESE, *fRequiredScripts)) {
-        return USPOOF_HIGHLY_RESTRICTIVE;
-    }
-    if (containsWithAlternates(*KOREAN, *fRequiredScripts)) {
-        return USPOOF_HIGHLY_RESTRICTIVE;
-    }
-
-    // Latin plus exactly one more script that is not Cyrillic, Greek or Cherokee.
-    if (scriptCardinality != 2) {
-        return USPOOF_MINIMALLY_RESTRICTIVE;
-    }
-    if (!fRequiredScripts->test(USCRIPT_LATIN, status)) {
-        return USPOOF_MINIMALLY_RESTRICTIVE;
-    }
-    if (fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) {
-        return USPOOF_MINIMALLY_RESTRICTIVE;
-    }
-    return USPOOF_MODERATELY_RESTRICTIVE;
-}
-
-#endif /* !UCONFIG_NO_NORMALIZATION */
-
-// Returns the number of required scripts, plus 1 if the alternates have a script in
-// common, or plus the number of alternates if they do not.
-int32_t IdentifierInfo::getScriptCount() const {
-    // fRequiredScripts never holds Common or Inherited; setIdentifier() removes them.
-    int32_t total = fRequiredScripts->countMembers();
-    if (fCommonAmongAlternates->countMembers() != 0) {
-        total += 1;
-    } else {
-        total += uhash_count(fScriptSetSet);
-    }
-    return total;
-}
-
-
-
-// Returns TRUE when container contains every script of containee and also shares
-// at least one script with each alternate set stored in fScriptSetSet;
-// returns FALSE otherwise.
-UBool IdentifierInfo::containsWithAlternates(const ScriptSet &container, const ScriptSet &containee) const {
-    if (!container.contains(containee)) {
-        return FALSE;
-    }
-    int32_t cursor = UHASH_FIRST;
-    const UHashElement *entry = uhash_nextElement(fScriptSetSet, &cursor);
-    while (entry != NULL) {
-        ScriptSet *alternate = static_cast<ScriptSet *>(entry->key.pointer);
-        if (!container.intersects(*alternate)) {
-            // This alternate cannot be satisfied by any script in container.
-            return FALSE;
-        }
-        entry = uhash_nextElement(fScriptSetSet, &cursor);
-    }
-    return TRUE;
-}
-
-// Appends a description of the script sets in alternates to dest: the sets are
-// sorted with uhash_compareScriptSet and joined with "; ".
-//
-// dest       string that is appended to and returned.
-// alternates hashtable whose keys are ScriptSet pointers.
-// status     if the temporary vector cannot be constructed, dest is returned unchanged.
-UnicodeString &IdentifierInfo::displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status) {
-    UVector ordered(status);
-    if (U_FAILURE(status)) {
-        return dest;
-    }
-
-    // Collect the keys; the vector only borrows the pointers.
-    int32_t cursor = UHASH_FIRST;
-    const UHashElement *entry = uhash_nextElement(alternates, &cursor);
-    while (entry != NULL) {
-        ordered.addElement(static_cast<ScriptSet *>(entry->key.pointer), status);
-        entry = uhash_nextElement(alternates, &cursor);
-    }
-    ordered.sort(uhash_compareScriptSet, status);
-
-    const UnicodeString separator = UNICODE_STRING_SIMPLE("; ");
-    const int32_t total = ordered.size();
-    for (int32_t idx = 0; idx < total; ++idx) {
-        if (idx != 0) {
-            dest.append(separator);
-        }
-        static_cast<ScriptSet *>(ordered.elementAt(idx))->displayScripts(dest);
-    }
-    return dest;
-}
-
-U_NAMESPACE_END
-
« no previous file with comments | « source/i18n/identifier_info.h ('k') | source/i18n/indiancal.cpp » ('j') | no next file with comments »

Powered by Google App Engine