| Index: source/i18n/identifier_info.cpp
|
| diff --git a/source/i18n/identifier_info.cpp b/source/i18n/identifier_info.cpp
|
| deleted file mode 100644
|
| index 0c0706f1ccdb1273f1ce84ea182b77e8444e065b..0000000000000000000000000000000000000000
|
| --- a/source/i18n/identifier_info.cpp
|
| +++ /dev/null
|
| @@ -1,311 +0,0 @@
|
| -/*
|
| -**********************************************************************
|
| -* Copyright (C) 2012-2014, International Business Machines
|
| -* Corporation and others. All Rights Reserved.
|
| -**********************************************************************
|
| -*/
|
| -
|
| -#include "unicode/utypes.h"
|
| -
|
| -#include "unicode/uchar.h"
|
| -#include "unicode/utf16.h"
|
| -
|
| -#include "identifier_info.h"
|
| -#include "mutex.h"
|
| -#include "scriptset.h"
|
| -#include "ucln_in.h"
|
| -#include "uvector.h"
|
| -
|
| -U_NAMESPACE_BEGIN
|
| -
|
| -static UnicodeSet *ASCII;
|
| -static ScriptSet *JAPANESE;
|
| -static ScriptSet *CHINESE;
|
| -static ScriptSet *KOREAN;
|
| -static ScriptSet *CONFUSABLE_WITH_LATIN;
|
| -static UInitOnce gIdentifierInfoInitOnce = U_INITONCE_INITIALIZER;
|
| -
|
| -
|
| -U_CDECL_BEGIN
|
| -static UBool U_CALLCONV
|
| -IdentifierInfo_cleanup(void) {
|
| - delete ASCII;
|
| - ASCII = NULL;
|
| - delete JAPANESE;
|
| - JAPANESE = NULL;
|
| - delete CHINESE;
|
| - CHINESE = NULL;
|
| - delete KOREAN;
|
| - KOREAN = NULL;
|
| - delete CONFUSABLE_WITH_LATIN;
|
| - CONFUSABLE_WITH_LATIN = NULL;
|
| - gIdentifierInfoInitOnce.reset();
|
| - return TRUE;
|
| -}
|
| -
|
| -static void U_CALLCONV
|
| -IdentifierInfo_init(UErrorCode &status) {
|
| - ASCII = new UnicodeSet(0, 0x7f);
|
| - JAPANESE = new ScriptSet();
|
| - CHINESE = new ScriptSet();
|
| - KOREAN = new ScriptSet();
|
| - CONFUSABLE_WITH_LATIN = new ScriptSet();
|
| - if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL
|
| - || CONFUSABLE_WITH_LATIN == NULL) {
|
| - status = U_MEMORY_ALLOCATION_ERROR;
|
| - return;
|
| - }
|
| - ASCII->freeze();
|
| - JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status)
|
| - .set(USCRIPT_KATAKANA, status);
|
| - CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status);
|
| - KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status);
|
| - CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status)
|
| - .set(USCRIPT_CHEROKEE, status);
|
| - ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup);
|
| -}
|
| -U_CDECL_END
|
| -
|
| -
|
| -IdentifierInfo::IdentifierInfo(UErrorCode &status):
|
| - fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL),
|
| - fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) {
|
| - umtx_initOnce(gIdentifierInfoInitOnce, &IdentifierInfo_init, status);
|
| - if (U_FAILURE(status)) {
|
| - return;
|
| - }
|
| -
|
| - fIdentifier = new UnicodeString();
|
| - fRequiredScripts = new ScriptSet();
|
| - fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status);
|
| - uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet);
|
| - fCommonAmongAlternates = new ScriptSet();
|
| - fNumerics = new UnicodeSet();
|
| - fIdentifierProfile = new UnicodeSet(0, 0x10FFFF);
|
| -
|
| - if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL ||
|
| - fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) {
|
| - status = U_MEMORY_ALLOCATION_ERROR;
|
| - }
|
| -}
|
| -
|
| -IdentifierInfo::~IdentifierInfo() {
|
| - delete fIdentifier;
|
| - delete fRequiredScripts;
|
| - uhash_close(fScriptSetSet);
|
| - delete fCommonAmongAlternates;
|
| - delete fNumerics;
|
| - delete fIdentifierProfile;
|
| -}
|
| -
|
| -
|
| -IdentifierInfo &IdentifierInfo::clear() {
|
| - fRequiredScripts->resetAll();
|
| - uhash_removeAll(fScriptSetSet);
|
| - fNumerics->clear();
|
| - fCommonAmongAlternates->resetAll();
|
| - return *this;
|
| -}
|
| -
|
| -
|
| -IdentifierInfo &IdentifierInfo::setIdentifierProfile(const UnicodeSet &identifierProfile) {
|
| - *fIdentifierProfile = identifierProfile;
|
| - return *this;
|
| -}
|
| -
|
| -
|
| -const UnicodeSet &IdentifierInfo::getIdentifierProfile() const {
|
| - return *fIdentifierProfile;
|
| -}
|
| -
|
| -
|
| -IdentifierInfo &IdentifierInfo::setIdentifier(const UnicodeString &identifier, UErrorCode &status) {
|
| - if (U_FAILURE(status)) {
|
| - return *this;
|
| - }
|
| - *fIdentifier = identifier;
|
| - clear();
|
| - ScriptSet scriptsForCP;
|
| - UChar32 cp;
|
| - for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) {
|
| - cp = identifier.char32At(i);
|
| - // Store a representative character for each kind of decimal digit
|
| - if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) {
|
| - // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value
|
| - fNumerics->add(cp - (UChar32)u_getNumericValue(cp));
|
| - }
|
| - UScriptCode extensions[500];
|
| - int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, UPRV_LENGTHOF(extensions), &status);
|
| - if (U_FAILURE(status)) {
|
| - return *this;
|
| - }
|
| - scriptsForCP.resetAll();
|
| - for (int32_t j=0; j<extensionsCount; j++) {
|
| - scriptsForCP.set(extensions[j], status);
|
| - }
|
| - scriptsForCP.reset(USCRIPT_COMMON, status);
|
| - scriptsForCP.reset(USCRIPT_INHERITED, status);
|
| - switch (scriptsForCP.countMembers()) {
|
| - case 0: break;
|
| - case 1:
|
| - // Single script, record it.
|
| - fRequiredScripts->Union(scriptsForCP);
|
| - break;
|
| - default:
|
| - if (!fRequiredScripts->intersects(scriptsForCP)
|
| - && !uhash_geti(fScriptSetSet, &scriptsForCP)) {
|
| - // If the set hasn't been added already, add it
|
| - // (Add a copy, fScriptSetSet takes ownership of the copy.)
|
| - uhash_puti(fScriptSetSet, new ScriptSet(scriptsForCP), 1, &status);
|
| - }
|
| - break;
|
| - }
|
| - }
|
| - // Now make a final pass through ScriptSetSet to remove alternates that came before singles.
|
| - // [Kana], [Kana Hira] => [Kana]
|
| - // This is relatively infrequent, so doesn't have to be optimized.
|
| - // We also compute any commonalities among the alternates.
|
| - if (uhash_count(fScriptSetSet) > 0) {
|
| - fCommonAmongAlternates->setAll();
|
| - for (int32_t it = UHASH_FIRST;;) {
|
| - const UHashElement *nextHashEl = uhash_nextElement(fScriptSetSet, &it);
|
| - if (nextHashEl == NULL) {
|
| - break;
|
| - }
|
| - ScriptSet *next = static_cast<ScriptSet *>(nextHashEl->key.pointer);
|
| - // [Kana], [Kana Hira] => [Kana]
|
| - if (fRequiredScripts->intersects(*next)) {
|
| - uhash_removeElement(fScriptSetSet, nextHashEl);
|
| - } else {
|
| - fCommonAmongAlternates->intersect(*next);
|
| - // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]]
|
| - for (int32_t otherIt = UHASH_FIRST;;) {
|
| - const UHashElement *otherHashEl = uhash_nextElement(fScriptSetSet, &otherIt);
|
| - if (otherHashEl == NULL) {
|
| - break;
|
| - }
|
| - ScriptSet *other = static_cast<ScriptSet *>(otherHashEl->key.pointer);
|
| - if (next != other && next->contains(*other)) {
|
| - uhash_removeElement(fScriptSetSet, nextHashEl);
|
| - break;
|
| - }
|
| - }
|
| - }
|
| - }
|
| - }
|
| - if (uhash_count(fScriptSetSet) == 0) {
|
| - fCommonAmongAlternates->resetAll();
|
| - }
|
| - return *this;
|
| -}
|
| -
|
| -
|
| -const UnicodeString *IdentifierInfo::getIdentifier() const {
|
| - return fIdentifier;
|
| -}
|
| -
|
| -const ScriptSet *IdentifierInfo::getScripts() const {
|
| - return fRequiredScripts;
|
| -}
|
| -
|
| -const UHashtable *IdentifierInfo::getAlternates() const {
|
| - return fScriptSetSet;
|
| -}
|
| -
|
| -
|
| -const UnicodeSet *IdentifierInfo::getNumerics() const {
|
| - return fNumerics;
|
| -}
|
| -
|
| -const ScriptSet *IdentifierInfo::getCommonAmongAlternates() const {
|
| - return fCommonAmongAlternates;
|
| -}
|
| -
|
| -#if !UCONFIG_NO_NORMALIZATION
|
| -
|
| -URestrictionLevel IdentifierInfo::getRestrictionLevel(UErrorCode &status) const {
|
| - if (!fIdentifierProfile->containsAll(*fIdentifier) || getNumerics()->size() > 1) {
|
| - return USPOOF_UNRESTRICTIVE;
|
| - }
|
| - if (ASCII->containsAll(*fIdentifier)) {
|
| - return USPOOF_ASCII;
|
| - }
|
| - // This is a bit tricky. We look at a number of factors.
|
| - // The number of scripts in the text.
|
| - // Plus 1 if there is some commonality among the alternates (eg [Arab Thaa]; [Arab Syrc])
|
| - // Plus number of alternates otherwise (this only works because we only test cardinality up to 2.)
|
| -
|
| - // Note: the requiredScripts set omits COMMON and INHERITED; they are taken out at the
|
| - // time it is created, in setIdentifier().
|
| - int32_t cardinalityPlus = fRequiredScripts->countMembers() +
|
| - (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1);
|
| - if (cardinalityPlus < 2) {
|
| - return USPOOF_SINGLE_SCRIPT_RESTRICTIVE;
|
| - }
|
| - if (containsWithAlternates(*JAPANESE, *fRequiredScripts) || containsWithAlternates(*CHINESE, *fRequiredScripts)
|
| - || containsWithAlternates(*KOREAN, *fRequiredScripts)) {
|
| - return USPOOF_HIGHLY_RESTRICTIVE;
|
| - }
|
| - if (cardinalityPlus == 2 &&
|
| - fRequiredScripts->test(USCRIPT_LATIN, status) &&
|
| - !fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) {
|
| - return USPOOF_MODERATELY_RESTRICTIVE;
|
| - }
|
| - return USPOOF_MINIMALLY_RESTRICTIVE;
|
| -}
|
| -
|
| -#endif /* !UCONFIG_NO_NORMALIZATION */
|
| -
|
| -int32_t IdentifierInfo::getScriptCount() const {
|
| - // Note: Common and Inherited scripts were removed by setIdentifier(), and do not appear in fRequiredScripts.
|
| - int32_t count = fRequiredScripts->countMembers() +
|
| - (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1);
|
| - return count;
|
| -}
|
| -
|
| -
|
| -
|
| -UBool IdentifierInfo::containsWithAlternates(const ScriptSet &container, const ScriptSet &containee) const {
|
| - if (!container.contains(containee)) {
|
| - return FALSE;
|
| - }
|
| - for (int32_t iter = UHASH_FIRST; ;) {
|
| - const UHashElement *hashEl = uhash_nextElement(fScriptSetSet, &iter);
|
| - if (hashEl == NULL) {
|
| - break;
|
| - }
|
| - ScriptSet *alternatives = static_cast<ScriptSet *>(hashEl->key.pointer);
|
| - if (!container.intersects(*alternatives)) {
|
| - return false;
|
| - }
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -UnicodeString &IdentifierInfo::displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status) {
|
| - UVector sorted(status);
|
| - if (U_FAILURE(status)) {
|
| - return dest;
|
| - }
|
| - for (int32_t pos = UHASH_FIRST; ;) {
|
| - const UHashElement *el = uhash_nextElement(alternates, &pos);
|
| - if (el == NULL) {
|
| - break;
|
| - }
|
| - ScriptSet *ss = static_cast<ScriptSet *>(el->key.pointer);
|
| - sorted.addElement(ss, status);
|
| - }
|
| - sorted.sort(uhash_compareScriptSet, status);
|
| - UnicodeString separator = UNICODE_STRING_SIMPLE("; ");
|
| - for (int32_t i=0; i<sorted.size(); i++) {
|
| - if (i>0) {
|
| - dest.append(separator);
|
| - }
|
| - ScriptSet *ss = static_cast<ScriptSet *>(sorted.elementAt(i));
|
| - ss->displayScripts(dest);
|
| - }
|
| - return dest;
|
| -}
|
| -
|
| -U_NAMESPACE_END
|
| -
|
|
|