X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b331163bffd790ced0e88b73f44f86d49ccc48a5..a01113dcd0f39d5da295ef82785beff9ed86fe38:/icuSources/common/brkeng.cpp diff --git a/icuSources/common/brkeng.cpp b/icuSources/common/brkeng.cpp index 2398fe9c..80e11585 100644 --- a/icuSources/common/brkeng.cpp +++ b/icuSources/common/brkeng.cpp @@ -1,6 +1,8 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ************************************************************************************ - * Copyright (C) 2006-2014, International Business Machines Corporation + * Copyright (C) 2006-2016, International Business Machines Corporation * and others. All Rights Reserved. ************************************************************************************ */ @@ -9,8 +11,6 @@ #if !UCONFIG_NO_BREAK_ITERATION -#include "brkeng.h" -#include "dictbe.h" #include "unicode/uchar.h" #include "unicode/uniset.h" #include "unicode/chariter.h" @@ -21,8 +21,13 @@ #include "unicode/uscript.h" #include "unicode/ucharstrie.h" #include "unicode/bytestrie.h" + +#include "brkeng.h" +#include "cmemory.h" +#include "dictbe.h" #include "charstr.h" #include "dictionarydata.h" +#include "mutex.h" #include "uvector.h" #include "umutex.h" #include "uresimp.h" @@ -54,66 +59,47 @@ LanguageBreakFactory::~LanguageBreakFactory() { ****************************************************************** */ -UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) { - for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { - fHandled[i] = 0; - } +UnhandledEngine::UnhandledEngine(UErrorCode &status) : fHandled(nullptr) { + (void)status; } UnhandledEngine::~UnhandledEngine() { - for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { - if (fHandled[i] != 0) { - delete fHandled[i]; - } - } + delete fHandled; + fHandled = nullptr; } UBool -UnhandledEngine::handles(UChar32 c, int32_t breakType) const { - return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])) - && fHandled[breakType] != 0 && fHandled[breakType]->contains(c)); +UnhandledEngine::handles(UChar32 c) const { + return fHandled && fHandled->contains(c); } int32_t UnhandledEngine::findBreaks( UText *text, - int32_t startPos, - int32_t endPos, - UBool reverse, - int32_t breakType, - UStack &/*foundBreaks*/ ) const { - if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { - UChar32 c = utext_current32(text); - if (reverse) { - while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) { - c = utext_previous32(text); - } - } - else { - while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) { - utext_next32(text); // TODO: recast loop to work with post-increment operations. - c = utext_current32(text); - } - } + int32_t /* startPos */, + int32_t endPos, + UVector32 &/*foundBreaks*/ ) const { + UChar32 c = utext_current32(text); + while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) { + utext_next32(text); // TODO: recast loop to work with post-increment operations. + c = utext_current32(text); } return 0; } void -UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) { - if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { - if (fHandled[breakType] == 0) { - fHandled[breakType] = new UnicodeSet(); - if (fHandled[breakType] == 0) { - return; - } - } - if (!fHandled[breakType]->contains(c)) { - UErrorCode status = U_ZERO_ERROR; - // Apply the entire script of the character. - int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); - fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status); +UnhandledEngine::handleCharacter(UChar32 c) { + if (fHandled == nullptr) { + fHandled = new UnicodeSet(); + if (fHandled == nullptr) { + return; } } + if (!fHandled->contains(c)) { + UErrorCode status = U_ZERO_ERROR; + // Apply the entire script of the character. + int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); + fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status); + } } /* @@ -139,90 +125,45 @@ U_CDECL_END U_NAMESPACE_BEGIN const LanguageBreakEngine * -ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { - UBool needsInit; - int32_t i; +ICULanguageBreakFactory::getEngineFor(UChar32 c) { const LanguageBreakEngine *lbe = NULL; UErrorCode status = U_ZERO_ERROR; - // TODO: The global mutex should not be used. - // The global mutex should only be used for short periods. - // A ICULanguageBreakFactory specific mutex should be used. - umtx_lock(NULL); - needsInit = (UBool)(fEngines == NULL); - if (!needsInit) { - i = fEngines->size(); - while (--i >= 0) { - lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); - if (lbe != NULL && lbe->handles(c, breakType)) { - break; - } - lbe = NULL; - } - } - umtx_unlock(NULL); - - if (lbe != NULL) { - return lbe; - } - - if (needsInit) { + static UMutex *gBreakEngineMutex = STATIC_NEW(UMutex); + Mutex m(gBreakEngineMutex); + + if (fEngines == NULL) { UStack *engines = new UStack(_deleteEngine, NULL, status); - if (U_SUCCESS(status) && engines == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } - else if (U_FAILURE(status)) { + if (U_FAILURE(status) || engines == NULL) { + // Note: no way to return error code to caller. delete engines; - engines = NULL; + return NULL; } - else { - umtx_lock(NULL); - if (fEngines == NULL) { - fEngines = engines; - engines = NULL; + fEngines = engines; + } else { + int32_t i = fEngines->size(); + while (--i >= 0) { + lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); + if (lbe != NULL && lbe->handles(c)) { + return lbe; } - umtx_unlock(NULL); - delete engines; } } - if (fEngines == NULL) { - return NULL; - } - - // We didn't find an engine the first time through, or there was no - // stack. Create an engine. - const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType); - - // Now get the lock, and see if someone else has created it in the - // meantime - umtx_lock(NULL); - i = fEngines->size(); - while (--i >= 0) { - lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); - if (lbe != NULL && lbe->handles(c, breakType)) { - break; - } - lbe = NULL; - } - if (lbe == NULL && newlbe != NULL) { - fEngines->push((void *)newlbe, status); - lbe = newlbe; - newlbe = NULL; + // We didn't find an engine. Create one. + lbe = loadEngineFor(c); + if (lbe != NULL) { + fEngines->push((void *)lbe, status); } - umtx_unlock(NULL); - - delete newlbe; - return lbe; } const LanguageBreakEngine * -ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { +ICULanguageBreakFactory::loadEngineFor(UChar32 c) { UErrorCode status = U_ZERO_ERROR; UScriptCode code = uscript_getScript(c, &status); if (U_SUCCESS(status)) { - DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType); + DictionaryMatcher *m = loadDictionaryMatcherFor(code); if (m != NULL) { const LanguageBreakEngine *engine = NULL; switch(code) { @@ -283,7 +224,7 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { } DictionaryMatcher * -ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) { +ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) { UErrorCode status = U_ZERO_ERROR; // open root from brkitr tree. UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);