X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..38fbf2fd31f5cd99b500914d6037b1d06b608645:/icuSources/i18n/rbt.cpp diff --git a/icuSources/i18n/rbt.cpp b/icuSources/i18n/rbt.cpp index 1b54056c..9cb1b0e9 100644 --- a/icuSources/i18n/rbt.cpp +++ b/icuSources/i18n/rbt.cpp @@ -1,6 +1,8 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** -* Copyright (C) 1999-2003, International Business Machines +* Copyright (C) 1999-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -18,44 +20,132 @@ #include "rbt_data.h" #include "rbt_rule.h" #include "rbt.h" +#include "mutex.h" +#include "umutex.h" U_NAMESPACE_BEGIN -const char RuleBasedTransliterator::fgClassID = 0; // Value is irrelevant +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator) + +static UMutex transliteratorDataMutex = U_MUTEX_INITIALIZER; +static Replaceable *gLockedText = NULL; void RuleBasedTransliterator::_construct(const UnicodeString& rules, UTransDirection direction, UParseError& parseError, UErrorCode& status) { - data = 0; + fData = 0; isDataOwned = TRUE; if (U_FAILURE(status)) { return; } - TransliteratorParser parser; + TransliteratorParser parser(status); parser.parse(rules, direction, parseError, status); if (U_FAILURE(status)) { return; } - if (parser.idBlock.length() != 0 || - parser.compoundFilter != NULL) { + if (parser.idBlockVector.size() != 0 || + parser.compoundFilter != NULL || + parser.dataVector.size() == 0) { status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT return; } - data = parser.orphanData(); - setMaximumContextLength(data->ruleSet.getMaximumContextLength()); + fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); + 
setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); +} + +/** + * Constructs a new transliterator from the given rules. + * @param id the id for the transliterator. + * @param rules rules, separated by ';' + * @param direction either FORWARD or REVERSE. + * @param adoptedFilter the filter for this transliterator. + * @param parseError Struct to receive information on position + * of error if an error is encountered + * @param status Output param set to success/failure code. + * @exception IllegalArgumentException if rules are malformed + * or direction is invalid. + */ +RuleBasedTransliterator::RuleBasedTransliterator( + const UnicodeString& id, + const UnicodeString& rules, + UTransDirection direction, + UnicodeFilter* adoptedFilter, + UParseError& parseError, + UErrorCode& status) : + Transliterator(id, adoptedFilter) { + _construct(rules, direction,parseError,status); } +/** + * Constructs a new transliterator from the given rules. + * @param id the id for the transliterator. + * @param rules rules, separated by ';' + * @param direction either FORWARD or REVERSE. + * @param adoptedFilter the filter for this transliterator. + * @param status Output param set to success/failure code. + * @exception IllegalArgumentException if rules are malformed + * or direction is invalid. + */ +/*RuleBasedTransliterator::RuleBasedTransliterator( + const UnicodeString& id, + const UnicodeString& rules, + UTransDirection direction, + UnicodeFilter* adoptedFilter, + UErrorCode& status) : + Transliterator(id, adoptedFilter) { + UParseError parseError; + _construct(rules, direction,parseError, status); +}*/ + +/** + * Convenience constructor with no filter. 
+ */ +/*RuleBasedTransliterator::RuleBasedTransliterator( + const UnicodeString& id, + const UnicodeString& rules, + UTransDirection direction, + UErrorCode& status) : + Transliterator(id, 0) { + UParseError parseError; + _construct(rules, direction,parseError, status); +}*/ + +/** + * Convenience constructor with no filter and FORWARD direction. + */ +/*RuleBasedTransliterator::RuleBasedTransliterator( + const UnicodeString& id, + const UnicodeString& rules, + UErrorCode& status) : + Transliterator(id, 0) { + UParseError parseError; + _construct(rules, UTRANS_FORWARD, parseError, status); +}*/ + +/** + * Convenience constructor with FORWARD direction. + */ +/*RuleBasedTransliterator::RuleBasedTransliterator( + const UnicodeString& id, + const UnicodeString& rules, + UnicodeFilter* adoptedFilter, + UErrorCode& status) : + Transliterator(id, adoptedFilter) { + UParseError parseError; + _construct(rules, UTRANS_FORWARD,parseError, status); +}*/ + RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, const TransliterationRuleData* theData, UnicodeFilter* adoptedFilter) : Transliterator(id, adoptedFilter), - data((TransliterationRuleData*)theData), // cast away const + fData((TransliterationRuleData*)theData), // cast away const isDataOwned(FALSE) { - setMaximumContextLength(data->ruleSet.getMaximumContextLength()); + setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); } /** @@ -65,9 +155,9 @@ RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, TransliterationRuleData* theData, UBool isDataAdopted) : Transliterator(id, 0), - data(theData), + fData(theData), isDataOwned(isDataAdopted) { - setMaximumContextLength(data->ruleSet.getMaximumContextLength()); + setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); } /** @@ -75,7 +165,7 @@ RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, */ RuleBasedTransliterator::RuleBasedTransliterator( const RuleBasedTransliterator& other) : - 
Transliterator(other), data(other.data), + Transliterator(other), fData(other.fData), isDataOwned(other.isDataOwned) { // The data object may or may not be owned. If it is not owned we @@ -88,7 +178,7 @@ RuleBasedTransliterator::RuleBasedTransliterator( // will be later deleted. System transliterators contain // non-owned data. if (isDataOwned) { - data = new TransliterationRuleData(*other.data); + fData = new TransliterationRuleData(*other.fData); } } @@ -98,7 +188,7 @@ RuleBasedTransliterator::RuleBasedTransliterator( RuleBasedTransliterator::~RuleBasedTransliterator() { // Delete the data object only if we own it. if (isDataOwned) { - delete data; + delete fData; } } @@ -145,30 +235,70 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& loopLimit <<= 4; } - while (index.start < index.limit && - loopCount <= loopLimit && - data->ruleSet.transliterate(text, index, isIncremental)) { - ++loopCount; + // Transliterator locking. Rule-based Transliterators are not thread safe; concurrent + // operations must be prevented. + // A Complication: compound transliterators can result in recursive entries to this + // function, sometimes with different "This" objects, always with the same text. + // Double-locking must be prevented in these cases. + // + + UBool lockedMutexAtThisLevel = FALSE; + + // Test whether this request is operating on the same text string as + // some other transliteration that is still in progress and holding the + // transliteration mutex. If so, do not lock the transliteration + // mutex again. + // + // gLockedText variable is protected by the global ICU mutex. + // Shared RBT data protected by transliteratorDataMutex. + // + // TODO(andy): Need a better scheme for handling this. + UBool needToLock; + { + Mutex m; + needToLock = (&text != gLockedText); + } + if (needToLock) { + umtx_lock(&transliteratorDataMutex); // Contention, longish waits possible here. 
+ Mutex m; + gLockedText = &text; + lockedMutexAtThisLevel = TRUE; + } + + // Check to make sure we don't dereference a null pointer. + if (fData != NULL) { + while (index.start < index.limit && + loopCount <= loopLimit && + fData->ruleSet.transliterate(text, index, isIncremental)) { + ++loopCount; + } + } + if (lockedMutexAtThisLevel) { + { + Mutex m; + gLockedText = NULL; + } + umtx_unlock(&transliteratorDataMutex); } } UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource, UBool escapeUnprintable) const { - return data->ruleSet.toRules(rulesSource, escapeUnprintable); + return fData->ruleSet.toRules(rulesSource, escapeUnprintable); } /** * Implement Transliterator framework */ void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const { - data->ruleSet.getSourceTargetSet(result, FALSE); + fData->ruleSet.getSourceTargetSet(result, FALSE); } /** * Override Transliterator framework */ UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const { - return data->ruleSet.getSourceTargetSet(result, TRUE); + return fData->ruleSet.getSourceTargetSet(result, TRUE); } U_NAMESPACE_END