1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 1999-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/17/99 aliu Creation.
10 **********************************************************************
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_TRANSLITERATION
17 #include "unicode/rep.h"
18 #include "unicode/uniset.h"
// Invokes the UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro for the
// RuleBasedTransliterator class.
// NOTE(review): presumably this emits the class-ID boilerplate used by ICU's
// run-time type identification; confirm against the macro's definition.
28 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator
)
// File-local pointer, initialised to NULL. handleTransliterate() compares the
// address of its text argument with this pointer to decide whether it needs
// to take the transliterator data mutex. A comment in that function states
// that this variable is protected by the global ICU mutex.
30 static Replaceable
*gLockedText
= NULL
;
// Shared construction helper: parses `rules` for the requested `direction`
// with a TransliteratorParser and stores the first parsed rule-data object in
// fData. `parseError` is handed through to the parser; success or failure is
// communicated through `status`.
// NOTE(review): this excerpt does not show every line of the definition (the
// declaration of `status` and the bodies of the failure branches are absent),
// so only the visible statements are described.
32 void RuleBasedTransliterator::_construct(const UnicodeString
& rules
,
33 UTransDirection direction
,
34 UParseError
& parseError
,
// Check for an already-failed status before doing any work.
38 if (U_FAILURE(status
)) {
// Run the rule parser; it reports problems through parseError and status.
42 TransliteratorParser
parser(status
);
43 parser
.parse(rules
, direction
, parseError
, status
);
44 if (U_FAILURE(status
)) {
// Treat the result as invalid syntax when the parser collected any ID blocks
// or a compound filter, or when it produced no rule data at all.
48 if (parser
.idBlockVector
.size() != 0 ||
49 parser
.compoundFilter
!= NULL
||
50 parser
.dataVector
.size() == 0) {
51 status
= U_INVALID_RBT_SYNTAX
; // ::ID blocks disallowed in RBT
// Take element 0 out of the parser's data vector and keep it as fData.
// NOTE(review): orphanElementAt presumably hands ownership to the caller
// without deleting the element -- confirm against the vector class.
55 fData
= (TransliterationRuleData
*)parser
.dataVector
.orphanElementAt(0);
// Publish the rule set's maximum context length through
// setMaximumContextLength().
56 setMaximumContextLength(fData
->ruleSet
.getMaximumContextLength());
60 * Constructs a new transliterator from the given rules.
61 * @param id the id for the transliterator.
62 * @param rules rules, separated by ';'
63 * @param direction either FORWARD or REVERSE.
64 * @param adoptedFilter the filter for this transliterator.
65 * @param parseError Struct to receive information on position
66 * of error if an error is encountered
67 * @param status Output param set to success/failure code.
68 * @exception IllegalArgumentException if rules are malformed
69 * or direction is invalid.
// Rule-string constructor: forwards `id` and `adoptedFilter` to the
// Transliterator base class, then delegates all rule parsing and data setup
// to _construct().
// NOTE(review): the declaration of the `status` argument passed to
// _construct() is not visible in this excerpt.
71 RuleBasedTransliterator::RuleBasedTransliterator(
72 const UnicodeString
& id
,
73 const UnicodeString
& rules
,
74 UTransDirection direction
,
75 UnicodeFilter
* adoptedFilter
,
76 UParseError
& parseError
,
// Base-class initialisation with the caller's id and filter.
78 Transliterator(id
, adoptedFilter
) {
// Parse the rules and populate fData; errors come back via parseError/status.
79 _construct(rules
, direction
,parseError
,status
);
83 * Constructs a new transliterator from the given rules.
84 * @param id the id for the transliterator.
85 * @param rules rules, separated by ';'
86 * @param direction either FORWARD or REVERSE.
87 * @param adoptedFilter the filter for this transliterator.
88 * @param status Output param set to success/failure code.
89 * @exception IllegalArgumentException if rules are malformed
90 * or direction is invalid.
92 /*RuleBasedTransliterator::RuleBasedTransliterator(
93 const UnicodeString& id,
94 const UnicodeString& rules,
95 UTransDirection direction,
96 UnicodeFilter* adoptedFilter,
98 Transliterator(id, adoptedFilter) {
99 UParseError parseError;
100 _construct(rules, direction,parseError, status);
104 * Convenience constructor with no filter.
106 /*RuleBasedTransliterator::RuleBasedTransliterator(
107 const UnicodeString& id,
108 const UnicodeString& rules,
109 UTransDirection direction,
110 UErrorCode& status) :
111 Transliterator(id, 0) {
112 UParseError parseError;
113 _construct(rules, direction,parseError, status);
117 * Convenience constructor with no filter and FORWARD direction.
119 /*RuleBasedTransliterator::RuleBasedTransliterator(
120 const UnicodeString& id,
121 const UnicodeString& rules,
122 UErrorCode& status) :
123 Transliterator(id, 0) {
124 UParseError parseError;
125 _construct(rules, UTRANS_FORWARD, parseError, status);
129 * Convenience constructor with FORWARD direction.
131 /*RuleBasedTransliterator::RuleBasedTransliterator(
132 const UnicodeString& id,
133 const UnicodeString& rules,
134 UnicodeFilter* adoptedFilter,
135 UErrorCode& status) :
136 Transliterator(id, adoptedFilter) {
137 UParseError parseError;
138 _construct(rules, UTRANS_FORWARD,parseError, status);
// Constructor taking pre-built rule data: passes `id` and `adoptedFilter` to
// the Transliterator base class and stores `theData` in fData after casting
// away its const qualifier.
// NOTE(review): one initializer line of this definition is not visible in
// this excerpt, so the ownership flag chosen here cannot be stated -- confirm
// against the full source.
141 RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString
& id
,
142 const TransliterationRuleData
* theData
,
143 UnicodeFilter
* adoptedFilter
) :
144 Transliterator(id
, adoptedFilter
),
145 fData((TransliterationRuleData
*)theData
), // cast away const
// Publish the rule set's maximum context length through
// setMaximumContextLength().
147 setMaximumContextLength(fData
->ruleSet
.getMaximumContextLength());
151 * Internal constructor.
// Internal constructor: initialises the Transliterator base class with `id`
// and a null (0) filter, and records `isDataAdopted` in isDataOwned.
// NOTE(review): the initializer that stores `theData` is not visible in this
// excerpt; fData is nevertheless dereferenced in the body, so it is
// presumably set from `theData` -- confirm against the full source.
153 RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString
& id
,
154 TransliterationRuleData
* theData
,
155 UBool isDataAdopted
) :
156 Transliterator(id
, 0),
158 isDataOwned(isDataAdopted
) {
// Publish the rule set's maximum context length through
// setMaximumContextLength().
159 setMaximumContextLength(fData
->ruleSet
.getMaximumContextLength());
// Copy constructor: copies the Transliterator base sub-object, the fData
// pointer and the isDataOwned flag from `other`. The body then contains a
// statement that replaces fData with a newly allocated copy of other's data.
// NOTE(review): the condition guarding that deep copy is not visible in this
// excerpt; per the comments below it is presumably taken only when the data
// is owned -- confirm against the full source.
165 RuleBasedTransliterator::RuleBasedTransliterator(
166 const RuleBasedTransliterator
& other
) :
167 Transliterator(other
), fData(other
.fData
),
168 isDataOwned(other
.isDataOwned
) {
170 // The data object may or may not be owned. If it is not owned we
171 // share it; it is invariant. If it is owned, it's still
172 // invariant, but we need to copy it to prevent double-deletion.
173 // If this becomes a performance issue (if people do a lot of RBT
174 // copying -- unlikely) we can reference count the data object.
176 // Only do a deep copy if this is owned data, that is, data that
177 // will be later deleted. System transliterators contain
// Deep copy: allocate a fresh TransliterationRuleData from other's data.
180 fData
= new TransliterationRuleData(*other
.fData
);
// Destructor.
// NOTE(review): the statements of the body are not visible in this excerpt;
// per the comment below, the rule data is presumably released only when this
// object owns it -- confirm against the full source.
187 RuleBasedTransliterator::~RuleBasedTransliterator() {
188 // Delete the data object only if we own it.
// Returns a heap-allocated copy of this object, created with the copy
// constructor. The declared return type is the base-class pointer
// (Transliterator*) rather than a covariant type, as noted below.
194 Transliterator
* // Covariant return NOT ALLOWED (for portability)
195 RuleBasedTransliterator::clone(void) const {
196 return new RuleBasedTransliterator(*this);
200 * Implements {@link Transliterator#handleTransliterate}.
// Applies this transliterator's rule set to `text` over the range tracked by
// `index`, repeating while index.start < index.limit, the iteration counter
// has not passed its cap, and the rule set's transliterate() call returns
// true. `isIncremental` is forwarded unchanged to the rule set. The rule
// application is bracketed by a mutex lock and unlock that are skipped when
// `text` is the object already recorded in gLockedText.
// NOTE(review): several lines of this definition are not visible in this
// excerpt (return type, declaration of needToLock, scaling of loopLimit, the
// loop body and the gLockedText bookkeeping), so those parts are not
// described here.
203 RuleBasedTransliterator::handleTransliterate(Replaceable
& text
, UTransPosition
& index
,
204 UBool isIncremental
) const {
205 /* We keep contextStart and contextLimit fixed the entire time,
206 * relative to the text -- contextLimit may move numerically if
207 * text is inserted or removed. The start offset moves toward
208 * limit, with replacements happening under it.
210 * Example: rules 1. ab>x|y
213 * |eabcd begin - no match, advance start
214 * e|abcd match rule 1 - change text & adjust start
215 * ex|ycd match rule 2 - change text & adjust start
216 * exz|d no match, advance start
222 * creates an infinite loop. To prevent that, we put an arbitrary
223 * limit on the number of iterations that we take, one that is
224 * high enough that any reasonable rules are ok, but low enough to
225 * prevent a server from hanging. The limit is 16 times the
226 * number of characters n, unless n is so large that 16n exceeds a
// Iteration cap: starts from the length of the range; ranges of 0x10000000
// or more use the maximum 32-bit value instead.
229 uint32_t loopCount
= 0;
230 uint32_t loopLimit
= index
.limit
- index
.start
;
231 if (loopLimit
>= 0x10000000) {
232 loopLimit
= 0xFFFFFFFF;
237 // Transliterator locking. Rule-based Transliterators are not thread safe; concurrent
238 // operations must be prevented.
239 // A Complication: compound transliterators can result in recursive entries to this
240 // function, sometimes with different "This" objects, always with the same text.
241 // Double-locking must be prevented in these cases.
// Remembers whether this invocation took the mutex, so that only the
// invocation that locked it performs the unlock at the end.
244 UBool lockedMutexAtThisLevel
= FALSE
;
246 // Test whether this request is operating on the same text string as
247 // some other transliteration that is still in progress and holding the
248 // transliteration mutex. If so, do not lock the transliteration
251 // gLockedText variable is protected by the global ICU mutex.
252 // Shared RBT data protected by transliteratorDataMutex.
254 // TODO(andy): Need a better scheme for handling this.
256 static UMutex
*transliteratorDataMutex
= STATIC_NEW(UMutex
);
// Locking is needed unless `text` is the very object recorded in gLockedText.
260 needToLock
= (&text
!= gLockedText
);
263 umtx_lock(transliteratorDataMutex
); // Contention, longish waits possible here.
266 lockedMutexAtThisLevel
= TRUE
;
269 // Check to make sure we don't dereference a null pointer.
// Main loop: keep applying rules while text remains in range, the cap has
// not been passed, and the rule set reports that it should continue.
271 while (index
.start
< index
.limit
&&
272 loopCount
<= loopLimit
&&
273 fData
->ruleSet
.transliterate(text
, index
, isIncremental
)) {
// Release the mutex only if this invocation was the one that acquired it.
277 if (lockedMutexAtThisLevel
) {
282 umtx_unlock(transliteratorDataMutex
);
// Delegates to the rule set's toRules(), passing `rulesSource` and
// `escapeUnprintable` through unchanged, and returns whatever reference that
// call returns.
286 UnicodeString
& RuleBasedTransliterator::toRules(UnicodeString
& rulesSource
,
287 UBool escapeUnprintable
) const {
288 return fData
->ruleSet
.toRules(rulesSource
, escapeUnprintable
);
292 * Implement Transliterator framework
// Fills `result` by calling the rule set's getSourceTargetSet() with FALSE
// as the second argument.
// NOTE(review): the flag presumably selects the source set rather than the
// target set -- confirm against the rule-set class.
294 void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet
& result
) const {
295 fData
->ruleSet
.getSourceTargetSet(result
, FALSE
);
299 * Override Transliterator framework
// Calls the rule set's getSourceTargetSet() with TRUE as the second argument
// and returns the reference that call returns.
// NOTE(review): the flag presumably selects the target set rather than the
// source set -- confirm against the rule-set class.
301 UnicodeSet
& RuleBasedTransliterator::getTargetSet(UnicodeSet
& result
) const {
302 return fData
->ruleSet
.getSourceTargetSet(result
, TRUE
);
307 #endif /* #if !UCONFIG_NO_TRANSLITERATION */