]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f | 3 | /* |
374ca955 | 4 | ******************************************************************************* |
57a6839d | 5 | * Copyright (C) 1997-2009,2014 International Business Machines |
374ca955 A |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************* | |
8 | * Date Name Description | |
9 | * 06/21/00 aliu Creation. | |
10 | ******************************************************************************* | |
11 | */ | |
b75a7d8f A |
12 | |
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_TRANSLITERATION | |
16 | ||
17 | #include "unicode/utrans.h" | |
18 | #include "unicode/putil.h" | |
19 | #include "unicode/rep.h" | |
20 | #include "unicode/translit.h" | |
21 | #include "unicode/unifilt.h" | |
22 | #include "unicode/uniset.h" | |
23 | #include "unicode/ustring.h" | |
374ca955 | 24 | #include "unicode/uenum.h" |
57a6839d | 25 | #include "unicode/uset.h" |
374ca955 | 26 | #include "uenumimp.h" |
b75a7d8f A |
27 | #include "cpputils.h" |
28 | #include "rbt.h" | |
29 | ||
30 | // Following macro is to be followed by <return value>';' or just ';' | |
31 | #define utrans_ENTRY(s) if ((s)==NULL || U_FAILURE(*(s))) return | |
32 | ||
33 | /******************************************************************** | |
34 | * Replaceable-UReplaceableCallbacks glue | |
35 | ********************************************************************/ | |
36 | ||
37 | /** | |
38 | * Make a UReplaceable + UReplaceableCallbacks into a Replaceable object. | |
39 | */ | |
40 | U_NAMESPACE_BEGIN | |
41 | class ReplaceableGlue : public Replaceable { | |
42 | ||
43 | UReplaceable *rep; | |
0f5d89e8 | 44 | const UReplaceableCallbacks *func; |
b75a7d8f | 45 | |
b75a7d8f A |
46 | public: |
47 | ||
48 | ReplaceableGlue(UReplaceable *replaceable, | |
0f5d89e8 | 49 | const UReplaceableCallbacks *funcCallback); |
b75a7d8f A |
50 | |
51 | virtual ~ReplaceableGlue(); | |
52 | ||
53 | virtual void handleReplaceBetween(int32_t start, | |
54 | int32_t limit, | |
55 | const UnicodeString& text); | |
56 | ||
57 | virtual void extractBetween(int32_t start, | |
58 | int32_t limit, | |
59 | UnicodeString& target) const; | |
60 | ||
61 | virtual void copy(int32_t start, int32_t limit, int32_t dest); | |
62 | ||
63 | // virtual Replaceable *clone() const { return NULL; } same as default | |
64 | ||
65 | /** | |
66 | * ICU "poor man's RTTI", returns a UClassID for the actual class. | |
67 | * | |
68 | * @draft ICU 2.2 | |
69 | */ | |
73c04bcf | 70 | virtual UClassID getDynamicClassID() const; |
b75a7d8f A |
71 | |
72 | /** | |
73 | * ICU "poor man's RTTI", returns a UClassID for this class. | |
74 | * | |
75 | * @draft ICU 2.2 | |
76 | */ | |
73c04bcf | 77 | static UClassID U_EXPORT2 getStaticClassID(); |
b75a7d8f A |
78 | |
79 | protected: | |
80 | ||
81 | virtual int32_t getLength() const; | |
82 | ||
83 | virtual UChar getCharAt(int32_t offset) const; | |
84 | ||
85 | virtual UChar32 getChar32At(int32_t offset) const; | |
86 | }; | |
87 | ||
374ca955 | 88 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue) |
b75a7d8f A |
89 | |
90 | ReplaceableGlue::ReplaceableGlue(UReplaceable *replaceable, | |
0f5d89e8 | 91 | const UReplaceableCallbacks *funcCallback) |
b75a7d8f A |
92 | : Replaceable() |
93 | { | |
94 | this->rep = replaceable; | |
95 | this->func = funcCallback; | |
96 | } | |
97 | ||
98 | ReplaceableGlue::~ReplaceableGlue() {} | |
99 | ||
100 | int32_t ReplaceableGlue::getLength() const { | |
101 | return (*func->length)(rep); | |
102 | } | |
103 | ||
104 | UChar ReplaceableGlue::getCharAt(int32_t offset) const { | |
105 | return (*func->charAt)(rep, offset); | |
106 | } | |
107 | ||
108 | UChar32 ReplaceableGlue::getChar32At(int32_t offset) const { | |
109 | return (*func->char32At)(rep, offset); | |
110 | } | |
111 | ||
112 | void ReplaceableGlue::handleReplaceBetween(int32_t start, | |
113 | int32_t limit, | |
114 | const UnicodeString& text) { | |
115 | (*func->replace)(rep, start, limit, text.getBuffer(), text.length()); | |
116 | } | |
117 | ||
118 | void ReplaceableGlue::extractBetween(int32_t start, | |
119 | int32_t limit, | |
120 | UnicodeString& target) const { | |
121 | (*func->extract)(rep, start, limit, target.getBuffer(limit-start)); | |
122 | target.releaseBuffer(limit-start); | |
123 | } | |
124 | ||
125 | void ReplaceableGlue::copy(int32_t start, int32_t limit, int32_t dest) { | |
126 | (*func->copy)(rep, start, limit, dest); | |
127 | } | |
128 | U_NAMESPACE_END | |
129 | /******************************************************************** | |
130 | * General API | |
131 | ********************************************************************/ | |
132 | U_NAMESPACE_USE | |
b75a7d8f | 133 | |
374ca955 A |
134 | U_CAPI UTransliterator* U_EXPORT2 |
135 | utrans_openU(const UChar *id, | |
136 | int32_t idLength, | |
137 | UTransDirection dir, | |
138 | const UChar *rules, | |
139 | int32_t rulesLength, | |
140 | UParseError *parseError, | |
141 | UErrorCode *status) { | |
142 | if(status==NULL || U_FAILURE(*status)) { | |
b75a7d8f A |
143 | return NULL; |
144 | } | |
b75a7d8f A |
145 | if (id == NULL) { |
146 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
147 | return NULL; | |
148 | } | |
149 | UParseError temp; | |
150 | ||
151 | if(parseError == NULL){ | |
152 | parseError = &temp; | |
153 | } | |
154 | ||
374ca955 | 155 | UnicodeString ID(idLength<0, id, idLength); // r-o alias |
b75a7d8f A |
156 | |
157 | if(rules==NULL){ | |
158 | ||
159 | Transliterator *trans = NULL; | |
160 | ||
161 | trans = Transliterator::createInstance(ID, dir, *parseError, *status); | |
162 | ||
163 | if(U_FAILURE(*status)){ | |
164 | return NULL; | |
165 | } | |
166 | return (UTransliterator*) trans; | |
167 | }else{ | |
168 | UnicodeString ruleStr(rulesLength < 0, | |
169 | rules, | |
170 | rulesLength); // r-o alias | |
171 | ||
729e4ab9 A |
172 | Transliterator *trans = NULL; |
173 | trans = Transliterator::createFromRules(ID, ruleStr, dir, *parseError, *status); | |
174 | if(U_FAILURE(*status)) { | |
175 | return NULL; | |
b75a7d8f | 176 | } |
729e4ab9 | 177 | |
b75a7d8f A |
178 | return (UTransliterator*) trans; |
179 | } | |
180 | } | |
181 | ||
374ca955 A |
182 | U_CAPI UTransliterator* U_EXPORT2 |
183 | utrans_open(const char* id, | |
184 | UTransDirection dir, | |
185 | const UChar* rules, /* may be Null */ | |
186 | int32_t rulesLength, /* -1 if null-terminated */ | |
187 | UParseError* parseError, /* may be Null */ | |
188 | UErrorCode* status) { | |
73c04bcf | 189 | UnicodeString ID(id, -1, US_INV); // use invariant converter |
374ca955 A |
190 | return utrans_openU(ID.getBuffer(), ID.length(), dir, |
191 | rules, rulesLength, | |
192 | parseError, status); | |
193 | } | |
194 | ||
b75a7d8f A |
195 | U_CAPI UTransliterator* U_EXPORT2 |
196 | utrans_openInverse(const UTransliterator* trans, | |
197 | UErrorCode* status) { | |
198 | ||
199 | utrans_ENTRY(status) NULL; | |
200 | ||
201 | UTransliterator* result = | |
202 | (UTransliterator*) ((Transliterator*) trans)->createInverse(*status); | |
203 | ||
204 | return result; | |
205 | } | |
206 | ||
207 | U_CAPI UTransliterator* U_EXPORT2 | |
208 | utrans_clone(const UTransliterator* trans, | |
209 | UErrorCode* status) { | |
210 | ||
211 | utrans_ENTRY(status) NULL; | |
212 | ||
213 | if (trans == NULL) { | |
214 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
215 | return NULL; | |
216 | } | |
217 | ||
218 | Transliterator *t = ((Transliterator*) trans)->clone(); | |
219 | if (t == NULL) { | |
220 | *status = U_MEMORY_ALLOCATION_ERROR; | |
221 | } | |
222 | return (UTransliterator*) t; | |
223 | } | |
224 | ||
225 | U_CAPI void U_EXPORT2 | |
226 | utrans_close(UTransliterator* trans) { | |
227 | delete (Transliterator*) trans; | |
228 | } | |
229 | ||
374ca955 A |
230 | U_CAPI const UChar * U_EXPORT2 |
231 | utrans_getUnicodeID(const UTransliterator *trans, | |
232 | int32_t *resultLength) { | |
233 | // Transliterator keeps its ID NUL-terminated | |
234 | const UnicodeString &ID=((Transliterator*) trans)->getID(); | |
235 | if(resultLength!=NULL) { | |
236 | *resultLength=ID.length(); | |
237 | } | |
238 | return ID.getBuffer(); | |
239 | } | |
240 | ||
b75a7d8f A |
241 | U_CAPI int32_t U_EXPORT2 |
242 | utrans_getID(const UTransliterator* trans, | |
243 | char* buf, | |
244 | int32_t bufCapacity) { | |
73c04bcf | 245 | return ((Transliterator*) trans)->getID().extract(0, 0x7fffffff, buf, bufCapacity, US_INV); |
b75a7d8f A |
246 | } |
247 | ||
248 | U_CAPI void U_EXPORT2 | |
249 | utrans_register(UTransliterator* adoptedTrans, | |
250 | UErrorCode* status) { | |
251 | utrans_ENTRY(status); | |
252 | // status currently ignored; may remove later | |
253 | Transliterator::registerInstance((Transliterator*) adoptedTrans); | |
254 | } | |
255 | ||
374ca955 A |
256 | U_CAPI void U_EXPORT2 |
257 | utrans_unregisterID(const UChar* id, int32_t idLength) { | |
258 | UnicodeString ID(idLength<0, id, idLength); // r-o alias | |
259 | Transliterator::unregister(ID); | |
260 | } | |
261 | ||
b75a7d8f A |
262 | U_CAPI void U_EXPORT2 |
263 | utrans_unregister(const char* id) { | |
73c04bcf | 264 | UnicodeString ID(id, -1, US_INV); // use invariant converter |
b75a7d8f A |
265 | Transliterator::unregister(ID); |
266 | } | |
267 | ||
268 | U_CAPI void U_EXPORT2 | |
269 | utrans_setFilter(UTransliterator* trans, | |
270 | const UChar* filterPattern, | |
271 | int32_t filterPatternLen, | |
272 | UErrorCode* status) { | |
273 | ||
274 | utrans_ENTRY(status); | |
275 | UnicodeFilter* filter = NULL; | |
276 | if (filterPattern != NULL && *filterPattern != 0) { | |
277 | // Create read only alias of filterPattern: | |
278 | UnicodeString pat(filterPatternLen < 0, filterPattern, filterPatternLen); | |
279 | filter = new UnicodeSet(pat, *status); | |
280 | /* test for NULL */ | |
281 | if (filter == NULL) { | |
282 | *status = U_MEMORY_ALLOCATION_ERROR; | |
283 | return; | |
284 | } | |
285 | if (U_FAILURE(*status)) { | |
286 | delete filter; | |
287 | filter = NULL; | |
288 | } | |
289 | } | |
290 | ((Transliterator*) trans)->adoptFilter(filter); | |
291 | } | |
292 | ||
293 | U_CAPI int32_t U_EXPORT2 | |
294 | utrans_countAvailableIDs(void) { | |
295 | return Transliterator::countAvailableIDs(); | |
296 | } | |
297 | ||
298 | U_CAPI int32_t U_EXPORT2 | |
299 | utrans_getAvailableID(int32_t index, | |
300 | char* buf, // may be NULL | |
301 | int32_t bufCapacity) { | |
73c04bcf | 302 | return Transliterator::getAvailableID(index).extract(0, 0x7fffffff, buf, bufCapacity, US_INV); |
b75a7d8f A |
303 | } |
304 | ||
374ca955 A |
305 | /* Transliterator UEnumeration ---------------------------------------------- */ |
306 | ||
307 | typedef struct UTransEnumeration { | |
308 | UEnumeration uenum; | |
309 | int32_t index, count; | |
310 | } UTransEnumeration; | |
311 | ||
312 | U_CDECL_BEGIN | |
313 | static int32_t U_CALLCONV | |
314 | utrans_enum_count(UEnumeration *uenum, UErrorCode *pErrorCode) { | |
315 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
316 | return 0; | |
317 | } | |
318 | return ((UTransEnumeration *)uenum)->count; | |
319 | } | |
320 | ||
321 | static const UChar* U_CALLCONV | |
322 | utrans_enum_unext(UEnumeration *uenum, | |
323 | int32_t* resultLength, | |
324 | UErrorCode *pErrorCode) { | |
325 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
326 | return 0; | |
327 | } | |
328 | ||
329 | UTransEnumeration *ute=(UTransEnumeration *)uenum; | |
330 | int32_t index=ute->index; | |
331 | if(index<ute->count) { | |
332 | const UnicodeString &ID=Transliterator::getAvailableID(index); | |
333 | ute->index=index+1; | |
334 | if(resultLength!=NULL) { | |
335 | *resultLength=ID.length(); | |
336 | } | |
337 | // Transliterator keeps its ID NUL-terminated | |
338 | return ID.getBuffer(); | |
339 | } | |
340 | ||
341 | if(resultLength!=NULL) { | |
342 | *resultLength=0; | |
343 | } | |
344 | return NULL; | |
345 | } | |
346 | ||
347 | static void U_CALLCONV | |
348 | utrans_enum_reset(UEnumeration *uenum, UErrorCode *pErrorCode) { | |
349 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
350 | return; | |
351 | } | |
352 | ||
353 | UTransEnumeration *ute=(UTransEnumeration *)uenum; | |
354 | ute->index=0; | |
355 | ute->count=Transliterator::countAvailableIDs(); | |
356 | } | |
357 | ||
358 | static void U_CALLCONV | |
359 | utrans_enum_close(UEnumeration *uenum) { | |
360 | uprv_free(uenum); | |
361 | } | |
362 | U_CDECL_END | |
363 | ||
364 | static const UEnumeration utransEnumeration={ | |
365 | NULL, | |
366 | NULL, | |
367 | utrans_enum_close, | |
368 | utrans_enum_count, | |
369 | utrans_enum_unext, | |
370 | uenum_nextDefault, | |
371 | utrans_enum_reset | |
372 | }; | |
373 | ||
374 | U_CAPI UEnumeration * U_EXPORT2 | |
375 | utrans_openIDs(UErrorCode *pErrorCode) { | |
376 | UTransEnumeration *ute; | |
377 | ||
378 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
379 | return NULL; | |
380 | } | |
381 | ||
382 | ute=(UTransEnumeration *)uprv_malloc(sizeof(UTransEnumeration)); | |
383 | if(ute==NULL) { | |
384 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
385 | return NULL; | |
386 | } | |
387 | ||
388 | ute->uenum=utransEnumeration; | |
389 | ute->index=0; | |
390 | ute->count=Transliterator::countAvailableIDs(); | |
391 | return (UEnumeration *)ute; | |
392 | } | |
393 | ||
b75a7d8f A |
394 | /******************************************************************** |
395 | * Transliteration API | |
396 | ********************************************************************/ | |
397 | ||
398 | U_CAPI void U_EXPORT2 | |
399 | utrans_trans(const UTransliterator* trans, | |
400 | UReplaceable* rep, | |
0f5d89e8 | 401 | const UReplaceableCallbacks* repFunc, |
b75a7d8f A |
402 | int32_t start, |
403 | int32_t* limit, | |
404 | UErrorCode* status) { | |
405 | ||
406 | utrans_ENTRY(status); | |
407 | ||
408 | if (trans == 0 || rep == 0 || repFunc == 0 || limit == 0) { | |
409 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
410 | return; | |
411 | } | |
412 | ||
413 | ReplaceableGlue r(rep, repFunc); | |
414 | ||
415 | *limit = ((Transliterator*) trans)->transliterate(r, start, *limit); | |
416 | } | |
417 | ||
418 | U_CAPI void U_EXPORT2 | |
419 | utrans_transIncremental(const UTransliterator* trans, | |
420 | UReplaceable* rep, | |
0f5d89e8 | 421 | const UReplaceableCallbacks* repFunc, |
b75a7d8f A |
422 | UTransPosition* pos, |
423 | UErrorCode* status) { | |
424 | ||
425 | utrans_ENTRY(status); | |
426 | ||
427 | if (trans == 0 || rep == 0 || repFunc == 0 || pos == 0) { | |
428 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
429 | return; | |
430 | } | |
431 | ||
432 | ReplaceableGlue r(rep, repFunc); | |
433 | ||
434 | ((Transliterator*) trans)->transliterate(r, *pos, *status); | |
435 | } | |
436 | ||
437 | U_CAPI void U_EXPORT2 | |
438 | utrans_transUChars(const UTransliterator* trans, | |
439 | UChar* text, | |
440 | int32_t* textLength, | |
441 | int32_t textCapacity, | |
442 | int32_t start, | |
443 | int32_t* limit, | |
444 | UErrorCode* status) { | |
445 | ||
446 | utrans_ENTRY(status); | |
447 | ||
448 | if (trans == 0 || text == 0 || limit == 0) { | |
449 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
450 | return; | |
451 | } | |
452 | ||
453 | int32_t textLen = (textLength == NULL || *textLength < 0) | |
454 | ? u_strlen(text) : *textLength; | |
455 | // writeable alias: for this ct, len CANNOT be -1 (why?) | |
456 | UnicodeString str(text, textLen, textCapacity); | |
457 | ||
458 | *limit = ((Transliterator*) trans)->transliterate(str, start, *limit); | |
459 | ||
460 | // Copy the string buffer back to text (only if necessary) | |
461 | // and fill in *neededCapacity (if neededCapacity != NULL). | |
462 | textLen = str.extract(text, textCapacity, *status); | |
463 | if(textLength != NULL) { | |
464 | *textLength = textLen; | |
465 | } | |
466 | } | |
467 | ||
468 | U_CAPI void U_EXPORT2 | |
469 | utrans_transIncrementalUChars(const UTransliterator* trans, | |
470 | UChar* text, | |
471 | int32_t* textLength, | |
472 | int32_t textCapacity, | |
473 | UTransPosition* pos, | |
474 | UErrorCode* status) { | |
475 | ||
476 | utrans_ENTRY(status); | |
477 | ||
478 | if (trans == 0 || text == 0 || pos == 0) { | |
479 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
480 | return; | |
481 | } | |
482 | ||
483 | int32_t textLen = (textLength == NULL || *textLength < 0) | |
484 | ? u_strlen(text) : *textLength; | |
485 | // writeable alias: for this ct, len CANNOT be -1 (why?) | |
486 | UnicodeString str(text, textLen, textCapacity); | |
487 | ||
488 | ((Transliterator*) trans)->transliterate(str, *pos, *status); | |
489 | ||
490 | // Copy the string buffer back to text (only if necessary) | |
491 | // and fill in *neededCapacity (if neededCapacity != NULL). | |
492 | textLen = str.extract(text, textCapacity, *status); | |
493 | if(textLength != NULL) { | |
494 | *textLength = textLen; | |
495 | } | |
496 | } | |
497 | ||
57a6839d A |
498 | U_CAPI int32_t U_EXPORT2 |
499 | utrans_toRules( const UTransliterator* trans, | |
500 | UBool escapeUnprintable, | |
501 | UChar* result, int32_t resultLength, | |
502 | UErrorCode* status) { | |
503 | utrans_ENTRY(status) 0; | |
504 | if ( (result==NULL)? resultLength!=0: resultLength<0 ) { | |
505 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
506 | return 0; | |
507 | } | |
508 | ||
509 | UnicodeString res; | |
510 | res.setTo(result, 0, resultLength); | |
511 | ((Transliterator*) trans)->toRules(res, escapeUnprintable); | |
512 | return res.extract(result, resultLength, *status); | |
513 | } | |
514 | ||
515 | U_CAPI USet* U_EXPORT2 | |
516 | utrans_getSourceSet(const UTransliterator* trans, | |
517 | UBool ignoreFilter, | |
518 | USet* fillIn, | |
519 | UErrorCode* status) { | |
520 | utrans_ENTRY(status) fillIn; | |
521 | ||
522 | if (fillIn == NULL) { | |
523 | fillIn = uset_openEmpty(); | |
524 | } | |
525 | if (ignoreFilter) { | |
526 | ((Transliterator*) trans)->handleGetSourceSet(*((UnicodeSet*)fillIn)); | |
527 | } else { | |
528 | ((Transliterator*) trans)->getSourceSet(*((UnicodeSet*)fillIn)); | |
529 | } | |
530 | return fillIn; | |
531 | } | |
532 | ||
b75a7d8f | 533 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |