]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ubiditransform.cpp
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / common / ubiditransform.cpp
1 /*
2 ******************************************************************************
3 *
4 * © 2016 and later: Unicode, Inc. and others.
5 * License & terms of use: http://www.unicode.org/copyright.html
6 *
7 ******************************************************************************
8 * file name: ubiditransform.c
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2016jul24
14 * created by: Lina Kemmel
15 *
16 */
17
18 #include "cmemory.h"
19 #include "unicode/ubidi.h"
20 #include "unicode/ustring.h"
21 #include "unicode/ushape.h"
22 #include "unicode/utf16.h"
23 #include "ustr_imp.h"
24 #include "unicode/ubiditransform.h"
25
/* Some convenience defines: short aliases for the UBiDi direction and order
   constants and for the Arabic-shaping text-direction flags. They are used
   as field values in the Schemes table further down in this file. */
#define LTR             UBIDI_LTR
#define RTL             UBIDI_RTL
#define LOGICAL         UBIDI_LOGICAL
#define VISUAL          UBIDI_VISUAL
#define SHAPE_LOGICAL   U_SHAPE_TEXT_DIRECTION_LOGICAL
#define SHAPE_VISUAL    U_SHAPE_TEXT_DIRECTION_VISUAL_LTR
33
/*
 * Validates and normalizes a caller-supplied length argument.
 *
 * NOTE: this macro may return from the ENCLOSING function:
 *   - LEN == 0   : returns 0 (nothing to do);
 *   - LEN <  -1  : stores U_ILLEGAL_ARGUMENT_ERROR through ERROR and returns 0;
 *   - LEN == -1  : replaces LEN with u_strlen(STR) and falls through.
 * Any other value is left untouched.
 *
 * LEN must be a modifiable lvalue and is evaluated several times, so it must
 * not have side effects. The body is wrapped in do { } while (0) and every
 * argument is parenthesized so that the macro behaves like a single statement
 * (e.g. inside an unbraced if/else) and is safe with expression arguments.
 */
#define CHECK_LEN(STR, LEN, ERROR) do { \
        if ((LEN) == 0) return 0; \
        if ((LEN) < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \
        if ((LEN) == -1) (LEN) = u_strlen(STR); \
    } while (0)
39
/* Number of slots in ReorderingScheme::actions. Each scheme's action list is
   terminated by a NULL entry, which occupies one of these slots. */
#define MAX_ACTIONS     7
41
/**
 * Typedef for a pointer to a function, which performs some operation (such as
 * reordering, setting "inverse" mode, character mirroring, etc.). Return value
 * indicates whether the text was changed in the course of this operation or
 * not. Errors are reported through the <code>UErrorCode</code> argument, not
 * through the return value.
 */
typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *);
49
/**
 * Structure that holds a predefined reordering scheme, including the following
 * information:
 * <ul>
 * <li>an input base direction,</li>
 * <li>an input order,</li>
 * <li>an output base direction,</li>
 * <li>an output order,</li>
 * <li>a digit shaping direction,</li>
 * <li>a letter shaping direction,</li>
 * <li>a base direction that should be applied when the reordering engine is
 *     invoked (which can not always be derived from the caller-defined
 *     options),</li>
 * <li>an array of pointers to functions that accomplish the bidi layout
 *     transformation.</li>
 * </ul>
 * The first four fields form the lookup key used by findMatchingScheme().
 */
typedef struct {
    UBiDiLevel        inLevel;               /* input level */
    UBiDiOrder        inOrder;               /* input order */
    UBiDiLevel        outLevel;              /* output level */
    UBiDiOrder        outOrder;              /* output order */
    uint32_t          digitsDir;             /* digit shaping direction */
    uint32_t          lettersDir;            /* letter shaping direction */
    UBiDiLevel        baseLevel;             /* paragraph level to be used with setPara */
    const UBiDiAction actions[MAX_ACTIONS];  /* NULL-terminated array of pointers to functions carrying out the transformation, run in order */
} ReorderingScheme;
77
/**
 * State of a bidi transformation. The object may be reused across calls to
 * ubiditransform_transform(); <code>pBidi</code> and <code>src</code> are
 * released by ubiditransform_close(), while <code>dest</code> and
 * <code>pDestLength</code> are only borrowed for the duration of one call and
 * are reset to NULL when that call finishes.
 */
struct UBiDiTransform {
    UBiDi                   *pBidi;             /* pointer to a UBiDi object; created on first use, closed in ubiditransform_close() */
    const ReorderingScheme  *pActiveScheme;     /* effective reordering scheme */
    UChar                   *src;               /* input text for the current step; internal buffer managed by updateSrc() */
    UChar                   *dest;              /* output text (the caller's buffer) */
    uint32_t                srcLength;          /* input text length - not really needed as we are zero-terminated and can u_strlen */
    uint32_t                srcSize;            /* number of UChars allocated for src, as recorded by updateSrc() */
    uint32_t                destSize;           /* output text capacity */
    uint32_t                *pDestLength;       /* number of UChars written to dest */
    uint32_t                reorderingOptions;  /* reordering options - currently only support DO_MIRRORING */
    uint32_t                digits;             /* digit option for ArabicShaping */
    uint32_t                letters;            /* letter option for ArabicShaping */
};
91
92 U_DRAFT UBiDiTransform* U_EXPORT2
93 ubiditransform_open(UErrorCode *pErrorCode)
94 {
95 UBiDiTransform *pBiDiTransform = NULL;
96 if (U_SUCCESS(*pErrorCode)) {
97 pBiDiTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform));
98 if (pBiDiTransform == NULL) {
99 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
100 }
101 }
102 return pBiDiTransform;
103 }
104
105 U_DRAFT void U_EXPORT2
106 ubiditransform_close(UBiDiTransform *pBiDiTransform)
107 {
108 if (pBiDiTransform != NULL) {
109 if (pBiDiTransform->pBidi != NULL) {
110 ubidi_close(pBiDiTransform->pBidi);
111 }
112 if (pBiDiTransform->src != NULL) {
113 uprv_free(pBiDiTransform->src);
114 }
115 uprv_free(pBiDiTransform);
116 }
117 }
118
119 /**
120 * Performs Bidi resolution of text.
121 *
122 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
123 * @param pErrorCode Pointer to the error code value.
124 *
125 * @return Whether or not this function modifies the text. Besides the return
126 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
127 */
128 static UBool
129 action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
130 {
131 ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength,
132 pTransform->pActiveScheme->baseLevel, NULL, pErrorCode);
133 return FALSE;
134 }
135
136 /**
137 * Performs basic reordering of text (Logical -> Visual LTR).
138 *
139 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
140 * @param pErrorCode Pointer to the error code value.
141 *
142 * @return Whether or not this function modifies the text. Besides the return
143 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
144 */
145 static UBool
146 action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
147 {
148 ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize,
149 static_cast<uint16_t>(pTransform->reorderingOptions), pErrorCode);
150
151 *pTransform->pDestLength = pTransform->srcLength;
152 pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
153 return TRUE;
154 }
155
156 /**
157 * Sets "inverse" mode on the <code>UBiDi</code> object.
158 *
159 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
160 * @param pErrorCode Pointer to the error code value.
161 *
162 * @return Whether or not this function modifies the text. Besides the return
163 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
164 */
165 static UBool
166 action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
167 {
168 (void)pErrorCode;
169 ubidi_setInverse(pTransform->pBidi, TRUE);
170 ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT);
171 return FALSE;
172 }
173
174 /**
175 * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL
176 * transformation.
177 *
178 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
179 * @param pErrorCode Pointer to the error code value.
180 *
181 * @return Whether or not this function modifies the text. Besides the return
182 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
183 */
184 static UBool
185 action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
186 {
187 (void)pErrorCode;
188 ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY);
189 return FALSE;
190 }
191
192 /**
193 * Performs string reverse.
194 *
195 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
196 * @param pErrorCode Pointer to the error code value.
197 *
198 * @return Whether or not this function modifies the text. Besides the return
199 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
200 */
201 static UBool
202 action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
203 {
204 ubidi_writeReverse(pTransform->src, pTransform->srcLength,
205 pTransform->dest, pTransform->destSize,
206 UBIDI_REORDER_DEFAULT, pErrorCode);
207 *pTransform->pDestLength = pTransform->srcLength;
208 return TRUE;
209 }
210
211 /**
212 * Applies a new value to the text that serves as input at the current
213 * processing step. This value is identical to the original one when we begin
214 * the processing, but usually changes as the transformation progresses.
215 *
216 * @param pTransform A pointer to the <code>UBiDiTransform</code> structure.
217 * @param newSrc A pointer whose value is to be used as input text.
218 * @param newLength A length of the new text in <code>UChar</code>s.
219 * @param newSize A new source capacity in <code>UChar</code>s.
220 * @param pErrorCode Pointer to the error code value.
221 */
222 static void
223 updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength,
224 uint32_t newSize, UErrorCode *pErrorCode)
225 {
226 if (newSize < newLength) {
227 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
228 return;
229 }
230 if (newSize > pTransform->srcSize) {
231 newSize += 50; // allocate slightly more than needed right now
232 if (pTransform->src != NULL) {
233 uprv_free(pTransform->src);
234 pTransform->src = NULL;
235 }
236 pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar));
237 if (pTransform->src == NULL) {
238 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
239 //pTransform->srcLength = pTransform->srcSize = 0;
240 return;
241 }
242 pTransform->srcSize = newSize;
243 }
244 u_strncpy(pTransform->src, newSrc, newLength);
245 pTransform->srcLength = u_terminateUChars(pTransform->src,
246 pTransform->srcSize, newLength, pErrorCode);
247 }
248
249 /**
250 * Calls a lower level shaping function.
251 *
252 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
253 * @param options Shaping options.
254 * @param pErrorCode Pointer to the error code value.
255 */
256 static void
257 doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode)
258 {
259 *pTransform->pDestLength = u_shapeArabic(pTransform->src,
260 pTransform->srcLength, pTransform->dest, pTransform->destSize,
261 options, pErrorCode);
262 }
263
264 /**
265 * Performs digit and letter shaping.
266 *
267 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
268 * @param pErrorCode Pointer to the error code value.
269 *
270 * @return Whether or not this function modifies the text. Besides the return
271 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
272 */
273 static UBool
274 action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
275 {
276 if ((pTransform->letters | pTransform->digits) == 0) {
277 return FALSE;
278 }
279 if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) {
280 doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir,
281 pErrorCode);
282 } else {
283 doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode);
284 if (U_SUCCESS(*pErrorCode)) {
285 updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength,
286 *pTransform->pDestLength, pErrorCode);
287 doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir,
288 pErrorCode);
289 }
290 }
291 return TRUE;
292 }
293
294 /**
295 * Performs character mirroring.
296 *
297 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
298 * @param pErrorCode Pointer to the error code value.
299 *
300 * @return Whether or not this function modifies the text. Besides the return
301 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
302 */
303 static UBool
304 action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
305 {
306 UChar32 c;
307 uint32_t i = 0, j = 0;
308 if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) {
309 return FALSE;
310 }
311 if (pTransform->destSize < pTransform->srcLength) {
312 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
313 return FALSE;
314 }
315 do {
316 UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1;
317 U16_NEXT(pTransform->src, i, pTransform->srcLength, c);
318 U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c);
319 } while (i < pTransform->srcLength);
320
321 *pTransform->pDestLength = pTransform->srcLength;
322 pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
323 return TRUE;
324 }
325
/**
 * All possible reordering schemes.
 *
 * Each entry lists, in order: input level, input order, output level, output
 * order, digit shaping direction, letter shaping direction, the paragraph
 * level passed to the reordering engine, and the NULL-terminated list of
 * actions that carry out the transformation. findMatchingScheme() selects an
 * entry by its first four fields.
 */
static const ReorderingScheme Schemes[] =
{
    /* 0: Logical LTR => Visual LTR */
    {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
            {action_shapeArabic, action_resolve, action_reorder, NULL}},
    /* 1: Logical RTL => Visual LTR */
    {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
            {action_resolve, action_reorder, action_shapeArabic, NULL}},
    /* 2: Logical LTR => Visual RTL */
    {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
            {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}},
    /* 3: Logical RTL => Visual RTL */
    {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
            {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}},
    /* 4: Visual LTR => Logical RTL */
    {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
            {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}},
    /* 5: Visual RTL => Logical RTL */
    {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
            {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}},
    /* 6: Visual LTR => Logical LTR */
    {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
            {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}},
    /* 7: Visual RTL => Logical LTR */
    {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
            {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}},
    /* 8: Logical LTR => Logical RTL */
    {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
            {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}},
    /* 9: Logical RTL => Logical LTR */
    {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL,
            {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}},
    /* 10: Visual LTR => Visual RTL */
    {LTR, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
            {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}},
    /* 11: Visual RTL => Visual LTR */
    {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
            {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}},
    /* 12: Logical LTR => Logical LTR */
    {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
            {action_resolve, action_mirror, action_shapeArabic, NULL}},
    /* 13: Logical RTL => Logical RTL */
    {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL,
            {action_resolve, action_mirror, action_shapeArabic, NULL}},
    /* 14: Visual LTR => Visual LTR */
    {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
            {action_resolve, action_mirror, action_shapeArabic, NULL}},
    /* 15: Visual RTL => Visual RTL */
    {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
            {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}}
};

/* Number of entries in the Schemes table. */
static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes);
383
384 /**
385 * When the direction option is <code>UBIDI_DEFAULT_LTR</code> or
386 * <code>UBIDI_DEFAULT_RTL</code>, resolve the base direction according to that
387 * of the first strong bidi character.
388 */
389 static void
390 resolveBaseDirection(const UChar *text, uint32_t length,
391 UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel)
392 {
393 switch (*pInLevel) {
394 case UBIDI_DEFAULT_LTR:
395 case UBIDI_DEFAULT_RTL: {
396 UBiDiLevel level = static_cast<UBiDiLevel>(ubidi_getBaseDirection(text, length));
397 *pInLevel = static_cast<UBiDiLevel>(level != UBIDI_NEUTRAL) ? level
398 : *pInLevel == UBIDI_DEFAULT_RTL ? static_cast<UBiDiLevel>(RTL) : static_cast<UBiDiLevel>(LTR);
399 break;
400 }
401 default:
402 *pInLevel &= 1;
403 break;
404 }
405 switch (*pOutLevel) {
406 case UBIDI_DEFAULT_LTR:
407 case UBIDI_DEFAULT_RTL:
408 *pOutLevel = *pInLevel;
409 break;
410 default:
411 *pOutLevel &= 1;
412 break;
413 }
414 }
415
416 /**
417 * Finds a valid <code>ReorderingScheme</code> matching the
418 * caller-defined scheme.
419 *
420 * @return A valid <code>ReorderingScheme</code> object or NULL
421 */
422 static const ReorderingScheme*
423 findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel,
424 UBiDiOrder inOrder, UBiDiOrder outOrder)
425 {
426 uint32_t i;
427 for (i = 0; i < nSchemes; i++) {
428 const ReorderingScheme *pScheme = Schemes + i;
429 if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel
430 && inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) {
431 return pScheme;
432 }
433 }
434 return NULL;
435 }
436
437 U_DRAFT uint32_t U_EXPORT2
438 ubiditransform_transform(UBiDiTransform *pBiDiTransform,
439 const UChar *src, int32_t srcLength,
440 UChar *dest, int32_t destSize,
441 UBiDiLevel inParaLevel, UBiDiOrder inOrder,
442 UBiDiLevel outParaLevel, UBiDiOrder outOrder,
443 UBiDiMirroring doMirroring, uint32_t shapingOptions,
444 UErrorCode *pErrorCode)
445 {
446 uint32_t destLength = 0;
447 UBool textChanged = FALSE;
448 const UBiDiTransform *pOrigTransform = pBiDiTransform;
449 const UBiDiAction *action = NULL;
450
451 if (U_FAILURE(*pErrorCode)) {
452 return 0;
453 }
454 if (src == NULL || dest == NULL) {
455 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
456 return 0;
457 }
458 CHECK_LEN(src, srcLength, pErrorCode);
459 CHECK_LEN(dest, destSize, pErrorCode);
460
461 if (pBiDiTransform == NULL) {
462 pBiDiTransform = ubiditransform_open(pErrorCode);
463 if (U_FAILURE(*pErrorCode)) {
464 return 0;
465 }
466 }
467 /* Current limitation: in multiple paragraphs will be resolved according
468 to the 1st paragraph */
469 resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel);
470
471 pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel,
472 inOrder, outOrder);
473 if (pBiDiTransform->pActiveScheme == NULL) {
474 goto cleanup;
475 }
476 pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING
477 : UBIDI_REORDER_DEFAULT;
478
479 /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text
480 scheme at the time shaping is invoked. */
481 shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK;
482 pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK;
483 pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK;
484
485 updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode);
486 if (U_FAILURE(*pErrorCode)) {
487 goto cleanup;
488 }
489 if (pBiDiTransform->pBidi == NULL) {
490 pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode);
491 if (U_FAILURE(*pErrorCode)) {
492 goto cleanup;
493 }
494 }
495 pBiDiTransform->dest = dest;
496 pBiDiTransform->destSize = destSize;
497 pBiDiTransform->pDestLength = &destLength;
498
499 /* Checking for U_SUCCESS() within the loop to bail out on first failure. */
500 for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) {
501 if ((*action)(pBiDiTransform, pErrorCode)) {
502 if (action + 1) {
503 updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength,
504 *pBiDiTransform->pDestLength, pErrorCode);
505 }
506 textChanged = TRUE;
507 }
508 }
509 ubidi_setInverse(pBiDiTransform->pBidi, FALSE);
510
511 if (!textChanged && U_SUCCESS(*pErrorCode)) {
512 /* Text was not changed - just copy src to dest */
513 if (destSize < srcLength) {
514 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
515 } else {
516 u_strncpy(dest, src, srcLength);
517 destLength = srcLength;
518 }
519 }
520 cleanup:
521 if (pOrigTransform != pBiDiTransform) {
522 ubiditransform_close(pBiDiTransform);
523 } else {
524 pBiDiTransform->dest = NULL;
525 pBiDiTransform->pDestLength = NULL;
526 pBiDiTransform->srcLength = 0;
527 pBiDiTransform->destSize = 0;
528 }
529 return U_FAILURE(*pErrorCode) ? 0 : destLength;
530 }