/*
******************************************************************************
*
* © 2016 and later: Unicode, Inc. and others.
* License & terms of use: http://www.unicode.org/copyright.html
*
******************************************************************************
*   file name:  ubiditransform.c
*   tab size:   8 (not used)
*
*   created on: 2016jul24
*   created by: Lina Kemmel
*
*/
19 #include "unicode/ubidi.h"
20 #include "unicode/ustring.h"
21 #include "unicode/ushape.h"
22 #include "unicode/utf16.h"
24 #include "unicode/ubiditransform.h"
/* Some convenience defines */
#define LOGICAL                 UBIDI_LOGICAL
#define VISUAL                  UBIDI_VISUAL
#define SHAPE_LOGICAL           U_SHAPE_TEXT_DIRECTION_LOGICAL
#define SHAPE_VISUAL            U_SHAPE_TEXT_DIRECTION_VISUAL_LTR

/*
 * Validates a caller-supplied length and normalizes it in place.
 *
 *   LEN == 0   -> the enclosing function returns 0 (nothing to do),
 *   LEN <  -1  -> *(ERROR) is set to U_ILLEGAL_ARGUMENT_ERROR and the
 *                 enclosing function returns 0,
 *   LEN == -1  -> LEN is replaced by u_strlen(STR).
 *
 * LEN must be a modifiable lvalue. The macro may only be used inside a
 * function whose return type accepts 0. It is wrapped in do { } while (0) so
 * that it behaves as a single statement (safe in an unbraced if/else), and
 * every use of LEN is parenthesized.
 */
#define CHECK_LEN(STR, LEN, ERROR) do { \
        if ((LEN) == 0) return 0; \
        if ((LEN) < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \
        if ((LEN) == -1) (LEN) = u_strlen(STR); \
    } while (0)
43 * Typedef for a pointer to a function, which performs some operation (such as
44 * reordering, setting "inverse" mode, character mirroring, etc.). Return value
45 * indicates whether the text was changed in the course of this operation or
48 typedef UBool (*UBiDiAction
)(UBiDiTransform
*, UErrorCode
*);
51 * Structure that holds a predefined reordering scheme, including the following
54 * <li>an input base direction,</li>
55 * <li>an input order,</li>
56 * <li>an output base direction,</li>
57 * <li>an output order,</li>
58 * <li>a digit shaping direction,</li>
59 * <li>a letter shaping direction,</li>
60 * <li>a base direction that should be applied when the reordering engine is
61 * invoked (which can not always be derived from the caller-defined
63 * <li>an array of pointers to functions that accomplish the bidi layout
64 * transformation.</li>
68 UBiDiLevel inLevel
; /* input level */
69 UBiDiOrder inOrder
; /* input order */
70 UBiDiLevel outLevel
; /* output level */
71 UBiDiOrder outOrder
; /* output order */
72 uint32_t digitsDir
; /* digit shaping direction */
73 uint32_t lettersDir
; /* letter shaping direction */
74 UBiDiLevel baseLevel
; /* paragraph level to be used with setPara */
75 const UBiDiAction actions
[MAX_ACTIONS
]; /* array of pointers to functions carrying out the transformation */
78 struct UBiDiTransform
{
79 UBiDi
*pBidi
; /* pointer to a UBiDi object */
80 const ReorderingScheme
*pActiveScheme
; /* effective reordering scheme */
81 UChar
*src
; /* input text */
82 UChar
*dest
; /* output text */
83 uint32_t srcLength
; /* input text length - not really needed as we are zero-terminated and can u_strlen */
84 uint32_t srcSize
; /* input text capacity excluding the trailing zero */
85 uint32_t destSize
; /* output text capacity */
86 uint32_t *pDestLength
; /* number of UChars written to dest */
87 uint32_t reorderingOptions
; /* reordering options - currently only suppot DO_MIRRORING */
88 uint32_t digits
; /* digit option for ArabicShaping */
89 uint32_t letters
; /* letter option for ArabicShaping */
92 U_DRAFT UBiDiTransform
* U_EXPORT2
93 ubiditransform_open(UErrorCode
*pErrorCode
)
95 UBiDiTransform
*pBiDiTransform
= NULL
;
96 if (U_SUCCESS(*pErrorCode
)) {
97 pBiDiTransform
= (UBiDiTransform
*) uprv_calloc(1, sizeof(UBiDiTransform
));
98 if (pBiDiTransform
== NULL
) {
99 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
102 return pBiDiTransform
;
105 U_DRAFT
void U_EXPORT2
106 ubiditransform_close(UBiDiTransform
*pBiDiTransform
)
108 if (pBiDiTransform
!= NULL
) {
109 if (pBiDiTransform
->pBidi
!= NULL
) {
110 ubidi_close(pBiDiTransform
->pBidi
);
112 if (pBiDiTransform
->src
!= NULL
) {
113 uprv_free(pBiDiTransform
->src
);
115 uprv_free(pBiDiTransform
);
120 * Performs Bidi resolution of text.
122 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
123 * @param pErrorCode Pointer to the error code value.
125 * @return Whether or not this function modifies the text. Besides the return
126 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
129 action_resolve(UBiDiTransform
*pTransform
, UErrorCode
*pErrorCode
)
131 ubidi_setPara(pTransform
->pBidi
, pTransform
->src
, pTransform
->srcLength
,
132 pTransform
->pActiveScheme
->baseLevel
, NULL
, pErrorCode
);
137 * Performs basic reordering of text (Logical -> Visual LTR).
139 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
140 * @param pErrorCode Pointer to the error code value.
142 * @return Whether or not this function modifies the text. Besides the return
143 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
146 action_reorder(UBiDiTransform
*pTransform
, UErrorCode
*pErrorCode
)
148 ubidi_writeReordered(pTransform
->pBidi
, pTransform
->dest
, pTransform
->destSize
,
149 pTransform
->reorderingOptions
, pErrorCode
);
151 *pTransform
->pDestLength
= pTransform
->srcLength
;
152 pTransform
->reorderingOptions
= UBIDI_REORDER_DEFAULT
;
157 * Sets "inverse" mode on the <code>UBiDi</code> object.
159 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
160 * @param pErrorCode Pointer to the error code value.
162 * @return Whether or not this function modifies the text. Besides the return
163 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
166 action_setInverse(UBiDiTransform
*pTransform
, UErrorCode
*pErrorCode
)
169 ubidi_setInverse(pTransform
->pBidi
, TRUE
);
170 ubidi_setReorderingMode(pTransform
->pBidi
, UBIDI_REORDER_INVERSE_LIKE_DIRECT
);
175 * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL
178 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
179 * @param pErrorCode Pointer to the error code value.
181 * @return Whether or not this function modifies the text. Besides the return
182 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
185 action_setRunsOnly(UBiDiTransform
*pTransform
, UErrorCode
*pErrorCode
)
188 ubidi_setReorderingMode(pTransform
->pBidi
, UBIDI_REORDER_RUNS_ONLY
);
193 * Performs string reverse.
195 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
196 * @param pErrorCode Pointer to the error code value.
198 * @return Whether or not this function modifies the text. Besides the return
199 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
202 action_reverse(UBiDiTransform
*pTransform
, UErrorCode
*pErrorCode
)
204 ubidi_writeReverse(pTransform
->src
, pTransform
->srcLength
,
205 pTransform
->dest
, pTransform
->destSize
,
206 UBIDI_REORDER_DEFAULT
, pErrorCode
);
207 *pTransform
->pDestLength
= pTransform
->srcLength
;
212 * Applies a new value to the text that serves as input at the current
213 * processing step. This value is identical to the original one when we begin
214 * the processing, but usually changes as the transformation progresses.
216 * @param pTransform A pointer to the <code>UBiDiTransform</code> structure.
217 * @param newSrc A pointer whose value is to be used as input text.
218 * @param newLength A length of the new text in <code>UChar</code>s.
219 * @param newSize A new source capacity in <code>UChar</code>s.
220 * @param pErrorCode Pointer to the error code value.
223 updateSrc(UBiDiTransform
*pTransform
, const UChar
*newSrc
, uint32_t newLength
,
224 uint32_t newSize
, UErrorCode
*pErrorCode
)
226 if (newSize
< newLength
) {
227 *pErrorCode
= U_BUFFER_OVERFLOW_ERROR
;
230 if (newSize
> pTransform
->srcSize
) {
231 newSize
+= 50; // allocate slightly more than needed right now
232 if (pTransform
->src
!= NULL
) {
233 uprv_free(pTransform
->src
);
234 pTransform
->src
= NULL
;
236 pTransform
->src
= (UChar
*)uprv_malloc(newSize
* sizeof(UChar
));
237 if (pTransform
->src
== NULL
) {
238 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
239 //pTransform->srcLength = pTransform->srcSize = 0;
242 pTransform
->srcSize
= newSize
;
244 u_strncpy(pTransform
->src
, newSrc
, newLength
);
245 pTransform
->srcLength
= u_terminateUChars(pTransform
->src
,
246 pTransform
->srcSize
, newLength
, pErrorCode
);
250 * Calls a lower level shaping function.
252 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
253 * @param options Shaping options.
254 * @param pErrorCode Pointer to the error code value.
257 doShape(UBiDiTransform
*pTransform
, uint32_t options
, UErrorCode
*pErrorCode
)
259 *pTransform
->pDestLength
= u_shapeArabic(pTransform
->src
,
260 pTransform
->srcLength
, pTransform
->dest
, pTransform
->destSize
,
261 options
, pErrorCode
);
265 * Performs digit and letter shaping.
267 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
268 * @param pErrorCode Pointer to the error code value.
270 * @return Whether or not this function modifies the text. Besides the return
271 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
274 action_shapeArabic(UBiDiTransform
*pTransform
, UErrorCode
*pErrorCode
)
276 if ((pTransform
->letters
| pTransform
->digits
) == 0) {
279 if (pTransform
->pActiveScheme
->lettersDir
== pTransform
->pActiveScheme
->digitsDir
) {
280 doShape(pTransform
, pTransform
->letters
| pTransform
->digits
| pTransform
->pActiveScheme
->lettersDir
,
283 doShape(pTransform
, pTransform
->digits
| pTransform
->pActiveScheme
->digitsDir
, pErrorCode
);
284 if (U_SUCCESS(*pErrorCode
)) {
285 updateSrc(pTransform
, pTransform
->dest
, *pTransform
->pDestLength
,
286 *pTransform
->pDestLength
, pErrorCode
);
287 doShape(pTransform
, pTransform
->letters
| pTransform
->pActiveScheme
->lettersDir
,
295 * Performs character mirroring.
297 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
298 * @param pErrorCode Pointer to the error code value.
300 * @return Whether or not this function modifies the text. Besides the return
301 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
304 action_mirror(UBiDiTransform
*pTransform
, UErrorCode
*pErrorCode
)
307 uint32_t i
= 0, j
= 0;
308 if (0 == (pTransform
->reorderingOptions
& UBIDI_DO_MIRRORING
)) {
311 if (pTransform
->destSize
< pTransform
->srcLength
) {
312 *pErrorCode
= U_BUFFER_OVERFLOW_ERROR
;
316 UBool isOdd
= ubidi_getLevelAt(pTransform
->pBidi
, i
) & 1;
317 U16_NEXT(pTransform
->src
, i
, pTransform
->srcLength
, c
);
318 U16_APPEND_UNSAFE(pTransform
->dest
, j
, isOdd
? u_charMirror(c
) : c
);
319 } while (i
< pTransform
->srcLength
);
321 *pTransform
->pDestLength
= pTransform
->srcLength
;
322 pTransform
->reorderingOptions
= UBIDI_REORDER_DEFAULT
;
327 * All possible reordering schemes.
330 static const ReorderingScheme Schemes
[] =
332 /* 0: Logical LTR => Visual LTR */
333 {LTR
, LOGICAL
, LTR
, VISUAL
, SHAPE_LOGICAL
, SHAPE_LOGICAL
, LTR
,
334 {action_shapeArabic
, action_resolve
, action_reorder
, NULL
}},
335 /* 1: Logical RTL => Visual LTR */
336 {RTL
, LOGICAL
, LTR
, VISUAL
, SHAPE_LOGICAL
, SHAPE_VISUAL
, RTL
,
337 {action_resolve
, action_reorder
, action_shapeArabic
, NULL
}},
338 /* 2: Logical LTR => Visual RTL */
339 {LTR
, LOGICAL
, RTL
, VISUAL
, SHAPE_LOGICAL
, SHAPE_LOGICAL
, LTR
,
340 {action_shapeArabic
, action_resolve
, action_reorder
, action_reverse
, NULL
}},
341 /* 3: Logical RTL => Visual RTL */
342 {RTL
, LOGICAL
, RTL
, VISUAL
, SHAPE_LOGICAL
, SHAPE_VISUAL
, RTL
,
343 {action_resolve
, action_reorder
, action_shapeArabic
, action_reverse
, NULL
}},
344 /* 4: Visual LTR => Logical RTL */
345 {LTR
, VISUAL
, RTL
, LOGICAL
, SHAPE_LOGICAL
, SHAPE_VISUAL
, RTL
,
346 {action_shapeArabic
, action_setInverse
, action_resolve
, action_reorder
, NULL
}},
347 /* 5: Visual RTL => Logical RTL */
348 {RTL
, VISUAL
, RTL
, LOGICAL
, SHAPE_LOGICAL
, SHAPE_VISUAL
, RTL
,
349 {action_reverse
, action_shapeArabic
, action_setInverse
, action_resolve
, action_reorder
, NULL
}},
350 /* 6: Visual LTR => Logical LTR */
351 {LTR
, VISUAL
, LTR
, LOGICAL
, SHAPE_LOGICAL
, SHAPE_LOGICAL
, LTR
,
352 {action_setInverse
, action_resolve
, action_reorder
, action_shapeArabic
, NULL
}},
353 /* 7: Visual RTL => Logical LTR */
354 {RTL
, VISUAL
, LTR
, LOGICAL
, SHAPE_LOGICAL
, SHAPE_LOGICAL
, LTR
,
355 {action_reverse
, action_setInverse
, action_resolve
, action_reorder
, action_shapeArabic
, NULL
}},
356 /* 8: Logical LTR => Logical RTL */
357 {LTR
, LOGICAL
, RTL
, LOGICAL
, SHAPE_LOGICAL
, SHAPE_LOGICAL
, LTR
,
358 {action_shapeArabic
, action_resolve
, action_mirror
, action_setRunsOnly
, action_resolve
, action_reorder
, NULL
}},
359 /* 9: Logical RTL => Logical LTR */
360 {RTL
, LOGICAL
, LTR
, LOGICAL
, SHAPE_LOGICAL
, SHAPE_LOGICAL
, RTL
,
361 {action_resolve
, action_mirror
, action_setRunsOnly
, action_resolve
, action_reorder
, action_shapeArabic
, NULL
}},
362 /* 10: Visual LTR => Visual RTL */
363 {LTR
, VISUAL
, RTL
, VISUAL
, SHAPE_LOGICAL
, SHAPE_VISUAL
, LTR
,
364 {action_shapeArabic
, action_setInverse
, action_resolve
, action_mirror
, action_reverse
, NULL
}},
365 /* 11: Visual RTL => Visual LTR */
366 {RTL
, VISUAL
, LTR
, VISUAL
, SHAPE_LOGICAL
, SHAPE_VISUAL
, LTR
,
367 {action_reverse
, action_shapeArabic
, action_setInverse
, action_resolve
, action_mirror
, NULL
}},
368 /* 12: Logical LTR => Logical LTR */
369 {LTR
, LOGICAL
, LTR
, LOGICAL
, SHAPE_LOGICAL
, SHAPE_LOGICAL
, LTR
,
370 {action_resolve
, action_mirror
, action_shapeArabic
, NULL
}},
371 /* 13: Logical RTL => Logical RTL */
372 {RTL
, LOGICAL
, RTL
, LOGICAL
, SHAPE_VISUAL
, SHAPE_LOGICAL
, RTL
,
373 {action_resolve
, action_mirror
, action_shapeArabic
, NULL
}},
374 /* 14: Visual LTR => Visual LTR */
375 {LTR
, VISUAL
, LTR
, VISUAL
, SHAPE_LOGICAL
, SHAPE_VISUAL
, LTR
,
376 {action_resolve
, action_mirror
, action_shapeArabic
, NULL
}},
377 /* 15: Visual RTL => Visual RTL */
378 {RTL
, VISUAL
, RTL
, VISUAL
, SHAPE_LOGICAL
, SHAPE_VISUAL
, LTR
,
379 {action_reverse
, action_resolve
, action_mirror
, action_shapeArabic
, action_reverse
, NULL
}}
382 static const uint32_t nSchemes
= sizeof(Schemes
) / sizeof(*Schemes
);
385 * When the direction option is <code>UBIDI_DEFAULT_LTR</code> or
386 * <code>UBIDI_DEFAULT_RTL</code>, resolve the base direction according to that
387 * of the first strong bidi character.
390 resolveBaseDirection(const UChar
*text
, uint32_t length
,
391 UBiDiLevel
*pInLevel
, UBiDiLevel
*pOutLevel
)
394 case UBIDI_DEFAULT_LTR
:
395 case UBIDI_DEFAULT_RTL
: {
396 UBiDiLevel level
= ubidi_getBaseDirection(text
, length
);
397 *pInLevel
= level
!= UBIDI_NEUTRAL
? level
398 : *pInLevel
== UBIDI_DEFAULT_RTL
? RTL
: LTR
;
405 switch (*pOutLevel
) {
406 case UBIDI_DEFAULT_LTR
:
407 case UBIDI_DEFAULT_RTL
:
408 *pOutLevel
= *pInLevel
;
417 * Finds a valid <code>ReorderingScheme</code> matching the
418 * caller-defined scheme.
420 * @return A valid <code>ReorderingScheme</code> object or NULL
422 static const ReorderingScheme
*
423 findMatchingScheme(UBiDiLevel inLevel
, UBiDiLevel outLevel
,
424 UBiDiOrder inOrder
, UBiDiOrder outOrder
)
427 for (i
= 0; i
< nSchemes
; i
++) {
428 const ReorderingScheme
*pScheme
= Schemes
+ i
;
429 if (inLevel
== pScheme
->inLevel
&& outLevel
== pScheme
->outLevel
430 && inOrder
== pScheme
->inOrder
&& outOrder
== pScheme
->outOrder
) {
437 U_DRAFT
uint32_t U_EXPORT2
438 ubiditransform_transform(UBiDiTransform
*pBiDiTransform
,
439 const UChar
*src
, int32_t srcLength
,
440 UChar
*dest
, int32_t destSize
,
441 UBiDiLevel inParaLevel
, UBiDiOrder inOrder
,
442 UBiDiLevel outParaLevel
, UBiDiOrder outOrder
,
443 UBiDiMirroring doMirroring
, uint32_t shapingOptions
,
444 UErrorCode
*pErrorCode
)
446 uint32_t destLength
= 0;
447 UBool textChanged
= FALSE
;
448 const UBiDiTransform
*pOrigTransform
= pBiDiTransform
;
449 const UBiDiAction
*action
= NULL
;
451 if (U_FAILURE(*pErrorCode
)) {
454 if (src
== NULL
|| dest
== NULL
) {
455 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
458 CHECK_LEN(src
, srcLength
, pErrorCode
);
459 CHECK_LEN(dest
, destSize
, pErrorCode
);
461 if (pBiDiTransform
== NULL
) {
462 pBiDiTransform
= ubiditransform_open(pErrorCode
);
463 if (U_FAILURE(*pErrorCode
)) {
467 /* Current limitation: in multiple paragraphs will be resolved according
468 to the 1st paragraph */
469 resolveBaseDirection(src
, srcLength
, &inParaLevel
, &outParaLevel
);
471 pBiDiTransform
->pActiveScheme
= findMatchingScheme(inParaLevel
, outParaLevel
,
473 if (pBiDiTransform
->pActiveScheme
== NULL
) {
476 pBiDiTransform
->reorderingOptions
= doMirroring
? UBIDI_DO_MIRRORING
477 : UBIDI_REORDER_DEFAULT
;
479 /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text
480 scheme at the time shaping is invoked. */
481 shapingOptions
&= ~U_SHAPE_TEXT_DIRECTION_MASK
;
482 pBiDiTransform
->digits
= shapingOptions
& ~U_SHAPE_LETTERS_MASK
;
483 pBiDiTransform
->letters
= shapingOptions
& ~U_SHAPE_DIGITS_MASK
;
485 updateSrc(pBiDiTransform
, src
, srcLength
, destSize
> srcLength
? destSize
: srcLength
, pErrorCode
);
486 if (U_FAILURE(*pErrorCode
)) {
489 if (pBiDiTransform
->pBidi
== NULL
) {
490 pBiDiTransform
->pBidi
= ubidi_openSized(0, 0, pErrorCode
);
491 if (U_FAILURE(*pErrorCode
)) {
495 pBiDiTransform
->dest
= dest
;
496 pBiDiTransform
->destSize
= destSize
;
497 pBiDiTransform
->pDestLength
= &destLength
;
499 /* Checking for U_SUCCESS() within the loop to bail out on first failure. */
500 for (action
= pBiDiTransform
->pActiveScheme
->actions
; *action
&& U_SUCCESS(*pErrorCode
); action
++) {
501 if ((*action
)(pBiDiTransform
, pErrorCode
)) {
503 updateSrc(pBiDiTransform
, pBiDiTransform
->dest
, *pBiDiTransform
->pDestLength
,
504 *pBiDiTransform
->pDestLength
, pErrorCode
);
509 ubidi_setInverse(pBiDiTransform
->pBidi
, FALSE
);
511 if (!textChanged
&& U_SUCCESS(*pErrorCode
)) {
512 /* Text was not changed - just copy src to dest */
513 if (destSize
< srcLength
) {
514 *pErrorCode
= U_BUFFER_OVERFLOW_ERROR
;
516 u_strncpy(dest
, src
, srcLength
);
517 destLength
= srcLength
;
521 if (pOrigTransform
!= pBiDiTransform
) {
522 ubiditransform_close(pBiDiTransform
);
524 pBiDiTransform
->dest
= NULL
;
525 pBiDiTransform
->pDestLength
= NULL
;
526 pBiDiTransform
->srcLength
= 0;
527 pBiDiTransform
->destSize
= 0;
529 return U_FAILURE(*pErrorCode
) ? 0 : destLength
;