// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2000-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ubidiwrt.c
*   tab size:   8 (not used)
*
*   created on: 1999aug06
*   created by: Markus W. Scherer, updated by Matitiahu Allouche
*
* This file contains implementations for BiDi functions that use
* the core algorithm and core API to write reordered text.
*/
22 #include "unicode/utypes.h"
23 #include "unicode/ustring.h"
24 #include "unicode/uchar.h"
25 #include "unicode/ubidi.h"
26 #include "unicode/utf16.h"
/*
 * The function implementations in this file are designed
 * for UTF-16 and UTF-32, not for UTF-8.
 *
 * Assumptions that are not true for UTF-8:
 * - Any code point always needs the same number of code units
 *   ("minimum-length-problem" of UTF-8)
 * - The BiDi control characters need only one code unit each
 *
 * Further assumptions for all UTFs:
 * - u_charMirror(c) needs the same number of code units as c
 */
#if defined(UTF_SIZE) && UTF_SIZE==8
# error reimplement ubidi_writeReordered() for UTF-8, see comment above
#endif

/*
 * Nonzero if the general-category value `type` (as returned by u_charType())
 * is one of the three mark categories: non-spacing, combining-spacing or
 * enclosing mark. The test is a single bit-set membership check, so `type`
 * must be smaller than the bit width of unsigned long.
 */
#define IS_COMBINING(type) \
    ((1UL<<(type))&((1UL<<U_NON_SPACING_MARK)|(1UL<<U_COMBINING_SPACING_MARK)|(1UL<<U_ENCLOSING_MARK)))
50 * When we have UBIDI_OUTPUT_REVERSE set on ubidi_writeReordered(), then we
51 * semantically write RTL runs in reverse and later reverse them again.
52 * Instead, we actually write them in forward order to begin with.
53 * However, if the RTL run was to be mirrored, we need to mirror here now
54 * since the implicit second reversal must not do it.
55 * It looks strange to do mirroring in LTR output, but it is only because
56 * we are writing RTL output in reverse.
59 doWriteForward(const UChar
*src
, int32_t srcLength
,
60 UChar
*dest
, int32_t destSize
,
62 UErrorCode
*pErrorCode
) {
63 /* optimize for several combinations of options */
64 switch(options
&(UBIDI_REMOVE_BIDI_CONTROLS
|UBIDI_DO_MIRRORING
)) {
66 /* simply copy the LTR run to the destination */
67 int32_t length
=srcLength
;
69 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
77 case UBIDI_DO_MIRRORING
: {
82 if(destSize
<srcLength
) {
83 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
87 U16_NEXT(src
, i
, srcLength
, c
);
89 U16_APPEND_UNSAFE(dest
, j
, c
);
93 case UBIDI_REMOVE_BIDI_CONTROLS
: {
94 /* copy the LTR run and remove any BiDi control characters */
95 int32_t remaining
=destSize
;
99 if(!IS_BIDI_CONTROL_CHAR(c
)) {
101 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
103 /* preflight the length */
104 while(--srcLength
>0) {
106 if(!IS_BIDI_CONTROL_CHAR(c
)) {
110 return destSize
-remaining
;
114 } while(--srcLength
>0);
115 return destSize
-remaining
;
118 /* remove BiDi control characters and do mirroring */
119 int32_t remaining
=destSize
;
124 U16_NEXT(src
, i
, srcLength
, c
);
127 if(!IS_BIDI_CONTROL_CHAR(c
)) {
130 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
132 /* preflight the length */
135 if(!IS_BIDI_CONTROL_CHAR(c
)) {
140 return destSize
-remaining
;
143 U16_APPEND_UNSAFE(dest
, j
, c
);
145 } while(srcLength
>0);
148 } /* end of switch */
152 doWriteReverse(const UChar
*src
, int32_t srcLength
,
153 UChar
*dest
, int32_t destSize
,
155 UErrorCode
*pErrorCode
) {
159 * RTL runs need to be copied to the destination in reverse order
160 * of code points, not code units, to keep Unicode characters intact.
162 * The general strategy for this is to read the source text
163 * in backward order, collect all code units for a code point
164 * (and optionally following combining characters, see below),
165 * and copy all these code units in ascending order
166 * to the destination for this run.
168 * Several options request whether combining characters
169 * should be kept after their base characters,
170 * whether BiDi control characters should be removed, and
171 * whether characters should be replaced by their mirror-image
172 * equivalent Unicode characters.
177 /* optimize for several combinations of options */
178 switch(options
&(UBIDI_REMOVE_BIDI_CONTROLS
|UBIDI_DO_MIRRORING
|UBIDI_KEEP_BASE_COMBINING
)) {
181 * With none of the "complicated" options set, the destination
182 * run will have the same length as the source run,
183 * and there is no mirroring and no keeping combining characters
184 * with their base characters.
186 if(destSize
<srcLength
) {
187 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
192 /* preserve character integrity */
194 /* i is always after the last code unit known to need to be kept in this segment */
197 /* collect code units for one base character */
198 U16_BACK_1(src
, 0, srcLength
);
200 /* copy this base character */
205 } while(srcLength
>0);
207 case UBIDI_KEEP_BASE_COMBINING
:
209 * Here, too, the destination
210 * run will have the same length as the source run,
211 * and there is no mirroring.
212 * We do need to keep combining characters with their base characters.
214 if(destSize
<srcLength
) {
215 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
220 /* preserve character integrity */
222 /* i is always after the last code unit known to need to be kept in this segment */
225 /* collect code units and modifier letters for one base character */
227 U16_PREV(src
, 0, srcLength
, c
);
228 } while(srcLength
>0 && IS_COMBINING(u_charType(c
)));
230 /* copy this "user character" */
235 } while(srcLength
>0);
239 * With several "complicated" options set, this is the most
240 * general and the slowest copying of an RTL run.
241 * We will do mirroring, remove BiDi controls, and
242 * keep combining characters with their base characters
245 if(!(options
&UBIDI_REMOVE_BIDI_CONTROLS
)) {
248 /* we need to find out the destination length of the run,
249 which will not include the BiDi control characters */
250 int32_t length
=srcLength
;
256 if(!IS_BIDI_CONTROL_CHAR(ch
)) {
264 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
269 /* preserve character integrity */
271 /* i is always after the last code unit known to need to be kept in this segment */
274 /* collect code units for one base character */
275 U16_PREV(src
, 0, srcLength
, c
);
276 if(options
&UBIDI_KEEP_BASE_COMBINING
) {
277 /* collect modifier letters for this base character */
278 while(srcLength
>0 && IS_COMBINING(u_charType(c
))) {
279 U16_PREV(src
, 0, srcLength
, c
);
283 if(options
&UBIDI_REMOVE_BIDI_CONTROLS
&& IS_BIDI_CONTROL_CHAR(c
)) {
284 /* do not copy this BiDi control character */
288 /* copy this "user character" */
290 if(options
&UBIDI_DO_MIRRORING
) {
291 /* mirror only the base character */
294 U16_APPEND_UNSAFE(dest
, k
, c
);
301 } while(srcLength
>0);
303 } /* end of switch */
308 U_CAPI
int32_t U_EXPORT2
309 ubidi_writeReverse(const UChar
*src
, int32_t srcLength
,
310 UChar
*dest
, int32_t destSize
,
312 UErrorCode
*pErrorCode
) {
315 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
319 /* more error checking */
320 if( src
==NULL
|| srcLength
<-1 ||
321 destSize
<0 || (destSize
>0 && dest
==NULL
))
323 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
327 /* do input and output overlap? */
329 ((src
>=dest
&& src
<dest
+destSize
) ||
330 (dest
>=src
&& dest
<src
+srcLength
)))
332 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
337 srcLength
=u_strlen(src
);
340 destLength
=doWriteReverse(src
, srcLength
, dest
, destSize
, options
, pErrorCode
);
346 return u_terminateUChars(dest
, destSize
, destLength
, pErrorCode
);
349 // Ticket 20907 - The optimizer in MSVC/Visual Studio versions below 16.4 has trouble with this
350 // function on Windows ARM64. As a work-around, we disable optimizations for this function.
351 // This work-around could/should be removed once the following versions of Visual Studio are no
352 // longer supported: All versions of VS2017, and versions of VS2019 below 16.4.
353 #if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
354 #pragma optimize( "", off )
356 U_CAPI
int32_t U_EXPORT2
357 ubidi_writeReordered(UBiDi
*pBiDi
,
358 UChar
*dest
, int32_t destSize
,
360 UErrorCode
*pErrorCode
) {
363 int32_t length
, destCapacity
;
364 int32_t run
, runCount
, logicalStart
, runLength
;
366 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
370 /* more error checking */
372 (text
=pBiDi
->text
)==NULL
|| (length
=pBiDi
->length
)<0 ||
373 destSize
<0 || (destSize
>0 && dest
==NULL
))
375 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
379 /* do input and output overlap? */
381 ((text
>=dest
&& text
<dest
+destSize
) ||
382 (dest
>=text
&& dest
<text
+pBiDi
->originalLength
)))
384 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
390 return u_terminateUChars(dest
, destSize
, 0, pErrorCode
);
393 runCount
=ubidi_countRuns(pBiDi
, pErrorCode
);
394 if(U_FAILURE(*pErrorCode
)) {
398 /* destSize shrinks, later destination length=destCapacity-destSize */
400 destCapacity
=destSize
;
403 * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the
404 * reordering mode (checked below) is appropriate.
406 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) {
407 options
|=UBIDI_INSERT_LRM_FOR_NUMERIC
;
408 options
&=~UBIDI_REMOVE_BIDI_CONTROLS
;
411 * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS
412 * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC.
414 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_REMOVE_CONTROLS
) {
415 options
|=UBIDI_REMOVE_BIDI_CONTROLS
;
416 options
&=~UBIDI_INSERT_LRM_FOR_NUMERIC
;
419 * If we do not perform the "inverse BiDi" algorithm, then we
420 * don't need to insert any LRMs, and don't need to test for it.
422 if((pBiDi
->reorderingMode
!= UBIDI_REORDER_INVERSE_NUMBERS_AS_L
) &&
423 (pBiDi
->reorderingMode
!= UBIDI_REORDER_INVERSE_LIKE_DIRECT
) &&
424 (pBiDi
->reorderingMode
!= UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
) &&
425 (pBiDi
->reorderingMode
!= UBIDI_REORDER_RUNS_ONLY
)) {
426 options
&=~UBIDI_INSERT_LRM_FOR_NUMERIC
;
429 * Iterate through all visual runs and copy the run text segments to
430 * the destination, according to the options.
432 * The tests for where to insert LRMs ignore the fact that there may be
433 * BN codes or non-BMP code points at the beginning and end of a run;
434 * they may insert LRMs unnecessarily but the tests are faster this way
435 * (this would have to be improved for UTF-8).
437 * Note that the only errors that are set by doWriteXY() are buffer overflow
438 * errors. Ignore them until the end, and continue for preflighting.
440 if(!(options
&UBIDI_OUTPUT_REVERSE
)) {
442 if(!(options
&UBIDI_INSERT_LRM_FOR_NUMERIC
)) {
443 /* do not insert BiDi controls */
444 for(run
=0; run
<runCount
; ++run
) {
445 if(UBIDI_LTR
==ubidi_getVisualRun(pBiDi
, run
, &logicalStart
, &runLength
)) {
446 runLength
=doWriteForward(text
+logicalStart
, runLength
,
448 (uint16_t)(options
&~UBIDI_DO_MIRRORING
), pErrorCode
);
450 runLength
=doWriteReverse(text
+logicalStart
, runLength
,
452 options
, pErrorCode
);
460 /* insert BiDi controls for "inverse BiDi" */
461 const DirProp
*dirProps
=pBiDi
->dirProps
;
467 for(run
=0; run
<runCount
; ++run
) {
468 dir
=ubidi_getVisualRun(pBiDi
, run
, &logicalStart
, &runLength
);
469 src
=text
+logicalStart
;
470 /* check if something relevant in insertPoints */
471 markFlag
=pBiDi
->runs
[run
].insertRemove
;
472 if(markFlag
<0) { /* BiDi controls count */
477 if((pBiDi
->isInverse
) &&
478 (/*run>0 &&*/ dirProps
[logicalStart
]!=L
)) {
479 markFlag
|= LRM_BEFORE
;
481 if (markFlag
& LRM_BEFORE
) {
484 else if (markFlag
& RLM_BEFORE
) {
495 runLength
=doWriteForward(src
, runLength
,
497 (uint16_t)(options
&~UBIDI_DO_MIRRORING
), pErrorCode
);
503 if((pBiDi
->isInverse
) &&
504 (/*run<runCount-1 &&*/ dirProps
[logicalStart
+runLength
-1]!=L
)) {
505 markFlag
|= LRM_AFTER
;
507 if (markFlag
& LRM_AFTER
) {
510 else if (markFlag
& RLM_AFTER
) {
520 } else { /* RTL run */
521 if((pBiDi
->isInverse
) &&
522 (/*run>0 &&*/ !(MASK_R_AL
&DIRPROP_FLAG(dirProps
[logicalStart
+runLength
-1])))) {
523 markFlag
|= RLM_BEFORE
;
525 if (markFlag
& LRM_BEFORE
) {
528 else if (markFlag
& RLM_BEFORE
) {
539 runLength
=doWriteReverse(src
, runLength
,
541 options
, pErrorCode
);
547 if((pBiDi
->isInverse
) &&
548 (/*run<runCount-1 &&*/ !(MASK_R_AL
&DIRPROP_FLAG(dirProps
[logicalStart
])))) {
549 markFlag
|= RLM_AFTER
;
551 if (markFlag
& LRM_AFTER
) {
554 else if (markFlag
& RLM_AFTER
) {
569 if(!(options
&UBIDI_INSERT_LRM_FOR_NUMERIC
)) {
570 /* do not insert BiDi controls */
571 for(run
=runCount
; --run
>=0;) {
572 if(UBIDI_LTR
==ubidi_getVisualRun(pBiDi
, run
, &logicalStart
, &runLength
)) {
573 runLength
=doWriteReverse(text
+logicalStart
, runLength
,
575 (uint16_t)(options
&~UBIDI_DO_MIRRORING
), pErrorCode
);
577 runLength
=doWriteForward(text
+logicalStart
, runLength
,
579 options
, pErrorCode
);
587 /* insert BiDi controls for "inverse BiDi" */
588 const DirProp
*dirProps
=pBiDi
->dirProps
;
592 for(run
=runCount
; --run
>=0;) {
594 dir
=ubidi_getVisualRun(pBiDi
, run
, &logicalStart
, &runLength
);
595 src
=text
+logicalStart
;
598 if(/*run<runCount-1 &&*/ dirProps
[logicalStart
+runLength
-1]!=L
) {
605 runLength
=doWriteReverse(src
, runLength
,
607 (uint16_t)(options
&~UBIDI_DO_MIRRORING
), pErrorCode
);
613 if(/*run>0 &&*/ dirProps
[logicalStart
]!=L
) {
620 if(/*run<runCount-1 &&*/ !(MASK_R_AL
&DIRPROP_FLAG(dirProps
[logicalStart
]))) {
627 runLength
=doWriteForward(src
, runLength
,
629 options
, pErrorCode
);
635 if(/*run>0 &&*/ !(MASK_R_AL
&DIRPROP_FLAG(dirProps
[logicalStart
+runLength
-1]))) {
646 return u_terminateUChars(saveDest
, destCapacity
, destCapacity
-destSize
, pErrorCode
);
648 #if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
649 #pragma optimize( "", on )