2 ******************************************************************************
4 * Copyright (C) 2000-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
8 * file name: ubidiwrt.c
10 * tab size: 8 (not used)
13 * created on: 1999aug06
14 * created by: Markus W. Scherer
16 * This file contains implementations for BiDi functions that use
17 * the core algorithm and core API to write reordered text.
20 /* set import/export definitions */
21 #ifndef U_COMMON_IMPLEMENTATION
22 # define U_COMMON_IMPLEMENTATION
25 #include "unicode/utypes.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/ubidi.h"
34 * The function implementations in this file are designed
35 * for UTF-16 and UTF-32, not for UTF-8.
37 * Assumptions that are not true for UTF-8:
38 * - Any code point always needs the same number of code units
39 * ("minimum-length-problem" of UTF-8)
40 * - The BiDi control characters need only one code unit each
42 * Further assumptions for all UTFs:
43 * - u_charMirror(c) needs the same number of code units as c
46 # error reimplement ubidi_writeReordered() for UTF-8, see comment above
/*
 * IS_COMBINING(type): nonzero when the general category value type is
 * U_NON_SPACING_MARK, U_COMBINING_SPACING_MARK or U_ENCLOSING_MARK.
 * The test shifts 1UL by type and masks it with the three category bits.
 * In this file the argument is the result of u_charType(c).
 */
49 #define IS_COMBINING(type) ((1UL<<(type))&(1UL<<U_NON_SPACING_MARK|1UL<<U_COMBINING_SPACING_MARK|1UL<<U_ENCLOSING_MARK))
52 * When we have UBIDI_OUTPUT_REVERSE set on ubidi_writeReordered(), then we
53 * semantically write RTL runs in reverse and later reverse them again.
54 * Instead, we actually write them in forward order to begin with.
55 * However, if the RTL run was to be mirrored, we need to mirror here now
56 * since the implicit second reversal must not do it.
57 * It looks strange to do mirroring in LTR output, but it is only because
58 * we are writing RTL output in reverse.
/*
 * doWriteForward: copy one run of srcLength code units from src to dest
 * without reversing it.
 *
 * The switch selects a code path from the UBIDI_REMOVE_BIDI_CONTROLS and
 * UBIDI_DO_MIRRORING bits of options.
 * Every visible error assignment stores U_BUFFER_OVERFLOW_ERROR in
 * *pErrorCode; the visible return statements yield destSize-remaining.
 *
 * NOTE(review): several statements of this function are not visible in
 * this view; the comments below describe only what can be seen.
 */
61 doWriteForward(const UChar
*src
, int32_t srcLength
,
62 UChar
*dest
, int32_t destSize
,
64 UErrorCode
*pErrorCode
) {
65 /* optimize for several combinations of options */
66 switch(options
&(UBIDI_REMOVE_BIDI_CONTROLS
|UBIDI_DO_MIRRORING
)) {
68 /* simply copy the LTR run to the destination */
69 int32_t length
=srcLength
;
71 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
79 case UBIDI_DO_MIRRORING
: {
/* mirroring only: the capacity check compares destSize with srcLength once, up front */
84 if(destSize
<srcLength
) {
85 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
/* read the next code point c from src at index i, then store c into dest at index j */
89 UTF_NEXT_CHAR(src
, i
, srcLength
, c
);
91 UTF_APPEND_CHAR_UNSAFE(dest
, j
, c
);
95 case UBIDI_REMOVE_BIDI_CONTROLS
: {
96 /* copy the LTR run and remove any BiDi control characters */
/* remaining starts at the full capacity; this path returns destSize-remaining */
97 int32_t remaining
=destSize
;
101 if(!IS_BIDI_CONTROL_CHAR(c
)) {
103 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
105 /* preflight the length */
/* after an overflow the rest of the run is still scanned and only non-control characters are tested */
106 while(--srcLength
>0) {
108 if(!IS_BIDI_CONTROL_CHAR(c
)) {
112 return destSize
-remaining
;
116 } while(--srcLength
>0);
117 return destSize
-remaining
;
120 /* remove BiDi control characters and do mirroring */
/* same bookkeeping as the remove-controls path, but input is read per code point */
121 int32_t remaining
=destSize
;
126 UTF_NEXT_CHAR(src
, i
, srcLength
, c
);
129 if(!IS_BIDI_CONTROL_CHAR(c
)) {
132 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
134 /* preflight the length */
137 if(!IS_BIDI_CONTROL_CHAR(c
)) {
142 return destSize
-remaining
;
145 UTF_APPEND_CHAR_UNSAFE(dest
, j
, c
);
147 } while(srcLength
>0);
150 } /* end of switch */
/*
 * doWriteReverse: copy one run from src to dest in reverse order of code
 * points, reading src backward from index srcLength toward 0.
 *
 * The switch selects a code path from the UBIDI_REMOVE_BIDI_CONTROLS,
 * UBIDI_DO_MIRRORING and UBIDI_KEEP_BASE_COMBINING bits of options.
 * Every visible error assignment stores U_BUFFER_OVERFLOW_ERROR in
 * *pErrorCode.
 *
 * NOTE(review): several statements of this function are not visible in
 * this view; the comments below describe only what can be seen.
 */
154 doWriteReverse(const UChar
*src
, int32_t srcLength
,
155 UChar
*dest
, int32_t destSize
,
157 UErrorCode
*pErrorCode
) {
161 * RTL runs need to be copied to the destination in reverse order
162 * of code points, not code units, to keep Unicode characters intact.
164 * The general strategy for this is to read the source text
165 * in backward order, collect all code units for a code point
166 * (and optionally following combining characters, see below),
167 * and copy all these code units in ascending order
168 * to the destination for this run.
170 * Several options request whether combining characters
171 * should be kept after their base characters,
172 * whether BiDi control characters should be removed, and
173 * whether characters should be replaced by their mirror-image
174 * equivalent Unicode characters.
179 /* optimize for several combinations of options */
180 switch(options
&(UBIDI_REMOVE_BIDI_CONTROLS
|UBIDI_DO_MIRRORING
|UBIDI_KEEP_BASE_COMBINING
)) {
183 * With none of the "complicated" options set, the destination
184 * run will have the same length as the source run,
185 * and there is no mirroring and no keeping combining characters
186 * with their base characters.
188 if(destSize
<srcLength
) {
189 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
194 /* preserve character integrity */
196 /* i is always after the last code unit known to need to be kept in this segment */
199 /* collect code units for one base character */
/* UTF_BACK_1 steps srcLength back over one code point, never below index 0 */
200 UTF_BACK_1(src
, 0, srcLength
);
202 /* copy this base character */
207 } while(srcLength
>0);
209 case UBIDI_KEEP_BASE_COMBINING
:
211 * Here, too, the destination
212 * run will have the same length as the source run,
213 * and there is no mirroring.
214 * We do need to keep combining characters with their base characters.
216 if(destSize
<srcLength
) {
217 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
222 /* preserve character integrity */
224 /* i is always after the last code unit known to need to be kept in this segment */
227 /* collect code units and combining characters for one base character */
/* keep stepping backward while the code point just read is a combining mark */
229 UTF_PREV_CHAR(src
, 0, srcLength
, c
);
230 } while(srcLength
>0 && IS_COMBINING(u_charType(c
)));
232 /* copy this "user character" */
237 } while(srcLength
>0);
241 * With several "complicated" options set, this is the most
242 * general and the slowest copying of an RTL run.
243 * We will do mirroring, remove BiDi controls, and
244 * keep combining characters with their base characters
/* without UBIDI_REMOVE_BIDI_CONTROLS no length computation is needed */
247 if(!(options
&UBIDI_REMOVE_BIDI_CONTROLS
)) {
250 /* we need to find out the destination length of the run,
251 which will not include the BiDi control characters */
252 int32_t length
=srcLength
;
258 if(!IS_BIDI_CONTROL_CHAR(ch
)) {
266 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
271 /* preserve character integrity */
273 /* i is always after the last code unit known to need to be kept in this segment */
276 /* collect code units for one base character */
277 UTF_PREV_CHAR(src
, 0, srcLength
, c
);
278 if(options
&UBIDI_KEEP_BASE_COMBINING
) {
279 /* collect combining characters for this base character */
280 while(srcLength
>0 && IS_COMBINING(u_charType(c
))) {
281 UTF_PREV_CHAR(src
, 0, srcLength
, c
);
/* at this point c is the last code point read by the backward scan above */
285 if(options
&UBIDI_REMOVE_BIDI_CONTROLS
&& IS_BIDI_CONTROL_CHAR(c
)) {
286 /* do not copy this BiDi control character */
290 /* copy this "user character" */
292 if(options
&UBIDI_DO_MIRRORING
) {
293 /* mirror only the base character */
296 UTF_APPEND_CHAR_UNSAFE(dest
, k
, c
);
303 } while(srcLength
>0);
305 } /* end of switch */
/*
 * ubidi_writeReverse: public entry point that reverses src into dest by
 * calling doWriteReverse and then terminates the output with
 * u_terminateUChars, whose result is returned.
 *
 * Visible argument checks, each storing U_ILLEGAL_ARGUMENT_ERROR:
 * - src is NULL, srcLength is below -1, destSize is negative, or dest is
 *   NULL while destSize is positive
 * - the src and dest ranges overlap
 * Nothing is done when pErrorCode is NULL or already holds a failure.
 *
 * NOTE(review): the overlap test uses srcLength before the u_strlen
 * assignment further down. A srcLength of -1 passes the earlier check, and
 * with that value the clause dest>=src && dest<src+srcLength can never be
 * true. Confirm against the full source whether this is intended.
 * NOTE(review): several statements of this function are not visible in
 * this view.
 */
310 U_CAPI
int32_t U_EXPORT2
311 ubidi_writeReverse(const UChar
*src
, int32_t srcLength
,
312 UChar
*dest
, int32_t destSize
,
314 UErrorCode
*pErrorCode
) {
317 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
321 /* more error checking */
322 if( src
==NULL
|| srcLength
<-1 ||
323 destSize
<0 || (destSize
>0 && dest
==NULL
))
325 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
329 /* do input and output overlap? */
331 ((src
>=dest
&& src
<dest
+destSize
) ||
332 (dest
>=src
&& dest
<src
+srcLength
)))
334 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* replace srcLength with the length found by u_strlen; presumably only for srcLength==-1, the guard is not visible here */
339 srcLength
=u_strlen(src
);
/* the actual reversal is delegated; options are passed through unchanged */
342 destLength
=doWriteReverse(src
, srcLength
, dest
, destSize
, options
, pErrorCode
);
348 return u_terminateUChars(dest
, destSize
, destLength
, pErrorCode
);
/*
 * MASK_R_AL: bit mask with one bit for U_RIGHT_TO_LEFT and one for
 * U_RIGHT_TO_LEFT_ARABIC. ubidi_writeReordered tests it against
 * DIRPROP_FLAG(dirProps[...]) to see whether a character is R or AL.
 */
351 #define MASK_R_AL (1UL<<U_RIGHT_TO_LEFT|1UL<<U_RIGHT_TO_LEFT_ARABIC)
/*
 * ubidi_writeReordered: write the text held by pBiDi to dest, one visual
 * run at a time, and return the result of u_terminateUChars.
 *
 * Visible phases:
 * 1. Argument checks. U_ILLEGAL_ARGUMENT_ERROR is stored when pBiDi->text
 *    is NULL, pBiDi->length is negative, destSize is negative, dest is NULL
 *    while destSize is positive, or the text and dest ranges overlap.
 * 2. Option adjustment from pBiDi->reorderingOptions and
 *    pBiDi->reorderingMode.
 * 3. Run loop. Without UBIDI_OUTPUT_REVERSE the runs are visited first to
 *    last; LTR runs go through doWriteForward and the others through
 *    doWriteReverse. With UBIDI_OUTPUT_REVERSE the runs are visited last to
 *    first and the two helpers swap roles. LTR runs are always written with
 *    UBIDI_DO_MIRRORING cleared.
 * 4. When UBIDI_INSERT_LRM_FOR_NUMERIC is still set, mark flags
 *    (LRM_BEFORE, RLM_BEFORE, LRM_AFTER, RLM_AFTER) are evaluated around
 *    each run.
 *
 * destCapacity keeps the original destSize; the final length passed to
 * u_terminateUChars is destCapacity-destSize.
 *
 * NOTE(review): many statements of this function are not visible in this
 * view; the comments describe only what can be seen.
 */
353 U_CAPI
int32_t U_EXPORT2
354 ubidi_writeReordered(UBiDi
*pBiDi
,
355 UChar
*dest
, int32_t destSize
,
357 UErrorCode
*pErrorCode
) {
360 int32_t length
, destCapacity
;
361 int32_t run
, runCount
, logicalStart
, runLength
;
363 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
367 /* more error checking */
/* text and length are loaded from pBiDi as part of the check itself */
369 (text
=pBiDi
->text
)==NULL
|| (length
=pBiDi
->length
)<0 ||
370 destSize
<0 || (destSize
>0 && dest
==NULL
))
372 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
376 /* do input and output overlap? */
/* the source range is measured with pBiDi->originalLength, not length */
378 ((text
>=dest
&& text
<dest
+destSize
) ||
379 (dest
>=text
&& dest
<text
+pBiDi
->originalLength
)))
381 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* early exit that terminates dest with an output length of 0; its guard is not visible here */
387 return u_terminateUChars(dest
, destSize
, 0, pErrorCode
);
390 runCount
=ubidi_countRuns(pBiDi
, pErrorCode
);
391 if(U_FAILURE(*pErrorCode
)) {
395 /* destSize shrinks, later destination length=destCapacity-destSize */
397 destCapacity
=destSize
;
400 * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the
401 * reordering mode (checked below) is appropriate.
403 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) {
404 options
|=UBIDI_INSERT_LRM_FOR_NUMERIC
;
405 options
&=~UBIDI_REMOVE_BIDI_CONTROLS
;
408 * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS
409 * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC.
411 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_REMOVE_CONTROLS
) {
412 options
|=UBIDI_REMOVE_BIDI_CONTROLS
;
413 options
&=~UBIDI_INSERT_LRM_FOR_NUMERIC
;
416 * If we do not perform the "inverse BiDi" algorithm, then we
417 * don't need to insert any LRMs, and don't need to test for it.
419 if((pBiDi
->reorderingMode
!= UBIDI_REORDER_INVERSE_NUMBERS_AS_L
) &&
420 (pBiDi
->reorderingMode
!= UBIDI_REORDER_INVERSE_LIKE_DIRECT
) &&
421 (pBiDi
->reorderingMode
!= UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
) &&
422 (pBiDi
->reorderingMode
!= UBIDI_REORDER_RUNS_ONLY
)) {
423 options
&=~UBIDI_INSERT_LRM_FOR_NUMERIC
;
426 * Iterate through all visual runs and copy the run text segments to
427 * the destination, according to the options.
429 * The tests for where to insert LRMs ignore the fact that there may be
430 * BN codes or non-BMP code points at the beginning and end of a run;
431 * they may insert LRMs unnecessarily but the tests are faster this way
432 * (this would have to be improved for UTF-8).
434 * Note that the only errors that are set by doWriteXY() are buffer overflow
435 * errors. Ignore them until the end, and continue for preflighting.
437 if(!(options
&UBIDI_OUTPUT_REVERSE
)) {
439 if(!(options
&UBIDI_INSERT_LRM_FOR_NUMERIC
)) {
440 /* do not insert BiDi controls */
441 for(run
=0; run
<runCount
; ++run
) {
/* LTR run: forward copy with UBIDI_DO_MIRRORING cleared */
442 if(UBIDI_LTR
==ubidi_getVisualRun(pBiDi
, run
, &logicalStart
, &runLength
)) {
443 runLength
=doWriteForward(text
+logicalStart
, runLength
,
445 (uint16_t)(options
&~UBIDI_DO_MIRRORING
), pErrorCode
);
/* other runs: reverse copy with the options as they are */
447 runLength
=doWriteReverse(text
+logicalStart
, runLength
,
449 options
, pErrorCode
);
455 /* insert BiDi controls for "inverse BiDi" */
456 const DirProp
*dirProps
=pBiDi
->dirProps
;
462 for(run
=0; run
<runCount
; ++run
) {
463 dir
=ubidi_getVisualRun(pBiDi
, run
, &logicalStart
, &runLength
);
464 src
=text
+logicalStart
;
465 /* check if something relevant in insertPoints */
466 markFlag
=pBiDi
->runs
[run
].insertRemove
;
467 if(markFlag
<0) { /* insert count */
/* inverse mode: request LRM_BEFORE when the character at the logical start of the run is not L */
472 if((pBiDi
->isInverse
) &&
473 (/*run>0 &&*/ dirProps
[logicalStart
]!=L
)) {
474 markFlag
|= LRM_BEFORE
;
476 if (markFlag
& LRM_BEFORE
) {
479 else if (markFlag
& RLM_BEFORE
) {
490 runLength
=doWriteForward(src
, runLength
,
492 (uint16_t)(options
&~UBIDI_DO_MIRRORING
), pErrorCode
);
/* inverse mode: request LRM_AFTER when the character at the logical end of the run is not L */
496 if((pBiDi
->isInverse
) &&
497 (/*run<runCount-1 &&*/ dirProps
[logicalStart
+runLength
-1]!=L
)) {
498 markFlag
|= LRM_AFTER
;
500 if (markFlag
& LRM_AFTER
) {
503 else if (markFlag
& RLM_AFTER
) {
513 } else { /* RTL run */
/* inverse mode: request RLM_BEFORE when the character at the logical end of the run is neither R nor AL */
514 if((pBiDi
->isInverse
) &&
515 (/*run>0 &&*/ !(MASK_R_AL
&DIRPROP_FLAG(dirProps
[logicalStart
+runLength
-1])))) {
516 markFlag
|= RLM_BEFORE
;
518 if (markFlag
& LRM_BEFORE
) {
521 else if (markFlag
& RLM_BEFORE
) {
532 runLength
=doWriteReverse(src
, runLength
,
534 options
, pErrorCode
);
/* inverse mode: request RLM_AFTER when the character at the logical start of the run is neither R nor AL */
538 if((pBiDi
->isInverse
) &&
539 (/*run<runCount-1 &&*/ !(MASK_R_AL
&DIRPROP_FLAG(dirProps
[logicalStart
])))) {
540 markFlag
|= RLM_AFTER
;
542 if (markFlag
& LRM_AFTER
) {
545 else if (markFlag
& RLM_AFTER
) {
560 if(!(options
&UBIDI_INSERT_LRM_FOR_NUMERIC
)) {
561 /* do not insert BiDi controls */
/* reversed output: visit the runs from last to first */
562 for(run
=runCount
; --run
>=0;) {
/* LTR run: reverse copy with UBIDI_DO_MIRRORING cleared */
563 if(UBIDI_LTR
==ubidi_getVisualRun(pBiDi
, run
, &logicalStart
, &runLength
)) {
564 runLength
=doWriteReverse(text
+logicalStart
, runLength
,
566 (uint16_t)(options
&~UBIDI_DO_MIRRORING
), pErrorCode
);
/* other runs: forward copy with the options as they are */
568 runLength
=doWriteForward(text
+logicalStart
, runLength
,
570 options
, pErrorCode
);
576 /* insert BiDi controls for "inverse BiDi" */
577 const DirProp
*dirProps
=pBiDi
->dirProps
;
581 for(run
=runCount
; --run
>=0;) {
583 dir
=ubidi_getVisualRun(pBiDi
, run
, &logicalStart
, &runLength
);
584 src
=text
+logicalStart
;
/* the four tests below look at the first or last character of the run, as in the forward-output loop */
587 if(/*run<runCount-1 &&*/ dirProps
[logicalStart
+runLength
-1]!=L
) {
594 runLength
=doWriteReverse(src
, runLength
,
596 (uint16_t)(options
&~UBIDI_DO_MIRRORING
), pErrorCode
);
600 if(/*run>0 &&*/ dirProps
[logicalStart
]!=L
) {
607 if(/*run<runCount-1 &&*/ !(MASK_R_AL
&DIRPROP_FLAG(dirProps
[logicalStart
]))) {
614 runLength
=doWriteForward(src
, runLength
,
616 options
, pErrorCode
);
620 if(/*run>0 &&*/ !(MASK_R_AL
&DIRPROP_FLAG(dirProps
[logicalStart
+runLength
-1]))) {
/* terminate at the start of the output (saveDest) with length destCapacity-destSize */
631 return u_terminateUChars(saveDest
, destCapacity
, destCapacity
-destSize
, pErrorCode
);