]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ubidiwrt.c
ICU-57165.0.1.tar.gz
[apple/icu.git] / icuSources / common / ubidiwrt.c
CommitLineData
73c04bcf 1/*
b75a7d8f
A
2******************************************************************************
3*
2ca993e8 4* Copyright (C) 2000-2015, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8* file name: ubidiwrt.c
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 1999aug06
46f4442e 14* created by: Markus W. Scherer, updated by Matitiahu Allouche
b75a7d8f
A
15*
16* This file contains implementations for BiDi functions that use
17* the core algorithm and core API to write reordered text.
18*/
19
b75a7d8f
A
20#include "unicode/utypes.h"
21#include "unicode/ustring.h"
22#include "unicode/uchar.h"
23#include "unicode/ubidi.h"
4388f060 24#include "unicode/utf16.h"
b75a7d8f
A
25#include "cmemory.h"
26#include "ustr_imp.h"
27#include "ubidiimp.h"
28
29/*
30 * The function implementations in this file are designed
31 * for UTF-16 and UTF-32, not for UTF-8.
32 *
33 * Assumptions that are not true for UTF-8:
34 * - Any code point always needs the same number of code units
35 * ("minimum-length-problem" of UTF-8)
36 * - The BiDi control characters need only one code unit each
37 *
38 * Further assumptions for all UTFs:
39 * - u_charMirror(c) needs the same number of code units as c
40 */
41#if UTF_SIZE==8
42# error reimplement ubidi_writeReordered() for UTF-8, see comment above
43#endif
44
b75a7d8f
A
/*
 * Nonzero if `type` (a value such as the one returned by u_charType())
 * is one of U_NON_SPACING_MARK, U_COMBINING_SPACING_MARK or U_ENCLOSING_MARK.
 * The test is a single bit-set membership check; `type` is evaluated once.
 */
#define IS_COMBINING(type) \
    (((1UL<<U_NON_SPACING_MARK)|(1UL<<U_COMBINING_SPACING_MARK)|(1UL<<U_ENCLOSING_MARK))&(1UL<<(type)))
46
47/*
48 * When we have UBIDI_OUTPUT_REVERSE set on ubidi_writeReordered(), then we
49 * semantically write RTL runs in reverse and later reverse them again.
50 * Instead, we actually write them in forward order to begin with.
51 * However, if the RTL run was to be mirrored, we need to mirror here now
52 * since the implicit second reversal must not do it.
53 * It looks strange to do mirroring in LTR output, but it is only because
54 * we are writing RTL output in reverse.
55 */
56static int32_t
57doWriteForward(const UChar *src, int32_t srcLength,
58 UChar *dest, int32_t destSize,
59 uint16_t options,
60 UErrorCode *pErrorCode) {
61 /* optimize for several combinations of options */
62 switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING)) {
63 case 0: {
64 /* simply copy the LTR run to the destination */
65 int32_t length=srcLength;
66 if(destSize<length) {
67 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
68 return srcLength;
69 }
70 do {
71 *dest++=*src++;
72 } while(--length>0);
73 return srcLength;
74 }
75 case UBIDI_DO_MIRRORING: {
76 /* do mirroring */
77 int32_t i=0, j=0;
78 UChar32 c;
79
80 if(destSize<srcLength) {
81 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
82 return srcLength;
83 }
84 do {
4388f060 85 U16_NEXT(src, i, srcLength, c);
b75a7d8f 86 c=u_charMirror(c);
4388f060 87 U16_APPEND_UNSAFE(dest, j, c);
b75a7d8f
A
88 } while(i<srcLength);
89 return srcLength;
90 }
91 case UBIDI_REMOVE_BIDI_CONTROLS: {
92 /* copy the LTR run and remove any BiDi control characters */
93 int32_t remaining=destSize;
94 UChar c;
95 do {
96 c=*src++;
97 if(!IS_BIDI_CONTROL_CHAR(c)) {
98 if(--remaining<0) {
99 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
100
101 /* preflight the length */
102 while(--srcLength>0) {
103 c=*src++;
104 if(!IS_BIDI_CONTROL_CHAR(c)) {
105 --remaining;
106 }
107 }
108 return destSize-remaining;
109 }
110 *dest++=c;
111 }
112 } while(--srcLength>0);
113 return destSize-remaining;
114 }
115 default: {
116 /* remove BiDi control characters and do mirroring */
117 int32_t remaining=destSize;
118 int32_t i, j=0;
119 UChar32 c;
120 do {
121 i=0;
4388f060 122 U16_NEXT(src, i, srcLength, c);
b75a7d8f
A
123 src+=i;
124 srcLength-=i;
125 if(!IS_BIDI_CONTROL_CHAR(c)) {
126 remaining-=i;
127 if(remaining<0) {
128 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
129
130 /* preflight the length */
131 while(srcLength>0) {
132 c=*src++;
133 if(!IS_BIDI_CONTROL_CHAR(c)) {
134 --remaining;
135 }
136 --srcLength;
137 }
138 return destSize-remaining;
139 }
140 c=u_charMirror(c);
4388f060 141 U16_APPEND_UNSAFE(dest, j, c);
b75a7d8f
A
142 }
143 } while(srcLength>0);
144 return j;
145 }
146 } /* end of switch */
147}
148
149static int32_t
150doWriteReverse(const UChar *src, int32_t srcLength,
151 UChar *dest, int32_t destSize,
152 uint16_t options,
153 UErrorCode *pErrorCode) {
154 /*
155 * RTL run -
156 *
157 * RTL runs need to be copied to the destination in reverse order
158 * of code points, not code units, to keep Unicode characters intact.
159 *
160 * The general strategy for this is to read the source text
161 * in backward order, collect all code units for a code point
162 * (and optionally following combining characters, see below),
163 * and copy all these code units in ascending order
164 * to the destination for this run.
165 *
166 * Several options request whether combining characters
167 * should be kept after their base characters,
168 * whether BiDi control characters should be removed, and
169 * whether characters should be replaced by their mirror-image
170 * equivalent Unicode characters.
171 */
172 int32_t i, j;
173 UChar32 c;
174
175 /* optimize for several combinations of options */
176 switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) {
177 case 0:
178 /*
179 * With none of the "complicated" options set, the destination
180 * run will have the same length as the source run,
181 * and there is no mirroring and no keeping combining characters
182 * with their base characters.
183 */
184 if(destSize<srcLength) {
185 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
186 return srcLength;
187 }
188 destSize=srcLength;
189
190 /* preserve character integrity */
191 do {
192 /* i is always after the last code unit known to need to be kept in this segment */
193 i=srcLength;
194
195 /* collect code units for one base character */
4388f060 196 U16_BACK_1(src, 0, srcLength);
b75a7d8f
A
197
198 /* copy this base character */
199 j=srcLength;
200 do {
201 *dest++=src[j++];
202 } while(j<i);
203 } while(srcLength>0);
204 break;
205 case UBIDI_KEEP_BASE_COMBINING:
206 /*
207 * Here, too, the destination
208 * run will have the same length as the source run,
209 * and there is no mirroring.
210 * We do need to keep combining characters with their base characters.
211 */
212 if(destSize<srcLength) {
213 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
214 return srcLength;
215 }
216 destSize=srcLength;
217
218 /* preserve character integrity */
219 do {
220 /* i is always after the last code unit known to need to be kept in this segment */
221 i=srcLength;
222
223 /* collect code units and modifier letters for one base character */
224 do {
4388f060 225 U16_PREV(src, 0, srcLength, c);
b75a7d8f
A
226 } while(srcLength>0 && IS_COMBINING(u_charType(c)));
227
228 /* copy this "user character" */
229 j=srcLength;
230 do {
231 *dest++=src[j++];
232 } while(j<i);
233 } while(srcLength>0);
234 break;
235 default:
236 /*
237 * With several "complicated" options set, this is the most
238 * general and the slowest copying of an RTL run.
239 * We will do mirroring, remove BiDi controls, and
240 * keep combining characters with their base characters
241 * as requested.
242 */
243 if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) {
244 i=srcLength;
245 } else {
246 /* we need to find out the destination length of the run,
247 which will not include the BiDi control characters */
248 int32_t length=srcLength;
249 UChar ch;
250
251 i=0;
252 do {
253 ch=*src++;
254 if(!IS_BIDI_CONTROL_CHAR(ch)) {
255 ++i;
256 }
257 } while(--length>0);
258 src-=srcLength;
259 }
260
261 if(destSize<i) {
262 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
263 return i;
264 }
265 destSize=i;
266
267 /* preserve character integrity */
268 do {
269 /* i is always after the last code unit known to need to be kept in this segment */
270 i=srcLength;
271
272 /* collect code units for one base character */
4388f060 273 U16_PREV(src, 0, srcLength, c);
b75a7d8f
A
274 if(options&UBIDI_KEEP_BASE_COMBINING) {
275 /* collect modifier letters for this base character */
276 while(srcLength>0 && IS_COMBINING(u_charType(c))) {
4388f060 277 U16_PREV(src, 0, srcLength, c);
b75a7d8f
A
278 }
279 }
280
281 if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) {
282 /* do not copy this BiDi control character */
283 continue;
284 }
285
286 /* copy this "user character" */
287 j=srcLength;
288 if(options&UBIDI_DO_MIRRORING) {
289 /* mirror only the base character */
290 int32_t k=0;
291 c=u_charMirror(c);
4388f060 292 U16_APPEND_UNSAFE(dest, k, c);
b75a7d8f
A
293 dest+=k;
294 j+=k;
295 }
296 while(j<i) {
297 *dest++=src[j++];
298 }
299 } while(srcLength>0);
300 break;
301 } /* end of switch */
302
303 return destSize;
304}
305
306U_CAPI int32_t U_EXPORT2
307ubidi_writeReverse(const UChar *src, int32_t srcLength,
308 UChar *dest, int32_t destSize,
309 uint16_t options,
310 UErrorCode *pErrorCode) {
311 int32_t destLength;
312
313 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
314 return 0;
315 }
316
317 /* more error checking */
318 if( src==NULL || srcLength<-1 ||
319 destSize<0 || (destSize>0 && dest==NULL))
320 {
321 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
322 return 0;
323 }
324
325 /* do input and output overlap? */
326 if( dest!=NULL &&
327 ((src>=dest && src<dest+destSize) ||
328 (dest>=src && dest<src+srcLength)))
329 {
330 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
331 return 0;
332 }
333
334 if(srcLength==-1) {
335 srcLength=u_strlen(src);
336 }
337 if(srcLength>0) {
338 destLength=doWriteReverse(src, srcLength, dest, destSize, options, pErrorCode);
339 } else {
340 /* nothing to do */
341 destLength=0;
342 }
343
344 return u_terminateUChars(dest, destSize, destLength, pErrorCode);
345}
346
b75a7d8f
A
347U_CAPI int32_t U_EXPORT2
348ubidi_writeReordered(UBiDi *pBiDi,
349 UChar *dest, int32_t destSize,
350 uint16_t options,
351 UErrorCode *pErrorCode) {
352 const UChar *text;
353 UChar *saveDest;
354 int32_t length, destCapacity;
355 int32_t run, runCount, logicalStart, runLength;
356
357 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
358 return 0;
359 }
360
361 /* more error checking */
362 if( pBiDi==NULL ||
73c04bcf 363 (text=pBiDi->text)==NULL || (length=pBiDi->length)<0 ||
b75a7d8f
A
364 destSize<0 || (destSize>0 && dest==NULL))
365 {
366 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
367 return 0;
368 }
369
370 /* do input and output overlap? */
371 if( dest!=NULL &&
372 ((text>=dest && text<dest+destSize) ||
73c04bcf 373 (dest>=text && dest<text+pBiDi->originalLength)))
b75a7d8f
A
374 {
375 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
376 return 0;
377 }
378
379 if(length==0) {
380 /* nothing to do */
381 return u_terminateUChars(dest, destSize, 0, pErrorCode);
382 }
383
384 runCount=ubidi_countRuns(pBiDi, pErrorCode);
385 if(U_FAILURE(*pErrorCode)) {
386 return 0;
387 }
388
389 /* destSize shrinks, later destination length=destCapacity-destSize */
390 saveDest=dest;
391 destCapacity=destSize;
392
73c04bcf
A
393 /*
394 * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the
395 * reordering mode (checked below) is appropriate.
396 */
397 if(pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
398 options|=UBIDI_INSERT_LRM_FOR_NUMERIC;
399 options&=~UBIDI_REMOVE_BIDI_CONTROLS;
400 }
401 /*
402 * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS
403 * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC.
404 */
405 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
406 options|=UBIDI_REMOVE_BIDI_CONTROLS;
407 options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
408 }
b75a7d8f
A
409 /*
410 * If we do not perform the "inverse BiDi" algorithm, then we
411 * don't need to insert any LRMs, and don't need to test for it.
412 */
73c04bcf
A
413 if((pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_NUMBERS_AS_L) &&
414 (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_LIKE_DIRECT) &&
415 (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
416 (pBiDi->reorderingMode != UBIDI_REORDER_RUNS_ONLY)) {
b75a7d8f
A
417 options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
418 }
b75a7d8f
A
419 /*
420 * Iterate through all visual runs and copy the run text segments to
421 * the destination, according to the options.
422 *
423 * The tests for where to insert LRMs ignore the fact that there may be
424 * BN codes or non-BMP code points at the beginning and end of a run;
425 * they may insert LRMs unnecessarily but the tests are faster this way
426 * (this would have to be improved for UTF-8).
427 *
428 * Note that the only errors that are set by doWriteXY() are buffer overflow
429 * errors. Ignore them until the end, and continue for preflighting.
430 */
431 if(!(options&UBIDI_OUTPUT_REVERSE)) {
432 /* forward output */
433 if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
434 /* do not insert BiDi controls */
435 for(run=0; run<runCount; ++run) {
436 if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
437 runLength=doWriteForward(text+logicalStart, runLength,
438 dest, destSize,
439 (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
440 } else {
441 runLength=doWriteReverse(text+logicalStart, runLength,
442 dest, destSize,
443 options, pErrorCode);
444 }
4388f060
A
445 if(dest!=NULL) {
446 dest+=runLength;
447 }
b75a7d8f
A
448 destSize-=runLength;
449 }
450 } else {
451 /* insert BiDi controls for "inverse BiDi" */
73c04bcf 452 const DirProp *dirProps=pBiDi->dirProps;
b75a7d8f 453 const UChar *src;
73c04bcf 454 UChar uc;
b75a7d8f 455 UBiDiDirection dir;
73c04bcf 456 int32_t markFlag;
b75a7d8f
A
457
458 for(run=0; run<runCount; ++run) {
459 dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
460 src=text+logicalStart;
73c04bcf
A
461 /* check if something relevant in insertPoints */
462 markFlag=pBiDi->runs[run].insertRemove;
46f4442e 463 if(markFlag<0) { /* BiDi controls count */
73c04bcf
A
464 markFlag=0;
465 }
b75a7d8f
A
466
467 if(UBIDI_LTR==dir) {
73c04bcf
A
468 if((pBiDi->isInverse) &&
469 (/*run>0 &&*/ dirProps[logicalStart]!=L)) {
470 markFlag |= LRM_BEFORE;
471 }
472 if (markFlag & LRM_BEFORE) {
473 uc=LRM_CHAR;
474 }
475 else if (markFlag & RLM_BEFORE) {
476 uc=RLM_CHAR;
477 }
478 else uc=0;
479 if(uc) {
b75a7d8f 480 if(destSize>0) {
73c04bcf 481 *dest++=uc;
b75a7d8f
A
482 }
483 --destSize;
484 }
485
486 runLength=doWriteForward(src, runLength,
487 dest, destSize,
488 (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
4388f060
A
489 if(dest!=NULL) {
490 dest+=runLength;
491 }
b75a7d8f
A
492 destSize-=runLength;
493
73c04bcf
A
494 if((pBiDi->isInverse) &&
495 (/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L)) {
496 markFlag |= LRM_AFTER;
497 }
498 if (markFlag & LRM_AFTER) {
499 uc=LRM_CHAR;
500 }
501 else if (markFlag & RLM_AFTER) {
502 uc=RLM_CHAR;
503 }
504 else uc=0;
505 if(uc) {
b75a7d8f 506 if(destSize>0) {
73c04bcf 507 *dest++=uc;
b75a7d8f
A
508 }
509 --destSize;
510 }
73c04bcf
A
511 } else { /* RTL run */
512 if((pBiDi->isInverse) &&
513 (/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1])))) {
514 markFlag |= RLM_BEFORE;
515 }
516 if (markFlag & LRM_BEFORE) {
517 uc=LRM_CHAR;
518 }
519 else if (markFlag & RLM_BEFORE) {
520 uc=RLM_CHAR;
521 }
522 else uc=0;
523 if(uc) {
b75a7d8f 524 if(destSize>0) {
73c04bcf 525 *dest++=uc;
b75a7d8f
A
526 }
527 --destSize;
528 }
529
530 runLength=doWriteReverse(src, runLength,
531 dest, destSize,
532 options, pErrorCode);
4388f060
A
533 if(dest!=NULL) {
534 dest+=runLength;
535 }
b75a7d8f
A
536 destSize-=runLength;
537
73c04bcf
A
538 if((pBiDi->isInverse) &&
539 (/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart])))) {
540 markFlag |= RLM_AFTER;
541 }
542 if (markFlag & LRM_AFTER) {
543 uc=LRM_CHAR;
544 }
545 else if (markFlag & RLM_AFTER) {
546 uc=RLM_CHAR;
547 }
548 else uc=0;
549 if(uc) {
b75a7d8f 550 if(destSize>0) {
73c04bcf 551 *dest++=uc;
b75a7d8f
A
552 }
553 --destSize;
554 }
555 }
556 }
557 }
558 } else {
559 /* reverse output */
560 if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
561 /* do not insert BiDi controls */
562 for(run=runCount; --run>=0;) {
563 if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
564 runLength=doWriteReverse(text+logicalStart, runLength,
565 dest, destSize,
566 (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
567 } else {
568 runLength=doWriteForward(text+logicalStart, runLength,
569 dest, destSize,
570 options, pErrorCode);
571 }
4388f060
A
572 if(dest!=NULL) {
573 dest+=runLength;
574 }
b75a7d8f
A
575 destSize-=runLength;
576 }
577 } else {
578 /* insert BiDi controls for "inverse BiDi" */
73c04bcf 579 const DirProp *dirProps=pBiDi->dirProps;
b75a7d8f
A
580 const UChar *src;
581 UBiDiDirection dir;
582
583 for(run=runCount; --run>=0;) {
584 /* reverse output */
585 dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
586 src=text+logicalStart;
587
588 if(UBIDI_LTR==dir) {
73c04bcf 589 if(/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L) {
b75a7d8f
A
590 if(destSize>0) {
591 *dest++=LRM_CHAR;
592 }
593 --destSize;
594 }
595
596 runLength=doWriteReverse(src, runLength,
597 dest, destSize,
598 (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
4388f060
A
599 if(dest!=NULL) {
600 dest+=runLength;
601 }
b75a7d8f
A
602 destSize-=runLength;
603
73c04bcf 604 if(/*run>0 &&*/ dirProps[logicalStart]!=L) {
b75a7d8f
A
605 if(destSize>0) {
606 *dest++=LRM_CHAR;
607 }
608 --destSize;
609 }
610 } else {
73c04bcf 611 if(/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart]))) {
b75a7d8f
A
612 if(destSize>0) {
613 *dest++=RLM_CHAR;
614 }
615 --destSize;
616 }
617
618 runLength=doWriteForward(src, runLength,
619 dest, destSize,
620 options, pErrorCode);
4388f060
A
621 if(dest!=NULL) {
622 dest+=runLength;
623 }
b75a7d8f
A
624 destSize-=runLength;
625
73c04bcf 626 if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) {
b75a7d8f
A
627 if(destSize>0) {
628 *dest++=RLM_CHAR;
629 }
630 --destSize;
631 }
632 }
633 }
634 }
635 }
636
637 return u_terminateUChars(saveDest, destCapacity, destCapacity-destSize, pErrorCode);
638}