1 /********************************************************************
3 * Copyright (c) 2005-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /************************************************************************
7 * Tests for the UText and UTextIterator text abstraction classes
9 ************************************************************************/
14 #include "unicode/utypes.h"
15 #include "unicode/utext.h"
16 #include "unicode/utf8.h"
17 #include "unicode/ustring.h"
18 #include "unicode/uchriter.h"
// File-scope test state. gFailed starts out FALSE and gTestNum starts at 0.
// gTestNum is the test number printed in the TEST_ASSERT / TEST_SUCCESS failure messages.
// NOTE(review): presumably gFailed is set when a check fails; the code that writes it is not visible here.
23 static UBool gFailed
= FALSE
;
24 static int gTestNum
= 0;
// Forward declaration; the definition is not part of this excerpt.
// Called from TestString() for the "Fragmented UnicodeString (Chunk size of one)" case.
//   ut      UText to open into (TestString() passes NULL).
//   s       the string the UText is opened over.
//   status  receives the error code.
27 UText
*openFragmentedUnicodeString(UText
*ut
, UnicodeString
*s
, UErrorCode
*status
);
// Failure-reporting helpers used throughout this file.
// TEST_ASSERT(x) calls errln() with the current test number, file and line when (x)==FALSE.
// TEST_SUCCESS(status) does the same when U_FAILURE(status) and also prints u_errorName(status).
// The remainder of each macro body is not visible in this excerpt.
29 #define TEST_ASSERT(x) \
30 { if ((x)==FALSE) {errln("Test #%d failure in file %s at line %d\n", gTestNum, __FILE__, __LINE__);\
35 #define TEST_SUCCESS(status) \
36 { if (U_FAILURE(status)) {errln("Test #%d failure in file %s at line %d. Error = \"%s\"\n", \
37 gTestNum, __FILE__, __LINE__, u_errorName(status)); \
41 UTextTest::UTextTest() { // Constructor; its body is not visible in this excerpt.
// Destructor; its body is not visible in this excerpt.
44 UTextTest::~UTextTest() {
// Test dispatcher. Sets name to the name of the test selected by index and,
// when exec is set, runs that test.
//   index  test selector; 0 through 7 are handled, any other value yields an empty name.
//   exec   when set, the selected test function is called.
//   name   receives the test name.
//   The trailing char* parameter is unnamed and unused.
49 UTextTest::runIndexedTest(int32_t index
, UBool exec
,
50 const char* &name
, char* /*par*/) {
52 case 0: name
= "TextTest";
53 if (exec
) TextTest(); break;
54 case 1: name
= "ErrorTest";
55 if (exec
) ErrorTest(); break;
56 case 2: name
= "FreezeTest";
57 if (exec
) FreezeTest(); break;
58 case 3: name
= "Ticket5560";
59 if (exec
) Ticket5560(); break;
60 case 4: name
= "Ticket6847";
61 if (exec
) Ticket6847(); break;
62 case 5: name
= "Ticket10562";
63 if (exec
) Ticket10562(); break;
64 case 6: name
= "Ticket10983";
65 if (exec
) Ticket10983(); break;
66 case 7: name
= "Ticket12130";
67 if (exec
) Ticket12130(); break;
// Index not handled above: report an empty name.
68 default: name
= ""; break;
73 // Quick and dirty random number generator.
74 // (don't use library so that results are portable.)
// Generator state. It has a fixed initial value of 1, so the sequence is repeatable
// unless m_seed is reassigned elsewhere.
75 static uint32_t m_seed
= 1;
// Advances m_seed and returns a value in the range 0..32767.
76 static uint32_t m_rand()
// Linear congruential step: seed = seed * 1103515245 + 12345, in uint32_t arithmetic.
78 m_seed
= m_seed
* 1103515245 + 12345;
// Drop the low 16 bits of the state, then reduce modulo 32768.
79 return (uint32_t)(m_seed
/65536) % 32768;
86 // Top Level function for UText testing.
87 // Specifies the strings to be tested, with the actual testing itself
88 // being carried out in another function, TestString().
90 void UTextTest::TextTest() {
// The \\U escapes below are written with a doubled backslash so that they reach
// TestString() as literal text; TestString() unescapes the string before using it.
93 TestString("abcd\\U00010001xyz");
96 // Supplementary chars at start or end
97 TestString("\\U00010001");
98 TestString("abc\\U00010001");
99 TestString("\\U00010001abc");
101 // Test simple strings of lengths 1 to 59, looking for glitches at buffer boundaries
103 for (i
=1; i
<60; i
++) {
105 for (j
=0; j
<i
; j
++) {
106 if (j
+0x30 == 0x5c) {
107 // backslash. Needs to be escaped
108 s
.append((UChar
)0x5c);
// Append the next character; the characters run upward from 0x30.
110 s
.append(UChar(j
+0x30));
115 // Test strings with odd-aligned supplementary chars,
116 // looking for glitches at buffer boundaries
117 for (i
=1; i
<60; i
++) {
// A single leading code unit (0x41) puts the supplementary characters that follow
// at odd UTF-16 indexes.
119 s
.append((UChar
)0x41);
120 for (j
=0; j
<i
; j
++) {
121 s
.append(UChar32(j
+0x11000));
126 // String of chars of randomly varying size in utf-8 representation.
127 // Exercise the mapping, and the varying sized buffer.
// NOTE(review): c4 is presumably the next character to use for the largest size;
// the code that consumes it is not visible in this excerpt.
133 UChar32 c4
= 0x11000;
134 for (i
=0; i
<1000; i
++) {
// Pick a size from 1 to 4 for the next character.
135 int len8
= m_rand()%4
+ 1;
139 // don't put 0 into string (0 terminated strings for some tests)
140 // don't put '\', will cause unescape() to fail.
141 if (c1
==0x5c || c1
==0) {
162 // TestString() Run a suite of UText tests on a string.
163 // The test string is unescaped before use.
//   s   the test text, possibly containing backslash escapes.
// The unescaped text is opened through each UText provider in turn
// (UChar*, UnicodeString, const UnicodeString, Replaceable, CharacterIterator,
// fragmented UnicodeString, UTF-8) and handed to TestAccess(); the
// UnicodeString and Replaceable UTexts are additionally handed to TestCMR().
165 void UTextTest::TestString(const UnicodeString
&s
) {
170 UErrorCode status
= U_ZERO_ERROR
;
174 UnicodeString sa
= s
.unescape();
178 // Build up a mapping between code points and UTF-16 code unit indexes.
// One entry per code point plus one for the end position. sa.length() + 1 entries
// always suffice because there are never more code points than UTF-16 code units.
180 m
*cpMap
= new m
[sa
.length() + 1];
// moveIndex32(i, 1) advances i by one whole code point.
182 for (i
=0; i
<sa
.length(); i
=sa
.moveIndex32(i
, 1)) {
184 cpMap
[j
].nativeIdx
= i
;
189 cpMap
[j
].nativeIdx
= i
; // position following the last char in utf-16 string.
192 // UChar * test, null terminated
193 status
= U_ZERO_ERROR
;
// saLen + 1 code units: one extra for the terminator.
194 UChar
*buf
= new UChar
[saLen
+1];
195 sa
.extract(buf
, saLen
+1, status
);
196 TEST_SUCCESS(status
);
// Length argument of -1: the UText is opened over the terminated buffer without an explicit length.
197 ut
= utext_openUChars(NULL
, buf
, -1, &status
);
198 TEST_SUCCESS(status
);
199 TestAccess(sa
, ut
, cpCount
, cpMap
);
203 // UChar * test, with length
204 status
= U_ZERO_ERROR
;
205 buf
= new UChar
[saLen
+1];
206 sa
.extract(buf
, saLen
+1, status
);
207 TEST_SUCCESS(status
);
208 ut
= utext_openUChars(NULL
, buf
, saLen
, &status
);
209 TEST_SUCCESS(status
);
210 TestAccess(sa
, ut
, cpCount
, cpMap
);
215 // UnicodeString test
216 status
= U_ZERO_ERROR
;
217 ut
= utext_openUnicodeString(NULL
, &sa
, &status
);
218 TEST_SUCCESS(status
);
219 TestAccess(sa
, ut
, cpCount
, cpMap
);
// cpMap is passed as both the native map and the UTF-16 map of TestCMR().
220 TestCMR(sa
, ut
, cpCount
, cpMap
, cpMap
);
224 // Const UnicodeString test
225 status
= U_ZERO_ERROR
;
226 ut
= utext_openConstUnicodeString(NULL
, &sa
, &status
);
227 TEST_SUCCESS(status
);
228 TestAccess(sa
, ut
, cpCount
, cpMap
);
232 // Replaceable test. (UnicodeString inherits Replaceable)
233 status
= U_ZERO_ERROR
;
234 ut
= utext_openReplaceable(NULL
, &sa
, &status
);
235 TEST_SUCCESS(status
);
236 TestAccess(sa
, ut
, cpCount
, cpMap
);
// As above, cpMap serves as both the native map and the UTF-16 map.
237 TestCMR(sa
, ut
, cpCount
, cpMap
, cpMap
);
240 // Character Iterator Tests
241 status
= U_ZERO_ERROR
;
// The iterator is built over sa's own buffer.
242 const UChar
*cbuf
= sa
.getBuffer();
243 CharacterIterator
*ci
= new UCharCharacterIterator(cbuf
, saLen
, status
);
244 TEST_SUCCESS(status
);
245 ut
= utext_openCharacterIterator(NULL
, ci
, &status
);
246 TEST_SUCCESS(status
);
247 TestAccess(sa
, ut
, cpCount
, cpMap
);
252 // Fragmented UnicodeString (Chunk size of one)
254 status
= U_ZERO_ERROR
;
255 ut
= openFragmentedUnicodeString(NULL
, &sa
, &status
);
256 TEST_SUCCESS(status
);
257 TestAccess(sa
, ut
, cpCount
, cpMap
);
264 // Convert the test string from UnicodeString to (char *) in utf-8 format
// First call: NULL destination with capacity 0; its return value is used as the UTF-8 length.
265 int32_t u8Len
= sa
.extract(0, sa
.length(), NULL
, 0, "utf-8");
// Second call does the conversion into a buffer of u8Len + 1 bytes.
266 char *u8String
= new char[u8Len
+ 1];
267 sa
.extract(0, sa
.length(), u8String
, u8Len
+1, "utf-8");
269 // Build up the map of code point indices in the utf-8 string
270 m
* u8Map
= new m
[sa
.length() + 1];
271 i
= 0; // native utf-8 index
272 for (j
=0; j
<cpCount
; j
++) { // code point number
273 u8Map
[j
].nativeIdx
= i
;
// U8_NEXT reads one code point into c and advances i past its bytes.
274 U8_NEXT(u8String
, i
, u8Len
, c
)
277 u8Map
[cpCount
].nativeIdx
= u8Len
; // position following the last char in utf-8 string.
279 // Do the test itself
280 status
= U_ZERO_ERROR
;
281 ut
= utext_openUTF8(NULL
, u8String
, -1, &status
);
282 TEST_SUCCESS(status
);
// Same reference string, but the expected native indexes now come from the UTF-8 map.
283 TestAccess(sa
, ut
, cpCount
, u8Map
);
293 // TestCMR test Copy, Move and Replace operations.
294 // us UnicodeString containing the test text.
295 // ut UText containing the same test text.
296 // cpCount number of code points in the test text.
297 // nativeMap Mapping from code points to native indexes for the UText.
298 // u16Map Mapping from code points to UTF-16 indexes, for use with the UnicodeString.
300 // This function runs a whole series of operations on each incoming UText.
301 // The UText is deep-cloned prior to each operation, so that the original UText remains unchanged.
303 void UTextTest::TestCMR(const UnicodeString
&us
, UText
*ut
, int cpCount
, m
*nativeMap
, m
*u16Map
) {
// The copy/move/replace tests only make sense on a writable UText.
304 TEST_ASSERT(utext_isWritable(ut
) == TRUE
);
306 int srcLengthType
; // Loop variables for selecting the position and length
307 int srcPosType
; // of the block to operate on within the source text.
310 int srcIndex
= 0; // Code Point indexes of the block to operate on for
311 int srcLength
= 0; // a specific test.
313 int destIndex
= 0; // Code point index of the destination for a copy/move test.
315 int32_t nativeStart
= 0; // Native unit indexes for a test.
316 int32_t nativeLimit
= 0;
317 int32_t nativeDest
= 0;
319 int32_t u16Start
= 0; // UTF-16 indexes for a test.
320 int32_t u16Limit
= 0; // used when performing the same operation in a Unicode String
323 // Iterate over a whole series of source indexes, lengths and target indexes.
324 // This is done with code point indexes; these will be later translated to native
325 // indexes using the cpMap.
// Source block lengths tried: 1, 5 and one third of the text.
326 for (srcLengthType
=1; srcLengthType
<=3; srcLengthType
++) {
327 switch (srcLengthType
) {
328 case 1: srcLength
= 1; break;
329 case 2: srcLength
= 5; break;
330 case 3: srcLength
= cpCount
/ 3;
// Source block positions tried: start, start+1, flush with the end,
// one before that, and the middle of the text.
332 for (srcPosType
=1; srcPosType
<=5; srcPosType
++) {
333 switch (srcPosType
) {
334 case 1: srcIndex
= 0; break;
335 case 2: srcIndex
= 1; break;
336 case 3: srcIndex
= cpCount
- srcLength
; break;
337 case 4: srcIndex
= cpCount
- srcLength
- 1; break;
338 case 5: srcIndex
= cpCount
/ 2; break;
340 if (srcIndex
< 0 || srcIndex
+ srcLength
> cpCount
) {
341 // filter out bogus test cases -
342 // those with a source range that falls off an edge of the string.
347 // Copy and move tests.
348 // iterate over a variety of destination positions.
// NOTE(review): the loop stops at 4, so cases 5 and 6 of the switch below are never selected.
350 for (destPosType
=1; destPosType
<=4; destPosType
++) {
351 switch (destPosType
) {
352 case 1: destIndex
= 0; break;
353 case 2: destIndex
= 1; break;
354 case 3: destIndex
= srcIndex
- 1; break;
355 case 4: destIndex
= srcIndex
+ srcLength
+ 1; break;
356 case 5: destIndex
= cpCount
-1; break;
357 case 6: destIndex
= cpCount
; break;
359 if (destIndex
<0 || destIndex
>cpCount
) {
360 // filter out bogus test cases.
// Translate the code point indexes into native indexes for the UText...
364 nativeStart
= nativeMap
[srcIndex
].nativeIdx
;
365 nativeLimit
= nativeMap
[srcIndex
+srcLength
].nativeIdx
;
366 nativeDest
= nativeMap
[destIndex
].nativeIdx
;
// ...and into UTF-16 indexes for the reference UnicodeString.
368 u16Start
= u16Map
[srcIndex
].nativeIdx
;
369 u16Limit
= u16Map
[srcIndex
+srcLength
].nativeIdx
;
370 u16Dest
= u16Map
[destIndex
].nativeIdx
;
// Run the same case once as a copy (move == FALSE)...
373 TestCopyMove(us
, ut
, FALSE
,
374 nativeStart
, nativeLimit
, nativeDest
,
375 u16Start
, u16Limit
, u16Dest
);
// ...and once as a move (move == TRUE).
377 TestCopyMove(us
, ut
, TRUE
,
378 nativeStart
, nativeLimit
, nativeDest
,
379 u16Start
, u16Limit
, u16Dest
);
// Replacement text: prefixes of fullRepString with lengths 0 through 19.
389 UnicodeString
fullRepString("This is an arbitrary string that will be used as replacement text");
390 for (int32_t replStrLen
=0; replStrLen
<20; replStrLen
++) {
391 UnicodeString
repStr(fullRepString
, 0, replStrLen
);
393 nativeStart
, nativeLimit
,
407 // TestCopyMove run a single test case for utext_copy.
408 // Test cases are created in TestCMR and dispatched here for execution.
//   us           reference UnicodeString holding the test text.
//   ut           UText under test; it is cloned here and the clone is modified.
//   move         passed through to utext_copy() to select move rather than copy.
//   nativeStart, nativeLimit, nativeDest   source range and destination, in UText native units.
//   u16Start, u16Limit, u16Dest            the same positions as UTF-16 indexes into us.
410 void UTextTest::TestCopyMove(const UnicodeString
&us
, UText
*ut
, UBool move
,
411 int32_t nativeStart
, int32_t nativeLimit
, int32_t nativeDest
,
412 int32_t u16Start
, int32_t u16Limit
, int32_t u16Dest
)
414 UErrorCode status
= U_ZERO_ERROR
;
415 UText
*targetUT
= NULL
;
420 // clone the UText. The test will be run in the cloned copy
421 // so that we don't alter the original.
// Arguments after ut are deep = TRUE and readOnly = FALSE.
423 targetUT
= utext_clone(NULL
, ut
, TRUE
, FALSE
, &status
);
424 TEST_SUCCESS(status
);
425 UnicodeString
targetUS(us
); // And copy the reference string.
427 // do the test operation first in the reference
428 targetUS
.copy(u16Start
, u16Limit
, u16Dest
);
430 // delete out the source range.
// NOTE(review): presumably this removal is only done for a move; the enclosing condition is not visible here.
// Destination after the source: the source range has not shifted.
431 if (u16Limit
< u16Dest
) {
432 targetUS
.removeBetween(u16Start
, u16Limit
);
// Other branch: the source range is removed at its position shifted right by the amount copied.
434 int32_t amtCopied
= u16Limit
- u16Start
;
435 targetUS
.removeBetween(u16Start
+amtCopied
, u16Limit
+amtCopied
);
439 // Do the same operation in the UText under test
440 utext_copy(targetUT
, nativeStart
, nativeLimit
, nativeDest
, move
, &status
);
// A destination strictly inside the source range is expected to be rejected.
441 if (nativeDest
> nativeStart
&& nativeDest
< nativeLimit
) {
442 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
444 TEST_SUCCESS(status
);
446 // Compare the results of the two parallel tests
447 int32_t usi
= 0; // UnicodeString position, utf-16 index.
448 int64_t uti
= 0; // UText position, native index.
449 int32_t cpi
; // char32 position (code point index)
450 UChar32 usc
; // code point from Unicode String
451 UChar32 utc
; // code point from UText
452 utext_setNativeIndex(targetUT
, 0);
// Walk both texts one code point at a time; the loop exit is not visible in this excerpt.
453 for (cpi
=0; ; cpi
++) {
454 usc
= targetUS
.char32At(usi
);
455 utc
= utext_next32(targetUT
);
459 TEST_ASSERT(uti
== usi
);
460 TEST_ASSERT(utc
== usc
);
461 usi
= targetUS
.moveIndex32(usi
, 1);
462 uti
= utext_getNativeIndex(targetUT
);
464 goto cleanupAndReturn
;
// Expected final length starts from the length of the unmodified original.
467 int64_t expectedNativeLength
= utext_nativeLength(ut
);
// NOTE(review): presumably added only for a copy, which grows the text by the size of the source range.
469 expectedNativeLength
+= nativeLimit
- nativeStart
;
// After the comparison loop the iteration position is expected to equal the text length.
471 uti
= utext_getNativeIndex(targetUT
);
472 TEST_ASSERT(uti
== expectedNativeLength
);
476 utext_close(targetUT
);
481 // TestReplace Test a single Replace operation.
// The replace is done once in a copy of the reference UnicodeString and once in a
// clone of the UText, and the two results are compared code point by code point.
483 void UTextTest::TestReplace(
484 const UnicodeString
&us
, // reference UnicodeString in which to do the replace
485 UText
*ut
, // UnicodeText object under test.
486 int32_t nativeStart
, // Range to be replaced, in UText native units.
488 int32_t u16Start
, // Range to be replaced, in UTF-16 units
489 int32_t u16Limit
, // for use in the reference UnicodeString.
490 const UnicodeString
&repStr
) // The replacement string
492 UErrorCode status
= U_ZERO_ERROR
;
493 UText
*targetUT
= NULL
;
498 // clone the target UText. The test will be run in the cloned copy
499 // so that we don't alter the original.
// Arguments after ut are deep = TRUE and readOnly = FALSE.
501 targetUT
= utext_clone(NULL
, ut
, TRUE
, FALSE
, &status
);
502 TEST_SUCCESS(status
);
503 UnicodeString
targetUS(us
); // And copy the reference string.
506 // Do the replace operation in the Unicode String, to
507 // produce a reference result.
// UnicodeString::replace is called with (start, length, text), hence u16Limit-u16Start.
509 targetUS
.replace(u16Start
, u16Limit
-u16Start
, repStr
);
512 // Do the replace on the UText under test
514 const UChar
*rs
= repStr
.getBuffer();
515 int32_t rsLen
= repStr
.length();
516 int32_t actualDelta
= utext_replace(targetUT
, nativeStart
, nativeLimit
, rs
, rsLen
, &status
);
// NOTE(review): this compares repStr's UTF-16 length with a native-unit range, so it is exact
// only when native units are UTF-16 code units; the callers visible in this file pass UTexts
// whose native map is the UTF-16 map.
517 int32_t expectedDelta
= repStr
.length() - (nativeLimit
- nativeStart
);
518 TEST_ASSERT(actualDelta
== expectedDelta
);
521 // Compare the results
523 int32_t usi
= 0; // UnicodeString position, utf-16 index.
524 int64_t uti
= 0; // UText position, native index.
525 int32_t cpi
; // char32 position (code point index)
526 UChar32 usc
; // code point from Unicode String
527 UChar32 utc
; // code point from UText
528 int64_t expectedNativeLength
= 0;
529 utext_setNativeIndex(targetUT
, 0);
// Walk both texts one code point at a time; the loop exit is not visible in this excerpt.
530 for (cpi
=0; ; cpi
++) {
531 usc
= targetUS
.char32At(usi
);
532 utc
= utext_next32(targetUT
);
536 TEST_ASSERT(uti
== usi
);
537 TEST_ASSERT(utc
== usc
);
538 usi
= targetUS
.moveIndex32(usi
, 1);
539 uti
= utext_getNativeIndex(targetUT
);
541 goto cleanupAndReturn
;
// The final iteration position is expected to equal the original length plus the size change.
544 expectedNativeLength
= utext_nativeLength(ut
) + expectedDelta
;
545 uti
= utext_getNativeIndex(targetUT
);
546 TEST_ASSERT(uti
== expectedNativeLength
);
549 utext_close(targetUT
);
553 // TestAccess Test the read only access functions on a UText, including cloning.
554 // The text is accessed in a variety of ways, and compared with
555 // the reference UnicodeString.
//   us       reference UnicodeString holding the expected text.
//   ut       UText under test.
//   cpCount  number of code points in the text.
//   cpMap    per code point expected values; cpMap[cpCount] holds the end position.
557 void UTextTest::TestAccess(const UnicodeString
&us
, UText
*ut
, int cpCount
, m
*cpMap
) {
558 // Run the standard tests on the caller-supplied UText.
559 TestAccessNoClone(us
, ut
, cpCount
, cpMap
);
561 // Re-run tests on a shallow clone.
// Reset the position to the start before cloning.
562 utext_setNativeIndex(ut
, 0);
563 UErrorCode status
= U_ZERO_ERROR
;
564 UText
*shallowClone
= utext_clone(NULL
, ut
, FALSE
/*deep*/, FALSE
/*readOnly*/, &status
);
565 TEST_SUCCESS(status
);
566 TestAccessNoClone(us
, shallowClone
, cpCount
, cpMap
);
569 // Rerun again on a deep clone.
570 // Note that text providers are not required to provide deep cloning,
571 // so unsupported errors are ignored.
573 status
= U_ZERO_ERROR
;
574 utext_setNativeIndex(shallowClone
, 0);
// The deep clone is made from the shallow clone, which is closed right afterwards.
575 UText
*deepClone
= utext_clone(NULL
, shallowClone
, TRUE
, FALSE
, &status
);
576 utext_close(shallowClone
);
577 if (status
!= U_UNSUPPORTED_ERROR
) {
578 TEST_SUCCESS(status
);
579 TestAccessNoClone(us
, deepClone
, cpCount
, cpMap
);
581 utext_close(deepClone
);
586 // TestAccessNoClone() Test the read only access functions on a UText.
587 // The text is accessed in a variety of ways, and compared with
588 // the reference UnicodeString.
//   us       reference UnicodeString holding the expected text.
//   ut       UText under test.
//   cpCount  number of code points in the text.
//   cpMap    for code point i, cpMap[i].cp is the expected code point and cpMap[i].nativeIdx
//            its expected native index; cpMap[cpCount].nativeIdx is the end position.
590 void UTextTest::TestAccessNoClone(const UnicodeString
&us
, UText
*ut
, int cpCount
, m
*cpMap
) {
591 UErrorCode status
= U_ZERO_ERROR
;
595 // Check the length from the UText
597 int64_t expectedLen
= cpMap
[cpCount
].nativeIdx
;
598 int64_t utlen
= utext_nativeLength(ut
);
599 TEST_ASSERT(expectedLen
== utlen
);
602 // Iterate forwards, verify that we get the correct code points
603 // at the correct native offsets.
607 int64_t expectedIndex
= 0;
608 int64_t foundIndex
= 0;
613 for (i
=0; i
<cpCount
; i
++) {
614 expectedIndex
= cpMap
[i
].nativeIdx
;
615 foundIndex
= utext_getNativeIndex(ut
);
616 TEST_ASSERT(expectedIndex
== foundIndex
);
617 expectedC
= cpMap
[i
].cp
;
618 foundC
= utext_next32(ut
);
619 TEST_ASSERT(expectedC
== foundC
);
// After stepping over code point i, the "previous" index is expected to be where i started.
620 foundIndex
= utext_getPreviousNativeIndex(ut
);
621 TEST_ASSERT(expectedIndex
== foundIndex
);
// Reading past the last code point is expected to yield U_SENTINEL.
626 foundC
= utext_next32(ut
);
627 TEST_ASSERT(foundC
== U_SENTINEL
);
629 // Repeat above, using macros
630 utext_setNativeIndex(ut
, 0);
631 for (i
=0; i
<cpCount
; i
++) {
632 expectedIndex
= cpMap
[i
].nativeIdx
;
633 foundIndex
= UTEXT_GETNATIVEINDEX(ut
);
634 TEST_ASSERT(expectedIndex
== foundIndex
);
635 expectedC
= cpMap
[i
].cp
;
636 foundC
= UTEXT_NEXT32(ut
);
637 TEST_ASSERT(expectedC
== foundC
);
642 foundC
= UTEXT_NEXT32(ut
);
643 TEST_ASSERT(foundC
== U_SENTINEL
);
646 // Forward iteration (above) should have left index at the
647 // end of the input, which should == length().
649 len
= utext_nativeLength(ut
);
650 foundIndex
= utext_getNativeIndex(ut
);
651 TEST_ASSERT(len
== foundIndex
);
654 // Iterate backwards over entire test string
// Start from the current position, which the forward pass left at the end of the text.
656 len
= utext_getNativeIndex(ut
);
657 utext_setNativeIndex(ut
, len
);
658 for (i
=cpCount
-1; i
>=0; i
--) {
659 expectedC
= cpMap
[i
].cp
;
660 expectedIndex
= cpMap
[i
].nativeIdx
;
// Capture the previous index first; it must match where utext_previous32() ends up.
661 int64_t prevIndex
= utext_getPreviousNativeIndex(ut
);
662 foundC
= utext_previous32(ut
);
663 foundIndex
= utext_getNativeIndex(ut
);
664 TEST_ASSERT(expectedIndex
== foundIndex
);
665 TEST_ASSERT(expectedC
== foundC
);
666 TEST_ASSERT(prevIndex
== foundIndex
);
673 // Backwards iteration, above, should have left our iterator
674 // position at zero, and continued backwards iteration should fail.
676 foundIndex
= utext_getNativeIndex(ut
);
677 TEST_ASSERT(foundIndex
== 0);
678 foundIndex
= utext_getPreviousNativeIndex(ut
);
679 TEST_ASSERT(foundIndex
== 0);
// Stepping back from the start must return U_SENTINEL and leave the position at zero.
682 foundC
= utext_previous32(ut
);
683 TEST_ASSERT(foundC
== U_SENTINEL
);
684 foundIndex
= utext_getNativeIndex(ut
);
685 TEST_ASSERT(foundIndex
== 0);
686 foundIndex
= utext_getPreviousNativeIndex(ut
);
687 TEST_ASSERT(foundIndex
== 0);
690 // And again, with the macros
691 utext_setNativeIndex(ut
, len
);
692 for (i
=cpCount
-1; i
>=0; i
--) {
693 expectedC
= cpMap
[i
].cp
;
694 expectedIndex
= cpMap
[i
].nativeIdx
;
695 foundC
= UTEXT_PREVIOUS32(ut
);
696 foundIndex
= UTEXT_GETNATIVEINDEX(ut
);
697 TEST_ASSERT(expectedIndex
== foundIndex
);
698 TEST_ASSERT(expectedC
== foundC
);
705 // Backwards iteration, above, should have left our iterator
706 // position at zero, and continued backwards iteration should fail.
708 foundIndex
= UTEXT_GETNATIVEINDEX(ut
);
709 TEST_ASSERT(foundIndex
== 0);
711 foundC
= UTEXT_PREVIOUS32(ut
);
712 TEST_ASSERT(foundC
== U_SENTINEL
);
713 foundIndex
= UTEXT_GETNATIVEINDEX(ut
);
714 TEST_ASSERT(foundIndex
== 0);
720 // next32From(), previous32From(), Iterate in a somewhat random order.
723 for (i
=0; i
<cpCount
; i
++) {
// Jump around the text: advance the code point index by 9973, wrapping at cpCount.
724 cpIndex
= (cpIndex
+ 9973) % cpCount
;
725 index
= cpMap
[cpIndex
].nativeIdx
;
726 expectedC
= cpMap
[cpIndex
].cp
;
727 foundC
= utext_next32From(ut
, index
);
728 TEST_ASSERT(expectedC
== foundC
);
735 for (i
=0; i
<cpCount
; i
++) {
736 cpIndex
= (cpIndex
+ 9973) % cpCount
;
// Start from the index just after code point cpIndex, so stepping back returns that code point.
737 index
= cpMap
[cpIndex
+1].nativeIdx
;
738 expectedC
= cpMap
[cpIndex
].cp
;
739 foundC
= utext_previous32From(ut
, index
);
740 TEST_ASSERT(expectedC
== foundC
);
748 // moveIndex(int32_t delta);
751 // Walk through frontwards, incrementing by one
752 utext_setNativeIndex(ut
, 0);
// i runs up to and including cpCount, so the last step lands on the end position.
753 for (i
=1; i
<=cpCount
; i
++) {
754 utext_moveIndex32(ut
, 1);
755 index
= utext_getNativeIndex(ut
);
756 expectedIndex
= cpMap
[i
].nativeIdx
;
757 TEST_ASSERT(expectedIndex
== index
);
758 index
= UTEXT_GETNATIVEINDEX(ut
);
759 TEST_ASSERT(expectedIndex
== index
);
762 // Walk through frontwards, incrementing by two
763 utext_setNativeIndex(ut
, 0);
764 for (i
=2; i
<cpCount
; i
+=2) {
765 utext_moveIndex32(ut
, 2);
766 index
= utext_getNativeIndex(ut
);
767 expectedIndex
= cpMap
[i
].nativeIdx
;
768 TEST_ASSERT(expectedIndex
== index
);
769 index
= UTEXT_GETNATIVEINDEX(ut
);
770 TEST_ASSERT(expectedIndex
== index
);
773 // walk through the string backwards, decrementing by one.
// i is used briefly to hold the native end index, then reused as the loop counter.
774 i
= cpMap
[cpCount
].nativeIdx
;
775 utext_setNativeIndex(ut
, i
);
776 for (i
=cpCount
; i
>=0; i
--) {
777 expectedIndex
= cpMap
[i
].nativeIdx
;
778 index
= utext_getNativeIndex(ut
);
779 TEST_ASSERT(expectedIndex
== index
);
780 index
= UTEXT_GETNATIVEINDEX(ut
);
781 TEST_ASSERT(expectedIndex
== index
);
782 utext_moveIndex32(ut
, -1);
786 // walk through backwards, decrementing by three
787 i
= cpMap
[cpCount
].nativeIdx
;
788 utext_setNativeIndex(ut
, i
);
789 for (i
=cpCount
; i
>=0; i
-=3) {
790 expectedIndex
= cpMap
[i
].nativeIdx
;
791 index
= utext_getNativeIndex(ut
);
792 TEST_ASSERT(expectedIndex
== index
);
793 index
= UTEXT_GETNATIVEINDEX(ut
);
794 TEST_ASSERT(expectedIndex
== index
);
795 utext_moveIndex32(ut
, -3);
// Extract tests. First case: the buffer is larger than the text, so everything fits.
802 int bufSize
= us
.length() + 10;
803 UChar
*buf
= new UChar
[bufSize
];
804 status
= U_ZERO_ERROR
;
805 expectedLen
= us
.length();
806 len
= utext_extract(ut
, 0, utlen
, buf
, bufSize
, &status
);
807 TEST_SUCCESS(status
);
808 TEST_ASSERT(len
== expectedLen
);
809 int compareResult
= us
.compare(buf
, -1);
810 TEST_ASSERT(compareResult
== 0);
// Second case: NULL destination with capacity 0; only the returned length and status are checked.
812 status
= U_ZERO_ERROR
;
813 len
= utext_extract(ut
, 0, utlen
, NULL
, 0, &status
);
// The condition choosing between the two expected statuses is not visible in this excerpt.
815 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
817 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
819 TEST_ASSERT(len
== expectedLen
);
// Third case: capacity of one code unit. The buffer is pre-filled with 0x5555 so that
// positions the extract did not write can be recognized.
821 status
= U_ZERO_ERROR
;
822 u_memset(buf
, 0x5555, bufSize
);
823 len
= utext_extract(ut
, 0, utlen
, buf
, 1, &status
);
824 if (us
.length() == 0) {
825 TEST_SUCCESS(status
);
826 TEST_ASSERT(buf
[0] == 0);
828 // Buf len == 1, extracting a single 16 bit value.
829 // If the data char is supplementary, it doesn't matter whether the buffer remains unchanged,
830 // or whether the lead surrogate of the pair is extracted.
831 // It's a buffer overflow error in either case.
832 TEST_ASSERT(buf
[0] == us
.charAt(0) ||
833 (buf
[0] == 0x5555 && U_IS_SUPPLEMENTARY(us
.char32At(0))));
// Nothing may be written beyond the one-unit capacity.
834 TEST_ASSERT(buf
[1] == 0x5555);
835 if (us
.length() == 1) {
836 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
838 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
846 // ErrorTest() Check various error and edge cases.
// Sections, in order: close of a zeroed UText, double close, re-use of one UText across
// provider types, invalid open parameters, malformed UTF-8, isLengthExpensive transitions,
// and indexing to positions that are not on code point boundaries (UTF-8, UTF-16, Replaceable).
848 void UTextTest::ErrorTest()
850 // Close of an uninitialized UText. Shouldn't blow up.
853 memset(&ut
, 0, sizeof(UText
));
858 // Double-close of a UText. Shouldn't blow up. UText should still be usable.
860 UErrorCode status
= U_ZERO_ERROR
;
861 UText ut
= UTEXT_INITIALIZER
;
862 UnicodeString
s("Hello, World");
// Opening into a caller-supplied UText is expected to return that same UText.
863 UText
*ut2
= utext_openUnicodeString(&ut
, &s
, &status
);
864 TEST_SUCCESS(status
);
865 TEST_ASSERT(ut2
== &ut
);
// Both the first and the second close are expected to return the caller-supplied UText.
867 UText
*ut3
= utext_close(&ut
);
868 TEST_ASSERT(ut3
== &ut
);
870 UText
*ut4
= utext_close(&ut
);
871 TEST_ASSERT(ut4
== &ut
);
// Re-open after the double close to confirm the UText is still usable.
873 utext_openUnicodeString(&ut
, &s
, &status
);
874 TEST_SUCCESS(status
);
878 // Re-use of a UText, chaining through each of the types of UText
879 // (If it doesn't blow up, and doesn't leak, it's probably working fine)
881 UErrorCode status
= U_ZERO_ERROR
;
882 UText ut
= UTEXT_INITIALIZER
;
// One source text for each provider type used below.
884 UnicodeString
s1("Hello, World");
885 UChar s2
[] = {(UChar
)0x41, (UChar
)0x42, (UChar
)0};
886 const char *s3
= "\x66\x67\x68";
888 utp
= utext_openUnicodeString(&ut
, &s1
, &status
);
889 TEST_SUCCESS(status
);
890 TEST_ASSERT(utp
== &ut
);
892 utp
= utext_openConstUnicodeString(&ut
, &s1
, &status
);
893 TEST_SUCCESS(status
);
894 TEST_ASSERT(utp
== &ut
);
896 utp
= utext_openUTF8(&ut
, s3
, -1, &status
);
897 TEST_SUCCESS(status
);
898 TEST_ASSERT(utp
== &ut
);
900 utp
= utext_openUChars(&ut
, s2
, -1, &status
);
901 TEST_SUCCESS(status
);
902 TEST_ASSERT(utp
== &ut
);
904 utp
= utext_close(&ut
);
905 TEST_ASSERT(utp
== &ut
);
907 utp
= utext_openUnicodeString(&ut
, &s1
, &status
);
908 TEST_SUCCESS(status
);
909 TEST_ASSERT(utp
== &ut
);
912 // Invalid parameters on open
915 UErrorCode status
= U_ZERO_ERROR
;
916 UText ut
= UTEXT_INITIALIZER
;
// A NULL text pointer is expected to be rejected, with an explicit length or with -1,
// by both the UChars and the UTF-8 open functions.
918 utext_openUChars(&ut
, NULL
, 5, &status
);
919 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
921 status
= U_ZERO_ERROR
;
922 utext_openUChars(&ut
, NULL
, -1, &status
);
923 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
925 status
= U_ZERO_ERROR
;
926 utext_openUTF8(&ut
, NULL
, 4, &status
);
927 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
929 status
= U_ZERO_ERROR
;
930 utext_openUTF8(&ut
, NULL
, -1, &status
);
931 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
935 // UTF-8 with malformed sequences.
936 // These should come through as the Unicode replacement char, \ufffd
939 UErrorCode status
= U_ZERO_ERROR
;
// Byte 1 is a lone 0x81 and bytes 3-5 are f0 81 81; bytes 0, 2 and 6 are 0x41, 0x42 and 0x43.
941 const char *badUTF8
= "\x41\x81\x42\xf0\x81\x81\x43";
944 ut
= utext_openUTF8(NULL
, badUTF8
, -1, &status
);
945 TEST_SUCCESS(status
);
946 c
= utext_char32At(ut
, 1);
947 TEST_ASSERT(c
== 0xfffd);
948 c
= utext_char32At(ut
, 3);
949 TEST_ASSERT(c
== 0xfffd);
950 c
= utext_char32At(ut
, 5);
951 TEST_ASSERT(c
== 0xfffd);
952 c
= utext_char32At(ut
, 6);
953 TEST_ASSERT(c
== 0x43);
// The extracted UTF-16 text must carry the replacement characters as well.
956 int n
= utext_extract(ut
, 0, 9, buf
, 10, &status
);
957 TEST_SUCCESS(status
);
959 TEST_ASSERT(buf
[1] == 0xfffd);
960 TEST_ASSERT(buf
[3] == 0xfffd);
961 TEST_ASSERT(buf
[2] == 0x42);
967 // isLengthExpensive - does it make the expected transitions after
968 // getting the length of a nul terminated string?
971 UErrorCode status
= U_ZERO_ERROR
;
972 UnicodeString
sa("Hello, this is a string");
// NOTE(review): sb's declaration and termination are not visible here; the len == 99 check
// below suggests a 100-element buffer with a terminator in the last slot - confirm.
976 memset(sb
, 0x20, sizeof(sb
));
// Over a UnicodeString the length is not expected to be expensive.
979 UText
*uta
= utext_openUnicodeString(NULL
, &sa
, &status
);
980 TEST_SUCCESS(status
);
981 isExpensive
= utext_isLengthExpensive(uta
);
982 TEST_ASSERT(isExpensive
== FALSE
);
// Over a UChar buffer opened with length -1, the length is expected to be expensive
// until it has been asked for once.
985 UText
*utb
= utext_openUChars(NULL
, sb
, -1, &status
);
986 TEST_SUCCESS(status
);
987 isExpensive
= utext_isLengthExpensive(utb
);
988 TEST_ASSERT(isExpensive
== TRUE
);
989 int64_t len
= utext_nativeLength(utb
);
990 TEST_ASSERT(len
== 99);
991 isExpensive
= utext_isLengthExpensive(utb
);
992 TEST_ASSERT(isExpensive
== FALSE
);
997 // Index to positions not on code point boundaries.
// u8str holds a 2-byte, a 3-byte and a 4-byte sequence, so code points start at byte
// offsets 0, 2 and 5 and the text ends at 9. Each table below has one entry per
// native index from 0 through 10.
1000 const char *u8str
= "\xc8\x81\xe1\x82\x83\xf1\x84\x85\x86";
// startMap: expected index after utext_setNativeIndex(i), and after utext_char32At(i).
1001 int32_t startMap
[] = { 0, 0, 2, 2, 2, 5, 5, 5, 5, 9, 9};
// nextMap: expected index after utext_next32From(i).
1002 int32_t nextMap
[] = { 2, 2, 5, 5, 5, 9, 9, 9, 9, 9, 9};
// prevMap: expected index after utext_previous32From(i).
1003 int32_t prevMap
[] = { 0, 0, 0, 0, 0, 2, 2, 2, 2, 5, 5};
// c32Map: expected code point from utext_char32At(i) and utext_next32From(i); -1 at and past the end.
1004 UChar32 c32Map
[] = {0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146, 0x044146, 0x044146, -1, -1};
// pr32Map: expected code point from utext_previous32From(i); -1 where nothing precedes.
1005 UChar32 pr32Map
[] = { -1, -1, 0x201, 0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146};
1007 // extractLen is the size, in UChars, of what will be extracted between index and index+1.
1008 // is zero when both index positions lie within the same code point.
1009 int32_t exLen
[] = { 0, 1, 0, 0, 1, 0, 0, 0, 2, 0, 0};
1012 UErrorCode status
= U_ZERO_ERROR
;
1013 UText
*ut
= utext_openUTF8(NULL
, u8str
, -1, &status
);
1014 TEST_SUCCESS(status
);
1018 int32_t startMapLimit
= UPRV_LENGTHOF(startMap
);
// Setting the index to any position must report back the expected start index from startMap.
1019 for (i
=0; i
<startMapLimit
; i
++) {
1020 utext_setNativeIndex(ut
, i
);
1021 int64_t cpIndex
= utext_getNativeIndex(ut
);
1022 TEST_ASSERT(cpIndex
== startMap
[i
]);
1023 cpIndex
= UTEXT_GETNATIVEINDEX(ut
);
1024 TEST_ASSERT(cpIndex
== startMap
[i
]);
// utext_char32At must return the expected code point and leave the index as given by startMap.
1028 for (i
=0; i
<startMapLimit
; i
++) {
1029 UChar32 c32
= utext_char32At(ut
, i
);
1030 TEST_ASSERT(c32
== c32Map
[i
]);
1031 int64_t cpIndex
= utext_getNativeIndex(ut
);
1032 TEST_ASSERT(cpIndex
== startMap
[i
]);
1035 // Check utext_next32From
1036 for (i
=0; i
<startMapLimit
; i
++) {
1037 UChar32 c32
= utext_next32From(ut
, i
);
1038 TEST_ASSERT(c32
== c32Map
[i
]);
1039 int64_t cpIndex
= utext_getNativeIndex(ut
);
1040 TEST_ASSERT(cpIndex
== nextMap
[i
]);
1043 // check utext_previous32From
1044 for (i
=0; i
<startMapLimit
; i
++) {
1046 UChar32 c32
= utext_previous32From(ut
, i
);
1047 TEST_ASSERT(c32
== pr32Map
[i
]);
1048 int64_t cpIndex
= utext_getNativeIndex(ut
);
1049 TEST_ASSERT(cpIndex
== prevMap
[i
]);
1053 // Extract from i to i+1, which may be zero or one code points,
1054 // depending on whether the indices straddle a cp boundary.
1055 for (i
=0; i
<startMapLimit
; i
++) {
1057 status
= U_ZERO_ERROR
;
1058 int32_t extractedLen
= utext_extract(ut
, i
, i
+1, buf
, 3, &status
);
1059 TEST_SUCCESS(status
);
1060 TEST_ASSERT(extractedLen
== exLen
[i
]);
1061 if (extractedLen
> 0) {
1063 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
1064 U16_GET(buf
, 0, extractedLen
-extractedLen
, extractedLen
, c32
);
1065 TEST_ASSERT(c32
== c32Map
[i
]);
1073 { // Similar test, with utf16 instead of utf8
1074 // TODO: merge the common parts of these tests.
// The text alternates BMP and supplementary characters, so code points start at
// UTF-16 indexes 0, 1, 3 and 4 and the text ends at 6. The tables have the same
// meaning as in the UTF-8 section above, with one entry per index 0 through 7.
1076 UnicodeString
u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV
);
1077 int32_t startMap
[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
1078 int32_t nextMap
[] = { 1, 3, 3, 4, 6, 6, 6, 6};
1079 int32_t prevMap
[] = { 0, 0, 0, 1, 3, 3, 4, 4};
1080 UChar32 c32Map
[] = {0x1000, 0x11000, 0x11000, 0x2000, 0x22000, 0x22000, -1, -1};
1081 UChar32 pr32Map
[] = { -1, 0x1000, 0x1000, 0x11000, 0x2000, 0x2000, 0x22000, 0x22000};
1082 int32_t exLen
[] = { 1, 0, 2, 1, 0, 2, 0, 0,};
// Expand the backslash escapes into the actual characters.
1084 u16str
= u16str
.unescape();
1085 UErrorCode status
= U_ZERO_ERROR
;
1086 UText
*ut
= utext_openUnicodeString(NULL
, &u16str
, &status
);
1087 TEST_SUCCESS(status
);
1089 int32_t startMapLimit
= UPRV_LENGTHOF(startMap
);
1091 for (i
=0; i
<startMapLimit
; i
++) {
1092 utext_setNativeIndex(ut
, i
);
1093 int64_t cpIndex
= utext_getNativeIndex(ut
);
1094 TEST_ASSERT(cpIndex
== startMap
[i
]);
1098 for (i
=0; i
<startMapLimit
; i
++) {
1099 UChar32 c32
= utext_char32At(ut
, i
);
1100 TEST_ASSERT(c32
== c32Map
[i
]);
1101 int64_t cpIndex
= utext_getNativeIndex(ut
);
1102 TEST_ASSERT(cpIndex
== startMap
[i
]);
1105 // Check utext_next32From
1106 for (i
=0; i
<startMapLimit
; i
++) {
1107 UChar32 c32
= utext_next32From(ut
, i
);
1108 TEST_ASSERT(c32
== c32Map
[i
]);
1109 int64_t cpIndex
= utext_getNativeIndex(ut
);
1110 TEST_ASSERT(cpIndex
== nextMap
[i
]);
1113 // check utext_previous32From
1114 for (i
=0; i
<startMapLimit
; i
++) {
1115 UChar32 c32
= utext_previous32From(ut
, i
);
1116 TEST_ASSERT(c32
== pr32Map
[i
]);
1117 int64_t cpIndex
= utext_getNativeIndex(ut
);
1118 TEST_ASSERT(cpIndex
== prevMap
[i
]);
1122 // Extract from i to i+1, which may be zero or one code points,
1123 // depending on whether the indices straddle a cp boundary.
1124 for (i
=0; i
<startMapLimit
; i
++) {
1126 status
= U_ZERO_ERROR
;
1127 int32_t extractedLen
= utext_extract(ut
, i
, i
+1, buf
, 3, &status
);
1128 TEST_SUCCESS(status
);
1129 TEST_ASSERT(extractedLen
== exLen
[i
]);
1130 if (extractedLen
> 0) {
1132 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
1133 U16_GET(buf
, 0, extractedLen
-extractedLen
, extractedLen
, c32
);
1134 TEST_ASSERT(c32
== c32Map
[i
]);
1141 { // Similar test, with UText over Replaceable
1142 // TODO: merge the common parts of these tests.
// Same text and expectation tables as the UTF-16 section above; only the provider differs.
1144 UnicodeString
u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV
);
1145 int32_t startMap
[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
1146 int32_t nextMap
[] = { 1, 3, 3, 4, 6, 6, 6, 6};
1147 int32_t prevMap
[] = { 0, 0, 0, 1, 3, 3, 4, 4};
1148 UChar32 c32Map
[] = {0x1000, 0x11000, 0x11000, 0x2000, 0x22000, 0x22000, -1, -1};
1149 UChar32 pr32Map
[] = { -1, 0x1000, 0x1000, 0x11000, 0x2000, 0x2000, 0x22000, 0x22000};
1150 int32_t exLen
[] = { 1, 0, 2, 1, 0, 2, 0, 0,};
1152 u16str
= u16str
.unescape();
1153 UErrorCode status
= U_ZERO_ERROR
;
1154 UText
*ut
= utext_openReplaceable(NULL
, &u16str
, &status
);
1155 TEST_SUCCESS(status
);
1157 int32_t startMapLimit
= UPRV_LENGTHOF(startMap
);
1159 for (i
=0; i
<startMapLimit
; i
++) {
1160 utext_setNativeIndex(ut
, i
);
1161 int64_t cpIndex
= utext_getNativeIndex(ut
);
1162 TEST_ASSERT(cpIndex
== startMap
[i
]);
1166 for (i
=0; i
<startMapLimit
; i
++) {
1167 UChar32 c32
= utext_char32At(ut
, i
);
1168 TEST_ASSERT(c32
== c32Map
[i
]);
1169 int64_t cpIndex
= utext_getNativeIndex(ut
);
1170 TEST_ASSERT(cpIndex
== startMap
[i
]);
1173 // Check utext_next32From
1174 for (i
=0; i
<startMapLimit
; i
++) {
1175 UChar32 c32
= utext_next32From(ut
, i
);
1176 TEST_ASSERT(c32
== c32Map
[i
]);
1177 int64_t cpIndex
= utext_getNativeIndex(ut
);
1178 TEST_ASSERT(cpIndex
== nextMap
[i
]);
1181 // check utext_previous32From
1182 for (i
=0; i
<startMapLimit
; i
++) {
1183 UChar32 c32
= utext_previous32From(ut
, i
);
1184 TEST_ASSERT(c32
== pr32Map
[i
]);
1185 int64_t cpIndex
= utext_getNativeIndex(ut
);
1186 TEST_ASSERT(cpIndex
== prevMap
[i
]);
1190 // Extract from i to i+1, which may be zero or one code points,
1191 // depending on whether the indices straddle a cp boundary.
1192 for (i
=0; i
<startMapLimit
; i
++) {
1194 status
= U_ZERO_ERROR
;
1195 int32_t extractedLen
= utext_extract(ut
, i
, i
+1, buf
, 3, &status
);
1196 TEST_SUCCESS(status
);
1197 TEST_ASSERT(extractedLen
== exLen
[i
]);
1198 if (extractedLen
> 0) {
1200 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
1201 U16_GET(buf
, 0, extractedLen
-extractedLen
, extractedLen
, c32
);
1202 TEST_ASSERT(c32
== c32Map
[i
]);
1211 void UTextTest::FreezeTest() {
1212 // Check isWritable() and freeze() behavior.
1215 UnicodeString
ustr("Hello, World.");
1216 const char u8str
[] = {char(0x31), (char)0x32, (char)0x33, 0};
1217 const UChar u16str
[] = {(UChar
)0x31, (UChar
)0x32, (UChar
)0x44, 0};
1219 UErrorCode status
= U_ZERO_ERROR
;
1223 ut
= utext_openUTF8(ut
, u8str
, -1, &status
);
1224 TEST_SUCCESS(status
);
1225 UBool writable
= utext_isWritable(ut
);
1226 TEST_ASSERT(writable
== FALSE
);
1227 utext_copy(ut
, 1, 2, 0, TRUE
, &status
);
1228 TEST_ASSERT(status
== U_NO_WRITE_PERMISSION
);
1230 status
= U_ZERO_ERROR
;
1231 ut
= utext_openUChars(ut
, u16str
, -1, &status
);
1232 TEST_SUCCESS(status
);
1233 writable
= utext_isWritable(ut
);
1234 TEST_ASSERT(writable
== FALSE
);
1235 utext_copy(ut
, 1, 2, 0, TRUE
, &status
);
1236 TEST_ASSERT(status
== U_NO_WRITE_PERMISSION
);
1238 status
= U_ZERO_ERROR
;
1239 ut
= utext_openUnicodeString(ut
, &ustr
, &status
);
1240 TEST_SUCCESS(status
);
1241 writable
= utext_isWritable(ut
);
1242 TEST_ASSERT(writable
== TRUE
);
1244 writable
= utext_isWritable(ut
);
1245 TEST_ASSERT(writable
== FALSE
);
1246 utext_copy(ut
, 1, 2, 0, TRUE
, &status
);
1247 TEST_ASSERT(status
== U_NO_WRITE_PERMISSION
);
1249 status
= U_ZERO_ERROR
;
1250 ut
= utext_openUnicodeString(ut
, &ustr
, &status
);
1251 TEST_SUCCESS(status
);
1252 ut2
= utext_clone(ut2
, ut
, FALSE
, FALSE
, &status
); // clone with readonly = false
1253 TEST_SUCCESS(status
);
1254 writable
= utext_isWritable(ut2
);
1255 TEST_ASSERT(writable
== TRUE
);
1256 ut2
= utext_clone(ut2
, ut
, FALSE
, TRUE
, &status
); // clone with readonly = true
1257 TEST_SUCCESS(status
);
1258 writable
= utext_isWritable(ut2
);
1259 TEST_ASSERT(writable
== FALSE
);
1260 utext_copy(ut2
, 1, 2, 0, TRUE
, &status
);
1261 TEST_ASSERT(status
== U_NO_WRITE_PERMISSION
);
1263 status
= U_ZERO_ERROR
;
1264 ut
= utext_openConstUnicodeString(ut
, (const UnicodeString
*)&ustr
, &status
);
1265 TEST_SUCCESS(status
);
1266 writable
= utext_isWritable(ut
);
1267 TEST_ASSERT(writable
== FALSE
);
1268 utext_copy(ut
, 1, 2, 0, TRUE
, &status
);
1269 TEST_ASSERT(status
== U_NO_WRITE_PERMISSION
);
1271 // Deep Clone of a frozen UText should re-enable writing in the copy.
1272 status
= U_ZERO_ERROR
;
1273 ut
= utext_openUnicodeString(ut
, &ustr
, &status
);
1274 TEST_SUCCESS(status
);
1276 ut2
= utext_clone(ut2
, ut
, TRUE
, FALSE
, &status
); // deep clone
1277 TEST_SUCCESS(status
);
1278 writable
= utext_isWritable(ut2
);
1279 TEST_ASSERT(writable
== TRUE
);
1282 // Deep clone of a frozen UText, where the base type is intrinsically non-writable,
1283 // should NOT enable writing in the copy.
1284 status
= U_ZERO_ERROR
;
1285 ut
= utext_openUChars(ut
, u16str
, -1, &status
);
1286 TEST_SUCCESS(status
);
1288 ut2
= utext_clone(ut2
, ut
, TRUE
, FALSE
, &status
); // deep clone
1289 TEST_SUCCESS(status
);
1290 writable
= utext_isWritable(ut2
);
1291 TEST_ASSERT(writable
== FALSE
);
1301 // A UText type that works with a chunk size of 1.
1302 // Intended to test for edge cases.
1303 // Input comes from a UnicodeString.
1305 // ut.b the character. Put into both halves.
1309 static UBool U_CALLCONV
1310 fragTextAccess(UText
*ut
, int64_t index
, UBool forward
) {
1311 const UnicodeString
*us
= (const UnicodeString
*)ut
->context
;
1313 int32_t length
= us
->length();
1314 if (forward
&& index
>=0 && index
<length
) {
1315 c
= us
->charAt((int32_t)index
);
1317 ut
->chunkOffset
= 0;
1318 ut
->chunkLength
= 1;
1319 ut
->chunkNativeStart
= index
;
1320 ut
->chunkNativeLimit
= index
+1;
1323 if (!forward
&& index
>0 && index
<=length
) {
1324 c
= us
->charAt((int32_t)index
-1);
1326 ut
->chunkOffset
= 1;
1327 ut
->chunkLength
= 1;
1328 ut
->chunkNativeStart
= index
-1;
1329 ut
->chunkNativeLimit
= index
;
1333 ut
->chunkOffset
= 0;
1334 ut
->chunkLength
= 0;
1336 ut
->chunkNativeStart
= 0;
1337 ut
->chunkNativeLimit
= 0;
1339 ut
->chunkNativeStart
= length
;
1340 ut
->chunkNativeLimit
= length
;
1345 // Function table to be used with this fragmented text provider.
1346 // Initialized in the open function.
1347 static UTextFuncs fragmentFuncs
;
1349 // Clone function for fragmented text provider.
1350 // Didn't really want to provide this, but it's easier to provide it than to keep it
1351 // out of the tests.
1354 cloneFragmentedUnicodeString(UText
*dest
, const UText
*src
, UBool deep
, UErrorCode
*status
) {
1355 if (U_FAILURE(*status
)) {
1359 *status
= U_UNSUPPORTED_ERROR
;
1362 dest
= utext_openUnicodeString(dest
, (UnicodeString
*)src
->context
, status
);
1363 utext_setNativeIndex(dest
, utext_getNativeIndex(src
));
1369 // Open function for the fragmented text provider.
1371 openFragmentedUnicodeString(UText
*ut
, UnicodeString
*s
, UErrorCode
*status
) {
1372 ut
= utext_openUnicodeString(ut
, s
, status
);
1373 if (U_FAILURE(*status
)) {
1377 // Copy of the function table from the stock UnicodeString UText,
1378 // and replace the entry for the access function.
1379 memcpy(&fragmentFuncs
, ut
->pFuncs
, sizeof(fragmentFuncs
));
1380 fragmentFuncs
.access
= fragTextAccess
;
1381 fragmentFuncs
.clone
= cloneFragmentedUnicodeString
;
1382 ut
->pFuncs
= &fragmentFuncs
;
1384 ut
->chunkContents
= (UChar
*)&ut
->b
;
1385 ut
->pFuncs
->access(ut
, 0, TRUE
);
1389 // Regression test for Ticket 5560
1390 // Clone fails to update chunkContentPointer in the cloned copy.
1391 // This is only an issue for UText types that work in a local buffer,
1392 // (UTF-8 wrapper, for example)
1395 // 1. Create an inital UText
1396 // 2. Deep clone it. Contents should match original.
1397 // 3. Reset original to something different.
1398 // 4. Check that clone contents did not change.
1400 void UTextTest::Ticket5560() {
1401 /* The following two strings are in UTF-8 even on EBCDIC platforms. */
1402 static const char s1
[] = {0x41,0x42,0x43,0x44,0x45,0x46,0}; /* "ABCDEF" */
1403 static const char s2
[] = {0x31,0x32,0x33,0x34,0x35,0x36,0}; /* "123456" */
1404 UErrorCode status
= U_ZERO_ERROR
;
1406 UText ut1
= UTEXT_INITIALIZER
;
1407 UText ut2
= UTEXT_INITIALIZER
;
1409 utext_openUTF8(&ut1
, s1
, -1, &status
);
1410 UChar c
= utext_next32(&ut1
);
1411 TEST_ASSERT(c
== 0x41); // c == 'A'
1413 utext_clone(&ut2
, &ut1
, TRUE
, FALSE
, &status
);
1414 TEST_SUCCESS(status
);
1415 c
= utext_next32(&ut2
);
1416 TEST_ASSERT(c
== 0x42); // c == 'B'
1417 c
= utext_next32(&ut1
);
1418 TEST_ASSERT(c
== 0x42); // c == 'B'
1420 utext_openUTF8(&ut1
, s2
, -1, &status
);
1421 c
= utext_next32(&ut1
);
1422 TEST_ASSERT(c
== 0x31); // c == '1'
1423 c
= utext_next32(&ut2
);
1424 TEST_ASSERT(c
== 0x43); // c == 'C'
1431 // Test for Ticket 6847
1433 void UTextTest::Ticket6847() {
1434 const int STRLEN
= 90;
1436 u_memset(s
, 0x41, STRLEN
);
1439 UErrorCode status
= U_ZERO_ERROR
;
1440 UText
*ut
= utext_openUChars(NULL
, s
, -1, &status
);
1442 utext_setNativeIndex(ut
, 0);
1445 int64_t nativeIndex
= UTEXT_GETNATIVEINDEX(ut
);
1446 TEST_ASSERT(nativeIndex
== 0);
1447 while ((c
= utext_next32(ut
)) != U_SENTINEL
) {
1448 TEST_ASSERT(c
== 0x41);
1449 TEST_ASSERT(count
< STRLEN
);
1450 if (count
>= STRLEN
) {
1454 nativeIndex
= UTEXT_GETNATIVEINDEX(ut
);
1455 TEST_ASSERT(nativeIndex
== count
);
1457 TEST_ASSERT(count
== STRLEN
);
1458 nativeIndex
= UTEXT_GETNATIVEINDEX(ut
);
1459 TEST_ASSERT(nativeIndex
== STRLEN
);
1464 void UTextTest::Ticket10562() {
1465 // Note: failures show as a heap error when the test is run under valgrind.
1466 UErrorCode status
= U_ZERO_ERROR
;
1468 const char *utf8_string
= "\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41";
1469 UText
*utf8Text
= utext_openUTF8(NULL
, utf8_string
, -1, &status
);
1470 TEST_SUCCESS(status
);
1471 UText
*deepClone
= utext_clone(NULL
, utf8Text
, TRUE
, FALSE
, &status
);
1472 TEST_SUCCESS(status
);
1473 UText
*shallowClone
= utext_clone(NULL
, deepClone
, FALSE
, FALSE
, &status
);
1474 TEST_SUCCESS(status
);
1475 utext_close(shallowClone
);
1476 utext_close(deepClone
);
1477 utext_close(utf8Text
);
1479 status
= U_ZERO_ERROR
;
1480 UnicodeString
usString("Hello, World.");
1481 UText
*usText
= utext_openUnicodeString(NULL
, &usString
, &status
);
1482 TEST_SUCCESS(status
);
1483 UText
*usDeepClone
= utext_clone(NULL
, usText
, TRUE
, FALSE
, &status
);
1484 TEST_SUCCESS(status
);
1485 UText
*usShallowClone
= utext_clone(NULL
, usDeepClone
, FALSE
, FALSE
, &status
);
1486 TEST_SUCCESS(status
);
1487 utext_close(usShallowClone
);
1488 utext_close(usDeepClone
);
1489 utext_close(usText
);
1493 void UTextTest::Ticket10983() {
1494 // Note: failure shows as a seg fault when the defect is present.
1496 UErrorCode status
= U_ZERO_ERROR
;
1497 UnicodeString
s("Hello, World");
1498 UText
*ut
= utext_openConstUnicodeString(NULL
, &s
, &status
);
1499 TEST_SUCCESS(status
);
1501 status
= U_INVALID_STATE_ERROR
;
1502 UText
*cloned
= utext_clone(NULL
, ut
, TRUE
, TRUE
, &status
);
1503 TEST_ASSERT(cloned
== NULL
);
1504 TEST_ASSERT(status
== U_INVALID_STATE_ERROR
);
1509 // Ticket 12130 - extract on a UText wrapping a null terminated UChar * string
1510 // leaves the iteration position set incorrectly when the
1511 // actual string length is not yet known.
1513 // The test text needs to be long enough that UText defers getting the length.
1515 void UTextTest::Ticket12130() {
1516 UErrorCode status
= U_ZERO_ERROR
;
1519 "Fundamentally, computers just deal with numbers. They store letters and other characters "
1520 "by assigning a number for each one. Before Unicode was invented, there were hundreds "
1521 "of different encoding systems for assigning these numbers. No single encoding could "
1522 "contain enough characters: for example, the European Union alone requires several "
1523 "different encodings to cover all its languages. Even for a single language like "
1524 "English no single encoding was adequate for all the letters, punctuation, and technical "
1525 "symbols in common use.";
1527 UnicodeString
str(text8
);
1528 const UChar
*ustr
= str
.getTerminatedBuffer();
1529 UText ut
= UTEXT_INITIALIZER
;
1530 utext_openUChars(&ut
, ustr
, -1, &status
);
1531 UChar extractBuffer
[50];
1533 for (int32_t startIdx
= 0; startIdx
<str
.length(); ++startIdx
) {
1534 int32_t endIdx
= startIdx
+ 20;
1536 u_memset(extractBuffer
, 0, UPRV_LENGTHOF(extractBuffer
));
1537 utext_extract(&ut
, startIdx
, endIdx
, extractBuffer
, UPRV_LENGTHOF(extractBuffer
), &status
);
1538 if (U_FAILURE(status
)) {
1539 errln("%s:%d %s", __FILE__
, __LINE__
, u_errorName(status
));
1542 int64_t ni
= utext_getNativeIndex(&ut
);
1543 int64_t expectedni
= startIdx
+ 20;
1544 if (expectedni
> str
.length()) {
1545 expectedni
= str
.length();
1547 if (expectedni
!= ni
) {
1548 errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__
, __LINE__
, expectedni
, ni
);
1550 if (0 != str
.tempSubString(startIdx
, 20).compare(extractBuffer
)) {
1551 errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
1552 __FILE__
, __LINE__
, CStr(str
.tempSubString(startIdx
, 20))(), CStr(UnicodeString(extractBuffer
))());
1557 // Similar utext extract, this time with the string length provided to the UText in advance,
1558 // and a buffer of larger than required capacity.
1560 utext_openUChars(&ut
, ustr
, str
.length(), &status
);
1561 for (int32_t startIdx
= 0; startIdx
<str
.length(); ++startIdx
) {
1562 int32_t endIdx
= startIdx
+ 20;
1563 u_memset(extractBuffer
, 0, UPRV_LENGTHOF(extractBuffer
));
1564 utext_extract(&ut
, startIdx
, endIdx
, extractBuffer
, UPRV_LENGTHOF(extractBuffer
), &status
);
1565 if (U_FAILURE(status
)) {
1566 errln("%s:%d %s", __FILE__
, __LINE__
, u_errorName(status
));
1569 int64_t ni
= utext_getNativeIndex(&ut
);
1570 int64_t expectedni
= startIdx
+ 20;
1571 if (expectedni
> str
.length()) {
1572 expectedni
= str
.length();
1574 if (expectedni
!= ni
) {
1575 errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__
, __LINE__
, expectedni
, ni
);
1577 if (0 != str
.tempSubString(startIdx
, 20).compare(extractBuffer
)) {
1578 errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
1579 __FILE__
, __LINE__
, CStr(str
.tempSubString(startIdx
, 20))(), CStr(UnicodeString(extractBuffer
))());