]>
git.saurik.com Git - apple/icu.git/blob - icuSources/samples/uciter8/uciter8.c
2 *******************************************************************************
4 * Copyright (C) 2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2003jan10
14 * created by: Markus W. Scherer
16 * This file contains sample code that illustrates reading
17 * 8-bit Unicode text leniently, accepting a mix of UTF-8 and CESU-8
18 * and also accepting single surrogates.
23 #include "unicode/utypes.h"
24 #include "unicode/uiter.h"
27 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
29 #define log_err printf
31 /* UCharIterator test ------------------------------------------------------- */
34 * The following code is a copy of the UCharIterator test code in
35 * source/test/cintltst/custrtst.c,
36 * testing the lenient-8 iterator instead of the UTF-8 one.
40 * Compare results from two iterators, should be same.
41 * Assume that the text is not empty and that
42 * iteration start==0 and iteration limit==length.
45 compareIterators(UCharIterator
*iter1
, const char *n1
,
46 UCharIterator
*iter2
, const char *n2
) {
47 int32_t i
, pos1
, pos2
, middle
, length
;
51 length
=iter1
->getIndex(iter1
, UITER_LENGTH
);
52 pos2
=iter2
->getIndex(iter2
, UITER_LENGTH
);
54 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1
, length
, pos2
, n2
);
58 /* set into the middle */
61 pos1
=iter1
->move(iter1
, middle
, UITER_ZERO
);
63 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1
, middle
, pos1
);
67 pos2
=iter2
->move(iter2
, middle
, UITER_ZERO
);
69 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2
, middle
, pos2
);
74 c1
=iter1
->current(iter1
);
75 c2
=iter2
->current(iter2
);
77 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1
, c1
, c2
, n2
, middle
);
81 /* move forward 3 UChars */
83 c1
=iter1
->next(iter1
);
84 c2
=iter2
->next(iter2
);
86 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
91 /* move backward 5 UChars */
93 c1
=iter1
->previous(iter1
);
94 c2
=iter2
->previous(iter2
);
96 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
101 /* iterate forward from the beginning */
102 pos1
=iter1
->move(iter1
, 0, UITER_START
);
104 log_err("%s->move(start) failed\n", n1
);
107 if(!iter1
->hasNext(iter1
)) {
108 log_err("%s->hasNext() at the start returns FALSE\n", n1
);
112 pos2
=iter2
->move(iter2
, 0, UITER_START
);
114 log_err("%s->move(start) failed\n", n2
);
117 if(!iter2
->hasNext(iter2
)) {
118 log_err("%s->hasNext() at the start returns FALSE\n", n2
);
123 c1
=iter1
->next(iter1
);
124 c2
=iter2
->next(iter2
);
126 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
131 if(iter1
->hasNext(iter1
)) {
132 log_err("%s->hasNext() at the end returns TRUE\n", n1
);
135 if(iter2
->hasNext(iter2
)) {
136 log_err("%s->hasNext() at the end returns TRUE\n", n2
);
140 /* back to the middle */
141 pos1
=iter1
->move(iter1
, middle
, UITER_ZERO
);
143 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1
, middle
, pos1
);
147 pos2
=iter2
->move(iter2
, middle
, UITER_ZERO
);
149 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2
, middle
, pos2
);
153 /* move to index 1 */
154 pos1
=iter1
->move(iter1
, 1, UITER_ZERO
);
156 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1
, middle
, pos1
);
160 pos2
=iter2
->move(iter2
, 1, UITER_ZERO
);
162 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2
, middle
, pos2
);
166 /* iterate backward from the end */
167 pos1
=iter1
->move(iter1
, 0, UITER_LIMIT
);
169 log_err("%s->move(limit) failed\n", n1
);
172 if(!iter1
->hasPrevious(iter1
)) {
173 log_err("%s->hasPrevious() at the end returns FALSE\n", n1
);
177 pos2
=iter2
->move(iter2
, 0, UITER_LIMIT
);
179 log_err("%s->move(limit) failed\n", n2
);
182 if(!iter2
->hasPrevious(iter2
)) {
183 log_err("%s->hasPrevious() at the end returns FALSE\n", n2
);
188 c1
=iter1
->previous(iter1
);
189 c2
=iter2
->previous(iter2
);
191 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
196 if(iter1
->hasPrevious(iter1
)) {
197 log_err("%s->hasPrevious() at the start returns TRUE\n", n1
);
200 if(iter2
->hasPrevious(iter2
)) {
201 log_err("%s->hasPrevious() at the start returns TRUE\n", n2
);
207 * Test the iterator's getState() and setState() functions.
208 * iter1 and iter2 must be set up for the same iterator type and the same string
209 * but may be physically different structs (different addresses).
211 * Assume that the text is not empty and that
212 * iteration start==0 and iteration limit==length.
213 * It must be 2<=middle<=length-2.
216 testIteratorState(UCharIterator
*iter1
, UCharIterator
*iter2
, const char *n
, int32_t middle
) {
219 UErrorCode errorCode
;
224 /* get four UChars from the middle of the string */
225 iter1
->move(iter1
, middle
-2, UITER_ZERO
);
227 c
=iter1
->next(iter1
);
229 /* the test violates the assumptions, see comment above */
230 log_err("test error: %s[%d]=%d\n", n
, middle
-2+i
, c
);
236 /* move to the middle and get the state */
237 iter1
->move(iter1
, -2, UITER_CURRENT
);
238 state
=uiter_getState(iter1
);
240 /* set the state into the second iterator and compare the results */
241 errorCode
=U_ZERO_ERROR
;
242 uiter_setState(iter2
, state
, &errorCode
);
243 if(U_FAILURE(errorCode
)) {
244 log_err("%s->setState(0x%x) failed: %s\n", n
, state
, u_errorName(errorCode
));
248 c
=iter2
->current(iter2
);
250 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n
, middle
, c
, u
[2]);
253 c
=iter2
->previous(iter2
);
255 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n
, middle
-1, c
, u
[1]);
258 iter2
->move(iter2
, 2, UITER_CURRENT
);
259 c
=iter2
->next(iter2
);
261 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n
, middle
+1, c
, u
[3]);
264 iter2
->move(iter2
, -3, UITER_CURRENT
);
265 c
=iter2
->previous(iter2
);
267 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n
, middle
-2, c
, u
[0]);
270 /* move the second iterator back to the middle */
271 iter2
->move(iter2
, 1, UITER_CURRENT
);
274 /* check that both are in the middle */
275 i
=iter1
->getIndex(iter1
, UITER_CURRENT
);
276 j
=iter2
->getIndex(iter2
, UITER_CURRENT
);
278 log_err("%s->getIndex(current)=%d!=%d as expected\n", n
, i
, middle
);
281 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n
, j
, i
);
284 /* compare lengths */
285 i
=iter1
->getIndex(iter1
, UITER_LENGTH
);
286 j
=iter2
->getIndex(iter2
, UITER_LENGTH
);
288 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n
, i
, j
);
293 TestLenient8Iterator() {
294 static const UChar text
[]={
296 /* dffd 107fd d801 dffd - in UTF-16, U+107fd=<d801 dffd> */
297 0xdffd, 0xd801, 0xdffd, 0xd801, 0xdffd,
300 static const uint8_t bytes
[]={
302 /* dffd 107fd d801 dffd - mixture */
303 0xed, 0xbf, 0xbd, 0xf0, 0x90, 0x9f, 0xbd, 0xed, 0xa0, 0x81, 0xed, 0xbf, 0xbd,
307 UCharIterator iter1
, iter2
;
311 puts("test a UCharIterator for lenient 8-bit Unicode (accept single surrogates)");
313 /* compare the same string between UTF-16 and lenient-8 UCharIterators */
314 uiter_setString(&iter1
, text
, -1);
315 uiter_setLenient8(&iter2
, (const char *)bytes
, sizeof(bytes
)-1);
316 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "Lenient8Iterator");
318 /* try again with length=-1 */
319 uiter_setLenient8(&iter2
, (const char *)bytes
, -1);
320 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "Lenient8Iterator_1");
322 /* test get/set state */
323 length
=LENGTHOF(text
)-1;
324 uiter_setLenient8(&iter1
, bytes
, -1);
325 testIteratorState(&iter1
, &iter2
, "Lenient8IteratorState", length
/2);
326 testIteratorState(&iter1
, &iter2
, "Lenient8IteratorStatePlus1", length
/2+1);
328 /* ---------------------------------------------------------------------- */
330 puts("no output so far means that the lenient-8 iterator works fine");
332 puts("iterate forward:\nUTF-16\tlenient-8");
333 uiter_setString(&iter1
, text
, -1);
334 iter1
.move(&iter1
, 0, UITER_START
);
335 iter2
.move(&iter2
, 0, UITER_START
);
337 c1
=iter1
.next(&iter1
);
338 c2
=iter2
.next(&iter2
);
343 printf("\t%04x\n", c2
);
345 printf("%04x\n", c1
);
347 printf("%04x\t%04x\n", c1
, c2
);
353 main(int argc
, const char *argv
[]) {
354 TestLenient8Iterator();