]>
git.saurik.com Git - apple/icu.git/blob - icuSources/samples/uciter8/uciter8.c
2 *******************************************************************************
4 * © 2016 and later: Unicode, Inc. and others.
5 * License & terms of use: http://www.unicode.org/copyright.html#License
7 *******************************************************************************
8 *******************************************************************************
10 * Copyright (C) 2003-2014, International Business Machines
11 * Corporation and others. All Rights Reserved.
13 *******************************************************************************
14 * file name: uciter8.c
16 * tab size: 8 (not used)
19 * created on: 2003jan10
20 * created by: Markus W. Scherer
22 * This file contains sample code that illustrates reading
23 * 8-bit Unicode text leniently, accepting a mix of UTF-8 and CESU-8
24 * and also accepting single surrogates.
29 #include "unicode/utypes.h"
30 #include "unicode/uiter.h"
34 #define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
37 #define log_err printf
39 /* UCharIterator test ------------------------------------------------------- */
42 * The following code is a copy of the UCharIterator test code in
43 * source/test/cintltst/custrtst.c,
44 * testing the lenient-8 iterator instead of the UTF-8 one.
48 * Compare results from two iterators, should be same.
49 * Assume that the text is not empty and that
50 * iteration start==0 and iteration limit==length.
53 compareIterators(UCharIterator
*iter1
, const char *n1
,
54 UCharIterator
*iter2
, const char *n2
) {
55 int32_t i
, pos1
, pos2
, middle
, length
;
59 length
=iter1
->getIndex(iter1
, UITER_LENGTH
);
60 pos2
=iter2
->getIndex(iter2
, UITER_LENGTH
);
62 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1
, length
, pos2
, n2
);
66 /* set into the middle */
69 pos1
=iter1
->move(iter1
, middle
, UITER_ZERO
);
71 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1
, middle
, pos1
);
75 pos2
=iter2
->move(iter2
, middle
, UITER_ZERO
);
77 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2
, middle
, pos2
);
82 c1
=iter1
->current(iter1
);
83 c2
=iter2
->current(iter2
);
85 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1
, c1
, c2
, n2
, middle
);
89 /* move forward 3 UChars */
91 c1
=iter1
->next(iter1
);
92 c2
=iter2
->next(iter2
);
94 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
99 /* move backward 5 UChars */
101 c1
=iter1
->previous(iter1
);
102 c2
=iter2
->previous(iter2
);
104 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
109 /* iterate forward from the beginning */
110 pos1
=iter1
->move(iter1
, 0, UITER_START
);
112 log_err("%s->move(start) failed\n", n1
);
115 if(!iter1
->hasNext(iter1
)) {
116 log_err("%s->hasNext() at the start returns FALSE\n", n1
);
120 pos2
=iter2
->move(iter2
, 0, UITER_START
);
122 log_err("%s->move(start) failed\n", n2
);
125 if(!iter2
->hasNext(iter2
)) {
126 log_err("%s->hasNext() at the start returns FALSE\n", n2
);
131 c1
=iter1
->next(iter1
);
132 c2
=iter2
->next(iter2
);
134 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
139 if(iter1
->hasNext(iter1
)) {
140 log_err("%s->hasNext() at the end returns TRUE\n", n1
);
143 if(iter2
->hasNext(iter2
)) {
144 log_err("%s->hasNext() at the end returns TRUE\n", n2
);
148 /* back to the middle */
149 pos1
=iter1
->move(iter1
, middle
, UITER_ZERO
);
151 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1
, middle
, pos1
);
155 pos2
=iter2
->move(iter2
, middle
, UITER_ZERO
);
157 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2
, middle
, pos2
);
161 /* move to index 1 */
162 pos1
=iter1
->move(iter1
, 1, UITER_ZERO
);
164 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1
, middle
, pos1
);
168 pos2
=iter2
->move(iter2
, 1, UITER_ZERO
);
170 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2
, middle
, pos2
);
174 /* iterate backward from the end */
175 pos1
=iter1
->move(iter1
, 0, UITER_LIMIT
);
177 log_err("%s->move(limit) failed\n", n1
);
180 if(!iter1
->hasPrevious(iter1
)) {
181 log_err("%s->hasPrevious() at the end returns FALSE\n", n1
);
185 pos2
=iter2
->move(iter2
, 0, UITER_LIMIT
);
187 log_err("%s->move(limit) failed\n", n2
);
190 if(!iter2
->hasPrevious(iter2
)) {
191 log_err("%s->hasPrevious() at the end returns FALSE\n", n2
);
196 c1
=iter1
->previous(iter1
);
197 c2
=iter2
->previous(iter2
);
199 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
204 if(iter1
->hasPrevious(iter1
)) {
205 log_err("%s->hasPrevious() at the start returns TRUE\n", n1
);
208 if(iter2
->hasPrevious(iter2
)) {
209 log_err("%s->hasPrevious() at the start returns TRUE\n", n2
);
215 * Test the iterator's getState() and setState() functions.
216 * iter1 and iter2 must be set up for the same iterator type and the same string
217 * but may be physically different structs (different addresses).
219 * Assume that the text is not empty and that
220 * iteration start==0 and iteration limit==length.
221 * It must be 2<=middle<=length-2.
224 testIteratorState(UCharIterator
*iter1
, UCharIterator
*iter2
, const char *n
, int32_t middle
) {
227 UErrorCode errorCode
;
232 /* get four UChars from the middle of the string */
233 iter1
->move(iter1
, middle
-2, UITER_ZERO
);
235 c
=iter1
->next(iter1
);
237 /* the test violates the assumptions, see comment above */
238 log_err("test error: %s[%d]=%d\n", n
, middle
-2+i
, c
);
244 /* move to the middle and get the state */
245 iter1
->move(iter1
, -2, UITER_CURRENT
);
246 state
=uiter_getState(iter1
);
248 /* set the state into the second iterator and compare the results */
249 errorCode
=U_ZERO_ERROR
;
250 uiter_setState(iter2
, state
, &errorCode
);
251 if(U_FAILURE(errorCode
)) {
252 log_err("%s->setState(0x%x) failed: %s\n", n
, state
, u_errorName(errorCode
));
256 c
=iter2
->current(iter2
);
258 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n
, middle
, c
, u
[2]);
261 c
=iter2
->previous(iter2
);
263 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n
, middle
-1, c
, u
[1]);
266 iter2
->move(iter2
, 2, UITER_CURRENT
);
267 c
=iter2
->next(iter2
);
269 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n
, middle
+1, c
, u
[3]);
272 iter2
->move(iter2
, -3, UITER_CURRENT
);
273 c
=iter2
->previous(iter2
);
275 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n
, middle
-2, c
, u
[0]);
278 /* move the second iterator back to the middle */
279 iter2
->move(iter2
, 1, UITER_CURRENT
);
282 /* check that both are in the middle */
283 i
=iter1
->getIndex(iter1
, UITER_CURRENT
);
284 j
=iter2
->getIndex(iter2
, UITER_CURRENT
);
286 log_err("%s->getIndex(current)=%d!=%d as expected\n", n
, i
, middle
);
289 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n
, j
, i
);
292 /* compare lengths */
293 i
=iter1
->getIndex(iter1
, UITER_LENGTH
);
294 j
=iter2
->getIndex(iter2
, UITER_LENGTH
);
296 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n
, i
, j
);
301 TestLenient8Iterator() {
302 static const UChar text
[]={
304 /* dffd 107fd d801 dffd - in UTF-16, U+107fd=<d801 dffd> */
305 0xdffd, 0xd801, 0xdffd, 0xd801, 0xdffd,
308 static const uint8_t bytes
[]={
310 /* dffd 107fd d801 dffd - mixture */
311 0xed, 0xbf, 0xbd, 0xf0, 0x90, 0x9f, 0xbd, 0xed, 0xa0, 0x81, 0xed, 0xbf, 0xbd,
315 UCharIterator iter1
, iter2
;
319 puts("test a UCharIterator for lenient 8-bit Unicode (accept single surrogates)");
321 /* compare the same string between UTF-16 and lenient-8 UCharIterators */
322 uiter_setString(&iter1
, text
, -1);
323 uiter_setLenient8(&iter2
, (const char *)bytes
, sizeof(bytes
)-1);
324 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "Lenient8Iterator");
326 /* try again with length=-1 */
327 uiter_setLenient8(&iter2
, (const char *)bytes
, -1);
328 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "Lenient8Iterator_1");
330 /* test get/set state */
331 length
=UPRV_LENGTHOF(text
)-1;
332 uiter_setLenient8(&iter1
, (const char*)bytes
, -1);
333 testIteratorState(&iter1
, &iter2
, "Lenient8IteratorState", length
/2);
334 testIteratorState(&iter1
, &iter2
, "Lenient8IteratorStatePlus1", length
/2+1);
336 /* ---------------------------------------------------------------------- */
338 puts("no output so far means that the lenient-8 iterator works fine");
340 puts("iterate forward:\nUTF-16\tlenient-8");
341 uiter_setString(&iter1
, text
, -1);
342 iter1
.move(&iter1
, 0, UITER_START
);
343 iter2
.move(&iter2
, 0, UITER_START
);
345 c1
=iter1
.next(&iter1
);
346 c2
=iter2
.next(&iter2
);
351 printf("\t%04x\n", c2
);
353 printf("%04x\n", c1
);
355 printf("%04x\t%04x\n", c1
, c2
);
361 main(int argc
, const char *argv
[]) {
362 TestLenient8Iterator();