]>
git.saurik.com Git - apple/icu.git/blob - icuSources/samples/uciter8/uciter8.c
/*
*******************************************************************************
*
*   Copyright (C) 2003-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   tab size:   8 (not used)
*
*   created on: 2003jan10
*   created by: Markus W. Scherer
*
*   This file contains sample code that illustrates reading
*   8-bit Unicode text leniently, accepting a mix of UTF-8 and CESU-8
*   and also accepting single surrogates.
*/
#include <stdio.h>

#include "unicode/utypes.h"
#include "unicode/uiter.h"

#define log_err printf
/* UCharIterator test ------------------------------------------------------- */

/*
 * The following code is a copy of the UCharIterator test code in
 * source/test/cintltst/custrtst.c,
 * testing the lenient-8 iterator instead of the UTF-8 one.
 */
38 * Compare results from two iterators, should be same.
39 * Assume that the text is not empty and that
40 * iteration start==0 and iteration limit==length.
43 compareIterators(UCharIterator
*iter1
, const char *n1
,
44 UCharIterator
*iter2
, const char *n2
) {
45 int32_t i
, pos1
, pos2
, middle
, length
;
49 length
=iter1
->getIndex(iter1
, UITER_LENGTH
);
50 pos2
=iter2
->getIndex(iter2
, UITER_LENGTH
);
52 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1
, length
, pos2
, n2
);
56 /* set into the middle */
59 pos1
=iter1
->move(iter1
, middle
, UITER_ZERO
);
61 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1
, middle
, pos1
);
65 pos2
=iter2
->move(iter2
, middle
, UITER_ZERO
);
67 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2
, middle
, pos2
);
72 c1
=iter1
->current(iter1
);
73 c2
=iter2
->current(iter2
);
75 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1
, c1
, c2
, n2
, middle
);
79 /* move forward 3 UChars */
81 c1
=iter1
->next(iter1
);
82 c2
=iter2
->next(iter2
);
84 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
89 /* move backward 5 UChars */
91 c1
=iter1
->previous(iter1
);
92 c2
=iter2
->previous(iter2
);
94 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
99 /* iterate forward from the beginning */
100 pos1
=iter1
->move(iter1
, 0, UITER_START
);
102 log_err("%s->move(start) failed\n", n1
);
105 if(!iter1
->hasNext(iter1
)) {
106 log_err("%s->hasNext() at the start returns FALSE\n", n1
);
110 pos2
=iter2
->move(iter2
, 0, UITER_START
);
112 log_err("%s->move(start) failed\n", n2
);
115 if(!iter2
->hasNext(iter2
)) {
116 log_err("%s->hasNext() at the start returns FALSE\n", n2
);
121 c1
=iter1
->next(iter1
);
122 c2
=iter2
->next(iter2
);
124 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
129 if(iter1
->hasNext(iter1
)) {
130 log_err("%s->hasNext() at the end returns TRUE\n", n1
);
133 if(iter2
->hasNext(iter2
)) {
134 log_err("%s->hasNext() at the end returns TRUE\n", n2
);
138 /* back to the middle */
139 pos1
=iter1
->move(iter1
, middle
, UITER_ZERO
);
141 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1
, middle
, pos1
);
145 pos2
=iter2
->move(iter2
, middle
, UITER_ZERO
);
147 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2
, middle
, pos2
);
151 /* move to index 1 */
152 pos1
=iter1
->move(iter1
, 1, UITER_ZERO
);
154 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1
, middle
, pos1
);
158 pos2
=iter2
->move(iter2
, 1, UITER_ZERO
);
160 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2
, middle
, pos2
);
164 /* iterate backward from the end */
165 pos1
=iter1
->move(iter1
, 0, UITER_LIMIT
);
167 log_err("%s->move(limit) failed\n", n1
);
170 if(!iter1
->hasPrevious(iter1
)) {
171 log_err("%s->hasPrevious() at the end returns FALSE\n", n1
);
175 pos2
=iter2
->move(iter2
, 0, UITER_LIMIT
);
177 log_err("%s->move(limit) failed\n", n2
);
180 if(!iter2
->hasPrevious(iter2
)) {
181 log_err("%s->hasPrevious() at the end returns FALSE\n", n2
);
186 c1
=iter1
->previous(iter1
);
187 c2
=iter2
->previous(iter2
);
189 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
194 if(iter1
->hasPrevious(iter1
)) {
195 log_err("%s->hasPrevious() at the start returns TRUE\n", n1
);
198 if(iter2
->hasPrevious(iter2
)) {
199 log_err("%s->hasPrevious() at the start returns TRUE\n", n2
);
205 * Test the iterator's getState() and setState() functions.
206 * iter1 and iter2 must be set up for the same iterator type and the same string
207 * but may be physically different structs (different addresses).
209 * Assume that the text is not empty and that
210 * iteration start==0 and iteration limit==length.
211 * It must be 2<=middle<=length-2.
214 testIteratorState(UCharIterator
*iter1
, UCharIterator
*iter2
, const char *n
, int32_t middle
) {
217 UErrorCode errorCode
;
222 /* get four UChars from the middle of the string */
223 iter1
->move(iter1
, middle
-2, UITER_ZERO
);
225 c
=iter1
->next(iter1
);
227 /* the test violates the assumptions, see comment above */
228 log_err("test error: %s[%d]=%d\n", n
, middle
-2+i
, c
);
234 /* move to the middle and get the state */
235 iter1
->move(iter1
, -2, UITER_CURRENT
);
236 state
=uiter_getState(iter1
);
238 /* set the state into the second iterator and compare the results */
239 errorCode
=U_ZERO_ERROR
;
240 uiter_setState(iter2
, state
, &errorCode
);
241 if(U_FAILURE(errorCode
)) {
242 log_err("%s->setState(0x%x) failed: %s\n", n
, state
, u_errorName(errorCode
));
246 c
=iter2
->current(iter2
);
248 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n
, middle
, c
, u
[2]);
251 c
=iter2
->previous(iter2
);
253 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n
, middle
-1, c
, u
[1]);
256 iter2
->move(iter2
, 2, UITER_CURRENT
);
257 c
=iter2
->next(iter2
);
259 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n
, middle
+1, c
, u
[3]);
262 iter2
->move(iter2
, -3, UITER_CURRENT
);
263 c
=iter2
->previous(iter2
);
265 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n
, middle
-2, c
, u
[0]);
268 /* move the second iterator back to the middle */
269 iter2
->move(iter2
, 1, UITER_CURRENT
);
272 /* check that both are in the middle */
273 i
=iter1
->getIndex(iter1
, UITER_CURRENT
);
274 j
=iter2
->getIndex(iter2
, UITER_CURRENT
);
276 log_err("%s->getIndex(current)=%d!=%d as expected\n", n
, i
, middle
);
279 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n
, j
, i
);
282 /* compare lengths */
283 i
=iter1
->getIndex(iter1
, UITER_LENGTH
);
284 j
=iter2
->getIndex(iter2
, UITER_LENGTH
);
286 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n
, i
, j
);
291 TestLenient8Iterator() {
292 static const UChar text
[]={
294 /* dffd 107fd d801 dffd - in UTF-16, U+107fd=<d801 dffd> */
295 0xdffd, 0xd801, 0xdffd, 0xd801, 0xdffd,
298 static const uint8_t bytes
[]={
300 /* dffd 107fd d801 dffd - mixture */
301 0xed, 0xbf, 0xbd, 0xf0, 0x90, 0x9f, 0xbd, 0xed, 0xa0, 0x81, 0xed, 0xbf, 0xbd,
305 UCharIterator iter1
, iter2
;
309 puts("test a UCharIterator for lenient 8-bit Unicode (accept single surrogates)");
311 /* compare the same string between UTF-16 and lenient-8 UCharIterators */
312 uiter_setString(&iter1
, text
, -1);
313 uiter_setLenient8(&iter2
, (const char *)bytes
, sizeof(bytes
)-1);
314 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "Lenient8Iterator");
316 /* try again with length=-1 */
317 uiter_setLenient8(&iter2
, (const char *)bytes
, -1);
318 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "Lenient8Iterator_1");
320 /* test get/set state */
321 length
=UPRV_LENGTHOF(text
)-1;
322 uiter_setLenient8(&iter1
, (const char*)bytes
, -1);
323 testIteratorState(&iter1
, &iter2
, "Lenient8IteratorState", length
/2);
324 testIteratorState(&iter1
, &iter2
, "Lenient8IteratorStatePlus1", length
/2+1);
326 /* ---------------------------------------------------------------------- */
328 puts("no output so far means that the lenient-8 iterator works fine");
330 puts("iterate forward:\nUTF-16\tlenient-8");
331 uiter_setString(&iter1
, text
, -1);
332 iter1
.move(&iter1
, 0, UITER_START
);
333 iter2
.move(&iter2
, 0, UITER_START
);
335 c1
=iter1
.next(&iter1
);
336 c2
=iter2
.next(&iter2
);
341 printf("\t%04x\n", c2
);
343 printf("%04x\n", c1
);
345 printf("%04x\t%04x\n", c1
, c2
);
351 main(int argc
, const char *argv
[]) {
352 TestLenient8Iterator();