]> git.saurik.com Git - apple/icu.git/blob - icuSources/samples/uciter8/uciter8.c
ICU-6.2.4.tar.gz
[apple/icu.git] / icuSources / samples / uciter8 / uciter8.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: uciter8.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003jan10
14 * created by: Markus W. Scherer
15 *
16 * This file contains sample code that illustrates reading
17 * 8-bit Unicode text leniently, accepting a mix of UTF-8 and CESU-8
18 * and also accepting single surrogates.
19 */
20
21 #include <stdio.h>
22 #include <string.h>
23 #include "unicode/utypes.h"
24 #include "unicode/uiter.h"
25 #include "uit_len8.h"
26
27 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
28
29 #define log_err printf
30
31 /* UCharIterator test ------------------------------------------------------- */
32
33 /*
34 * The following code is a copy of the UCharIterator test code in
35 * source/test/cintltst/custrtst.c,
36 * testing the lenient-8 iterator instead of the UTF-8 one.
37 */
38
39 /*
40 * Compare results from two iterators, should be same.
41 * Assume that the text is not empty and that
42 * iteration start==0 and iteration limit==length.
43 */
44 static void
45 compareIterators(UCharIterator *iter1, const char *n1,
46 UCharIterator *iter2, const char *n2) {
47 int32_t i, pos1, pos2, middle, length;
48 UChar32 c1, c2;
49
50 /* compare lengths */
51 length=iter1->getIndex(iter1, UITER_LENGTH);
52 pos2=iter2->getIndex(iter2, UITER_LENGTH);
53 if(length!=pos2) {
54 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2);
55 return;
56 }
57
58 /* set into the middle */
59 middle=length/2;
60
61 pos1=iter1->move(iter1, middle, UITER_ZERO);
62 if(pos1!=middle) {
63 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
64 return;
65 }
66
67 pos2=iter2->move(iter2, middle, UITER_ZERO);
68 if(pos2!=middle) {
69 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
70 return;
71 }
72
73 /* test current() */
74 c1=iter1->current(iter1);
75 c2=iter2->current(iter2);
76 if(c1!=c2) {
77 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle);
78 return;
79 }
80
81 /* move forward 3 UChars */
82 for(i=0; i<3; ++i) {
83 c1=iter1->next(iter1);
84 c2=iter2->next(iter2);
85 if(c1!=c2) {
86 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
87 return;
88 }
89 }
90
91 /* move backward 5 UChars */
92 for(i=0; i<5; ++i) {
93 c1=iter1->previous(iter1);
94 c2=iter2->previous(iter2);
95 if(c1!=c2) {
96 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
97 return;
98 }
99 }
100
101 /* iterate forward from the beginning */
102 pos1=iter1->move(iter1, 0, UITER_START);
103 if(pos1<0) {
104 log_err("%s->move(start) failed\n", n1);
105 return;
106 }
107 if(!iter1->hasNext(iter1)) {
108 log_err("%s->hasNext() at the start returns FALSE\n", n1);
109 return;
110 }
111
112 pos2=iter2->move(iter2, 0, UITER_START);
113 if(pos2<0) {
114 log_err("%s->move(start) failed\n", n2);
115 return;
116 }
117 if(!iter2->hasNext(iter2)) {
118 log_err("%s->hasNext() at the start returns FALSE\n", n2);
119 return;
120 }
121
122 do {
123 c1=iter1->next(iter1);
124 c2=iter2->next(iter2);
125 if(c1!=c2) {
126 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
127 return;
128 }
129 } while(c1>=0);
130
131 if(iter1->hasNext(iter1)) {
132 log_err("%s->hasNext() at the end returns TRUE\n", n1);
133 return;
134 }
135 if(iter2->hasNext(iter2)) {
136 log_err("%s->hasNext() at the end returns TRUE\n", n2);
137 return;
138 }
139
140 /* back to the middle */
141 pos1=iter1->move(iter1, middle, UITER_ZERO);
142 if(pos1!=middle) {
143 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
144 return;
145 }
146
147 pos2=iter2->move(iter2, middle, UITER_ZERO);
148 if(pos2!=middle) {
149 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
150 return;
151 }
152
153 /* move to index 1 */
154 pos1=iter1->move(iter1, 1, UITER_ZERO);
155 if(pos1!=1) {
156 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1);
157 return;
158 }
159
160 pos2=iter2->move(iter2, 1, UITER_ZERO);
161 if(pos2!=1) {
162 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2);
163 return;
164 }
165
166 /* iterate backward from the end */
167 pos1=iter1->move(iter1, 0, UITER_LIMIT);
168 if(pos1<0) {
169 log_err("%s->move(limit) failed\n", n1);
170 return;
171 }
172 if(!iter1->hasPrevious(iter1)) {
173 log_err("%s->hasPrevious() at the end returns FALSE\n", n1);
174 return;
175 }
176
177 pos2=iter2->move(iter2, 0, UITER_LIMIT);
178 if(pos2<0) {
179 log_err("%s->move(limit) failed\n", n2);
180 return;
181 }
182 if(!iter2->hasPrevious(iter2)) {
183 log_err("%s->hasPrevious() at the end returns FALSE\n", n2);
184 return;
185 }
186
187 do {
188 c1=iter1->previous(iter1);
189 c2=iter2->previous(iter2);
190 if(c1!=c2) {
191 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
192 return;
193 }
194 } while(c1>=0);
195
196 if(iter1->hasPrevious(iter1)) {
197 log_err("%s->hasPrevious() at the start returns TRUE\n", n1);
198 return;
199 }
200 if(iter2->hasPrevious(iter2)) {
201 log_err("%s->hasPrevious() at the start returns TRUE\n", n2);
202 return;
203 }
204 }
205
206 /*
207 * Test the iterator's getState() and setState() functions.
208 * iter1 and iter2 must be set up for the same iterator type and the same string
209 * but may be physically different structs (different addresses).
210 *
211 * Assume that the text is not empty and that
212 * iteration start==0 and iteration limit==length.
213 * It must be 2<=middle<=length-2.
214 */
215 static void
216 testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) {
217 UChar32 u[4];
218
219 UErrorCode errorCode;
220 UChar32 c;
221 uint32_t state;
222 int32_t i, j;
223
224 /* get four UChars from the middle of the string */
225 iter1->move(iter1, middle-2, UITER_ZERO);
226 for(i=0; i<4; ++i) {
227 c=iter1->next(iter1);
228 if(c<0) {
229 /* the test violates the assumptions, see comment above */
230 log_err("test error: %s[%d]=%d\n", n, middle-2+i, c);
231 return;
232 }
233 u[i]=c;
234 }
235
236 /* move to the middle and get the state */
237 iter1->move(iter1, -2, UITER_CURRENT);
238 state=uiter_getState(iter1);
239
240 /* set the state into the second iterator and compare the results */
241 errorCode=U_ZERO_ERROR;
242 uiter_setState(iter2, state, &errorCode);
243 if(U_FAILURE(errorCode)) {
244 log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode));
245 return;
246 }
247
248 c=iter2->current(iter2);
249 if(c!=u[2]) {
250 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]);
251 }
252
253 c=iter2->previous(iter2);
254 if(c!=u[1]) {
255 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]);
256 }
257
258 iter2->move(iter2, 2, UITER_CURRENT);
259 c=iter2->next(iter2);
260 if(c!=u[3]) {
261 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]);
262 }
263
264 iter2->move(iter2, -3, UITER_CURRENT);
265 c=iter2->previous(iter2);
266 if(c!=u[0]) {
267 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]);
268 }
269
270 /* move the second iterator back to the middle */
271 iter2->move(iter2, 1, UITER_CURRENT);
272 iter2->next(iter2);
273
274 /* check that both are in the middle */
275 i=iter1->getIndex(iter1, UITER_CURRENT);
276 j=iter2->getIndex(iter2, UITER_CURRENT);
277 if(i!=middle) {
278 log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle);
279 }
280 if(i!=j) {
281 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i);
282 }
283
284 /* compare lengths */
285 i=iter1->getIndex(iter1, UITER_LENGTH);
286 j=iter2->getIndex(iter2, UITER_LENGTH);
287 if(i!=j) {
288 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j);
289 }
290 }
291
292 static void
293 TestLenient8Iterator() {
294 static const UChar text[]={
295 0x61, 0x62, 0x63,
296 /* dffd 107fd d801 dffd - in UTF-16, U+107fd=<d801 dffd> */
297 0xdffd, 0xd801, 0xdffd, 0xd801, 0xdffd,
298 0x78, 0x79, 0x7a, 0
299 };
300 static const uint8_t bytes[]={
301 0x61, 0x62, 0x63,
302 /* dffd 107fd d801 dffd - mixture */
303 0xed, 0xbf, 0xbd, 0xf0, 0x90, 0x9f, 0xbd, 0xed, 0xa0, 0x81, 0xed, 0xbf, 0xbd,
304 0x78, 0x79, 0x7a, 0
305 };
306
307 UCharIterator iter1, iter2;
308 UChar32 c1, c2;
309 int32_t length;
310
311 puts("test a UCharIterator for lenient 8-bit Unicode (accept single surrogates)");
312
313 /* compare the same string between UTF-16 and lenient-8 UCharIterators */
314 uiter_setString(&iter1, text, -1);
315 uiter_setLenient8(&iter2, (const char *)bytes, sizeof(bytes)-1);
316 compareIterators(&iter1, "UTF16Iterator", &iter2, "Lenient8Iterator");
317
318 /* try again with length=-1 */
319 uiter_setLenient8(&iter2, (const char *)bytes, -1);
320 compareIterators(&iter1, "UTF16Iterator", &iter2, "Lenient8Iterator_1");
321
322 /* test get/set state */
323 length=LENGTHOF(text)-1;
324 uiter_setLenient8(&iter1, bytes, -1);
325 testIteratorState(&iter1, &iter2, "Lenient8IteratorState", length/2);
326 testIteratorState(&iter1, &iter2, "Lenient8IteratorStatePlus1", length/2+1);
327
328 /* ---------------------------------------------------------------------- */
329
330 puts("no output so far means that the lenient-8 iterator works fine");
331
332 puts("iterate forward:\nUTF-16\tlenient-8");
333 uiter_setString(&iter1, text, -1);
334 iter1.move(&iter1, 0, UITER_START);
335 iter2.move(&iter2, 0, UITER_START);
336 for(;;) {
337 c1=iter1.next(&iter1);
338 c2=iter2.next(&iter2);
339 if(c1<0 && c2<0) {
340 break;
341 }
342 if(c1<0) {
343 printf("\t%04x\n", c2);
344 } else if(c2<0) {
345 printf("%04x\n", c1);
346 } else {
347 printf("%04x\t%04x\n", c1, c2);
348 }
349 }
350 }
351
/*
 * Program entry point: runs the lenient-8 iterator test and returns 0.
 * The command-line arguments are not used.
 */
extern int
main(int argc, const char *argv[]) {
    /* explicitly discard the unused parameters */
    (void)argc;
    (void)argv;

    TestLenient8Iterator();

    return 0;
}