]> git.saurik.com Git - apple/icu.git/blame - icuSources/samples/ucnv/convsamp.cpp
ICU-400.37.tar.gz
[apple/icu.git] / icuSources / samples / ucnv / convsamp.cpp
CommitLineData
b75a7d8f
A
/**************************************************************************
*
*   Copyright (C) 2000-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
***************************************************************************
*   file name:  convsamp.c
*   encoding:   ASCII (7-bit)
*
*   created on: 2000may30
*   created by: Steven R. Loomis
*
*   Sample code for the ICU conversion routines.
*
*   Note: Nothing special is needed to build this sample. Link with
*         the icu UC and icu I18N libraries.
*
*         I use 'assert' for error checking, you probably will want
*         something more flexible.  The 'START SAMPLE' and
*         'END SAMPLE' comment lines mark pieces suitable for
*         stand-alone code snippets.
*
*
*   Each test can define its own BUFFERSIZE.
*
*/
27
28#define DEBUG_TMI 0 /* define to 1 to enable Too Much Information */
29
30#include <stdio.h>
31#include <ctype.h> /* for isspace, etc. */
32#include <assert.h>
33#include <string.h>
34#include <stdlib.h> /* malloc */
35
36#include "unicode/utypes.h" /* Basic ICU data types */
37#include "unicode/ucnv.h" /* C Converter API */
38#include "unicode/ustring.h" /* some more string fcns*/
39#include "unicode/uchar.h" /* char names */
40#include "unicode/uloc.h"
41#include "unicode/unistr.h"
42
43#include "flagcb.h"
44
45/* Some utility functions */
46
47static const UChar kNone[] = { 0x0000 };
48
49#define U_ASSERT(x) { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }}
50
51/* Print a UChar if possible, in seven characters. */
52void prettyPrintUChar(UChar c)
53{
54 if( (c <= 0x007F) &&
55 (isgraph(c)) ) {
56 printf(" '%c' ", (char)(0x00FF&c));
57 } else if ( c > 0x007F ) {
58 char buf[1000];
59 UErrorCode status = U_ZERO_ERROR;
60 int32_t o;
61
62 o = u_charName(c, U_UNICODE_CHAR_NAME, buf, 1000, &status);
63 if(U_SUCCESS(status) && (o>0) ) {
64 buf[6] = 0;
65 printf("%7s", buf);
66 } else {
67 o = u_charName(c, U_UNICODE_10_CHAR_NAME, buf, 1000, &status);
68 if(U_SUCCESS(status) && (o>0)) {
69 buf[5] = 0;
70 printf("~%6s", buf);
71 }
72 else {
73 printf(" ??????");
74 }
75 }
76 } else {
77 switch((char)(c & 0x007F)) {
78 case ' ':
79 printf(" ' ' ");
80 break;
81 case '\t':
82 printf(" \\t ");
83 break;
84 case '\n':
85 printf(" \\n ");
86 break;
87 default:
88 printf(" _ ");
89 break;
90 }
91 }
92}
93
94
95void printUChars(const char *name = "?",
96 const UChar *uch = kNone,
97 int32_t len = -1 )
98{
99 int32_t i;
100
101 if( (len == -1) && (uch) ) {
102 len = u_strlen(uch);
103 }
104
105 printf("%5s: ", name);
106 for( i = 0; i <len; i++) {
107 printf("%-6d ", i);
108 }
109 printf("\n");
110
111 printf("%5s: ", "uni");
112 for( i = 0; i <len; i++) {
113 printf("\\u%04X ", (int)uch[i]);
114 }
115 printf("\n");
116
117 printf("%5s:", "ch");
118 for( i = 0; i <len; i++) {
119 prettyPrintUChar(uch[i]);
120 }
121 printf("\n");
122}
123
/*
 * Dump a byte string to stdout as three aligned rows: the index of each
 * byte, its \xXX value, and the character itself when it is printable.
 *
 * name - label printed in front of the index row
 * uch  - bytes to print
 * len  - number of bytes; -1 means "measure uch with strlen()"
 */
void printBytes(const char *name = "?",
                const char *uch = "",
                int32_t len = -1 )
{
    if (uch && (len == -1)) {
        len = strlen(uch);
    }

    /* Row 1: positions. */
    printf("%5s: ", name);
    for (int32_t pos = 0; pos < len; ++pos) {
        printf("%-4d ", pos);
    }
    printf("\n");

    /* Row 2: byte values, masked so a signed char never sign-extends. */
    printf("%5s: ", "uni");
    for (int32_t pos = 0; pos < len; ++pos) {
        printf("\\x%02X ", 0x00FF & (int)uch[pos]);
    }
    printf("\n");

    /* Row 3: the byte as a character, or blank when it is not graphic. */
    printf("%5s:", "ch");
    for (int32_t pos = 0; pos < len; ++pos) {
        const int byteVal = 0x00FF & (int)uch[pos];
        if (!isgraph(byteVal)) {
            printf(" ");
            continue;
        }
        printf(" '%c' ", (char)uch[pos]);
    }
    printf("\n");
}
156
157void printUChar(UChar32 ch32)
158{
159 if(ch32 > 0xFFFF) {
160 printf("ch: U+%06X\n", ch32);
161 }
162 else {
163 UChar ch = (UChar)ch32;
164 printUChars("C", &ch, 1);
165 }
166}
167
/*******************************************************************
  Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
  followed by an exclamation mark (!) into the KOI8-R Russian code page.

  This example first creates a UChar String out of the Unicode chars.

  targetSize must be set to the amount of space available in the target
  buffer. After fromUChars is called,
  len will contain the number of bytes in target[] which were
  used in the resulting codepage.  In this case, there is a 1:1 mapping
  between the input and output characters. The exclamation mark has the
  same value in both KOI8-R and Unicode.

   src: 0      1      2      3      4      5      6
   uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
    ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL   '!'

  targ: 0    1    2    3    4    5    6
   uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
    ch:                                '!'


  Converting FROM unicode
  to koi8-r.
  You must call ucnv_close to clean up the memory used by the
  converter.

  'len' returns the number of OUTPUT bytes resulting from the
  conversion.
 */
198
199UErrorCode convsample_02()
200{
201 printf("\n\n==============================================\n"
202 "Sample 02: C: simple Unicode -> koi8-r conversion\n");
203
204
205 // **************************** START SAMPLE *******************
206 // "cat<cat>OK"
207 UChar source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
208 0x0430, 0x0021, 0x0000 };
209 char target[100];
210 UErrorCode status = U_ZERO_ERROR;
211 UConverter *conv;
212 int32_t len;
213
214 // set up the converter
215 conv = ucnv_open("koi8-r", &status);
216 assert(U_SUCCESS(status));
217
218 // convert to koi8-r
219 len = ucnv_fromUChars(conv, target, 100, source, -1, &status);
220 assert(U_SUCCESS(status));
221
222 // close the converter
223 ucnv_close(conv);
224
225 // ***************************** END SAMPLE ********************
226
227 // Print it out
228 printUChars("src", source);
229 printf("\n");
230 printBytes("targ", target, len);
231
232 return U_ZERO_ERROR;
233}
234
235
236UErrorCode convsample_03()
237{
238 printf("\n\n==============================================\n"
239 "Sample 03: C: print out all converters\n");
240
241 int32_t count;
242 int32_t i;
243
244 // **************************** START SAMPLE *******************
245 count = ucnv_countAvailable();
246 printf("Available converters: %d\n", count);
247
248 for(i=0;i<count;i++)
249 {
250 printf("%s ", ucnv_getAvailableName(i));
251 }
252
253 // ***************************** END SAMPLE ********************
254
255 printf("\n");
256
257 return U_ZERO_ERROR;
258}
259
260
261
262#define BUFFERSIZE 17 /* make it interesting :) */
263
/*
  Converting from a codepage to Unicode in bulk..
  What is the best way to determine the buffer size?

     The 'buffersize' is in bytes of input.
    For a given converter, dividing this by the minimum char size
    gives you the maximum number of Unicode characters that could be
    expected for a given number of input bytes.
     see: ucnv_getMinCharSize()

     For example, a single byte codepage like 'Latin-3' has a
    minimum char size of 1. (It takes at least 1 byte to represent
    each Unicode char.) So the unicode buffer has the same number of
    UChars as the input buffer has bytes.

     In a strictly double byte codepage such as cp1362 (Windows
    Korean), the minimum char size is 2. So, only half as many Unicode
    chars as bytes are needed.

     This work to calculate the buffer size is an optimization. Any
    size of input and output buffer can be used, as long as the
    program handles the following cases: If the input buffer is empty,
    the source pointer will be equal to sourceLimit.  If the output
    buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
 */
289
290UErrorCode convsample_05()
291{
292 printf("\n\n==============================================\n"
293 "Sample 05: C: count the number of letters in a UTF-8 document\n");
294
295 FILE *f;
296 int32_t count;
297 char inBuf[BUFFERSIZE];
298 const char *source;
299 const char *sourceLimit;
300 UChar *uBuf;
301 UChar *target;
302 UChar *targetLimit;
303 UChar *p;
304 int32_t uBufSize = 0;
305 UConverter *conv;
306 UErrorCode status = U_ZERO_ERROR;
307 uint32_t letters=0, total=0;
308
309 f = fopen("data01.txt", "r");
310 if(!f)
311 {
312 fprintf(stderr, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
313 return U_FILE_ACCESS_ERROR;
314 }
315
316 // **************************** START SAMPLE *******************
317 conv = ucnv_open("utf-8", &status);
318 assert(U_SUCCESS(status));
319
320 uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
321 printf("input bytes %d / min chars %d = %d UChars\n",
322 BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
323 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
324 assert(uBuf!=NULL);
325
326 // grab another buffer's worth
327 while((!feof(f)) &&
328 ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
329 {
330 // Convert bytes to unicode
331 source = inBuf;
332 sourceLimit = inBuf + count;
333
334 do
335 {
336 target = uBuf;
337 targetLimit = uBuf + uBufSize;
338
339 ucnv_toUnicode(conv, &target, targetLimit,
340 &source, sourceLimit, NULL,
341 feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
342 /* is true (when no more data will come) */
343 &status);
344
345 if(status == U_BUFFER_OVERFLOW_ERROR)
346 {
347 // simply ran out of space - we'll reset the target ptr the next
348 // time through the loop.
349 status = U_ZERO_ERROR;
350 }
351 else
352 {
353 // Check other errors here.
354 assert(U_SUCCESS(status));
355 // Break out of the loop (by force)
356 }
357
358 // Process the Unicode
359 // Todo: handle UTF-16/surrogates
360
361 for(p = uBuf; p<target; p++)
362 {
363 if(u_isalpha(*p))
364 letters++;
365 total++;
366 }
367 } while (source < sourceLimit); // while simply out of space
368 }
369
370 printf("%d letters out of %d total UChars.\n", letters, total);
371
372 // ***************************** END SAMPLE ********************
373 ucnv_close(conv);
374
375 printf("\n");
376
377 return U_ZERO_ERROR;
378}
379#undef BUFFERSIZE
380
381#define BUFFERSIZE 1024
382typedef struct
383{
384 UChar32 codepoint;
385 uint32_t frequency;
386} CharFreqInfo;
387
388UErrorCode convsample_06()
389{
390 printf("\n\n==============================================\n"
391 "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
392
393 FILE *f;
394 int32_t count;
395 char inBuf[BUFFERSIZE];
396 const char *source;
397 const char *sourceLimit;
398 UChar *uBuf;
399 int32_t uBufSize = 0;
400 UConverter *conv;
401 UErrorCode status = U_ZERO_ERROR;
402 uint32_t letters=0, total=0;
403
404 CharFreqInfo *info;
405 UChar32 charCount = 0x10000; /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
406 UChar32 p;
407
408 uint32_t ie = 0;
409 uint32_t gh = 0;
410 UChar32 l = 0;
411
412 f = fopen("data06.txt", "r");
413 if(!f)
414 {
415 fprintf(stderr, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
416 return U_FILE_ACCESS_ERROR;
417 }
418
419 info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
420 if(!info)
421 {
422 fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo)*charCount);
423 }
424
425 /* reset frequencies */
426 for(p=0;p<charCount;p++)
427 {
428 info[p].codepoint = p;
429 info[p].frequency = 0;
430 }
431
432 // **************************** START SAMPLE *******************
433 conv = ucnv_open("utf-8", &status);
434 assert(U_SUCCESS(status));
435
436 uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
437 printf("input bytes %d / min chars %d = %d UChars\n",
438 BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
439 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
440 assert(uBuf!=NULL);
441
442 // grab another buffer's worth
443 while((!feof(f)) &&
444 ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
445 {
446 // Convert bytes to unicode
447 source = inBuf;
448 sourceLimit = inBuf + count;
449
450 while(source < sourceLimit)
451 {
452 p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
453 if(U_FAILURE(status))
454 {
455 fprintf(stderr, "%s @ %d\n", u_errorName(status), total);
456 status = U_ZERO_ERROR;
457 continue;
458 }
459 U_ASSERT(status);
460 total++;
461
462 if(u_isalpha(p))
463 letters++;
464
465 if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
466 ie++;
467
468 if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
469 gh++;
470
471 if(p>charCount)
472 {
473 fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
474 return U_UNSUPPORTED_ERROR;
475 }
476 info[p].frequency++;
477 l = p;
478 }
479 }
480
481 fclose(f);
482 ucnv_close(conv);
483
484 printf("%d letters out of %d total UChars.\n", letters, total);
485 printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);
486
487 // now, we could sort it..
488
489 // qsort(info, charCount, sizeof(info[0]), charfreq_compare);
490
491 for(p=0;p<charCount;p++)
492 {
493 if(info[p].frequency)
494 {
495 printf("% 5d U+%06X ", info[p].frequency, p);
496 if(p <= 0xFFFF)
497 {
498 prettyPrintUChar((UChar)p);
499 }
500 printf("\n");
501 }
502 }
503 free(info);
504 // ***************************** END SAMPLE ********************
505
506 printf("\n");
507
508 return U_ZERO_ERROR;
509}
510#undef BUFFERSIZE
511
512
513/******************************************************
514 You must call ucnv_close to clean up the memory used by the
515 converter.
516
517 'len' returns the number of OUTPUT bytes resulting from the
518 conversion.
519 */
520
521UErrorCode convsample_12()
522{
523 printf("\n\n==============================================\n"
524 "Sample 12: C: simple sjis -> unicode conversion\n");
525
526
527 // **************************** START SAMPLE *******************
528
529 char source[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
530 UChar target[100];
531 UErrorCode status = U_ZERO_ERROR;
532 UConverter *conv;
533 int32_t len;
534
535 // set up the converter
536 conv = ucnv_open("shift_jis", &status);
537 assert(U_SUCCESS(status));
538
539 // convert to Unicode
540 // Note: we can use strlen, we know it's an 8 bit null terminated codepage
541 target[6] = 0xFDCA;
542 len = ucnv_toUChars(conv, target, 100, source, strlen(source), &status);
543 U_ASSERT(status);
544 // close the converter
545 ucnv_close(conv);
546
547 // ***************************** END SAMPLE ********************
548
549 // Print it out
550 printBytes("src", source, strlen(source) );
551 printf("\n");
552 printUChars("targ", target, len);
553
554 return U_ZERO_ERROR;
555}
556
557/******************************************************************
558 C: Convert from codepage to Unicode one at a time.
559*/
560
561UErrorCode convsample_13()
562{
563 printf("\n\n==============================================\n"
564 "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
565
566
567 const char sourceChars[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e };
568 // const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
569 const char *source, *sourceLimit;
570 UChar32 target;
571 UErrorCode status = U_ZERO_ERROR;
572 UConverter *conv = NULL;
573 int32_t srcCount=0;
574 int32_t dstCount=0;
575
576 srcCount = sizeof(sourceChars);
577
578 conv = ucnv_open("Big5", &status);
579 U_ASSERT(status);
580
581 source = sourceChars;
582 sourceLimit = sourceChars + sizeof(sourceChars);
583
584 // **************************** START SAMPLE *******************
585
586
587 printBytes("src",source,sourceLimit-source);
588
589 while(source < sourceLimit)
590 {
591 puts("");
592 target = ucnv_getNextUChar (conv,
593 &source,
594 sourceLimit,
595 &status);
596
597 // printBytes("src",source,sourceLimit-source);
598 U_ASSERT(status);
599 printUChar(target);
600 dstCount++;
601 }
602
603
604 // ************************** END SAMPLE *************************
605
606 printf("src=%d bytes, dst=%d uchars\n", srcCount, dstCount);
607 ucnv_close(conv);
608
609 return U_ZERO_ERROR;
610}
611
612
613
614
615UBool convsample_20_didSubstitute(const char *source)
616{
617 UChar uchars[100];
618 char bytes[100];
619 UConverter *conv = NULL;
620 UErrorCode status = U_ZERO_ERROR;
621 uint32_t len, len2;
622 UBool flagVal;
623
624 FromUFLAGContext * context = NULL;
625
626 printf("\n\n==============================================\n"
627 "Sample 20: C: Test for substitution using callbacks\n");
628
629 /* print out the original source */
630 printBytes("src", source);
631 printf("\n");
632
633 /* First, convert from UTF8 to unicode */
634 conv = ucnv_open("utf-8", &status);
635 U_ASSERT(status);
636
637 len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
638 U_ASSERT(status);
639
640 printUChars("uch", uchars, len);
641 printf("\n");
642
643 /* Now, close the converter */
644 ucnv_close(conv);
645
646 /* Now, convert to windows-1252 */
647 conv = ucnv_open("windows-1252", &status);
648 U_ASSERT(status);
649
650 /* Converter starts out with the SUBSTITUTE callback set. */
651
652 /* initialize our callback */
653 context = flagCB_fromU_openContext();
654
655 /* Set our special callback */
656 ucnv_setFromUCallBack(conv,
657 flagCB_fromU,
658 context,
659 &(context->subCallback),
660 &(context->subContext),
661 &status);
662
663 U_ASSERT(status);
664
665 len2 = ucnv_fromUChars(conv, bytes, 100, uchars, len, &status);
666 U_ASSERT(status);
667
668 flagVal = context->flag; /* it's about to go away when we close the cnv */
669
670 ucnv_close(conv);
671
672 /* print out the original source */
673 printBytes("bytes", bytes, len2);
674
675 return flagVal; /* true if callback was called */
676}
677
678UErrorCode convsample_20()
679{
680 const char *sample1 = "abc\xdf\xbf";
681 const char *sample2 = "abc_def";
682
683
684 if(convsample_20_didSubstitute(sample1))
685 {
686 printf("DID substitute.\n******\n");
687 }
688 else
689 {
690 printf("Did NOT substitute.\n*****\n");
691 }
692
693 if(convsample_20_didSubstitute(sample2))
694 {
695 printf("DID substitute.\n******\n");
696 }
697 else
698 {
699 printf("Did NOT substitute.\n*****\n");
700 }
701
702 return U_ZERO_ERROR;
703}
704
705// 21 - C, callback, with clone and debug
706
707
708
709UBool convsample_21_didSubstitute(const char *source)
710{
711 UChar uchars[100];
712 char bytes[100];
713 UConverter *conv = NULL, *cloneCnv = NULL;
714 UErrorCode status = U_ZERO_ERROR;
715 uint32_t len, len2;
716 int32_t cloneLen;
717 UBool flagVal = FALSE;
718 UConverterFromUCallback junkCB;
719
720 FromUFLAGContext *flagCtx = NULL,
721 *cloneFlagCtx = NULL;
722
723 debugCBContext *debugCtx1 = NULL,
724 *debugCtx2 = NULL,
725 *cloneDebugCtx = NULL;
726
727 printf("\n\n==============================================\n"
728 "Sample 21: C: Test for substitution w/ callbacks & clones \n");
729
730 /* print out the original source */
731 printBytes("src", source);
732 printf("\n");
733
734 /* First, convert from UTF8 to unicode */
735 conv = ucnv_open("utf-8", &status);
736 U_ASSERT(status);
737
738 len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
739 U_ASSERT(status);
740
741 printUChars("uch", uchars, len);
742 printf("\n");
743
744 /* Now, close the converter */
745 ucnv_close(conv);
746
747 /* Now, convert to windows-1252 */
748 conv = ucnv_open("windows-1252", &status);
749 U_ASSERT(status);
750
751 /* Converter starts out with the SUBSTITUTE callback set. */
752
753 /* initialize our callback */
754 /* from the 'bottom' innermost, out
755 * CNV -> debugCtx1[debug] -> flagCtx[flag] -> debugCtx2[debug] */
756
757#if DEBUG_TMI
758 printf("flagCB_fromU = %p\n", &flagCB_fromU);
759 printf("debugCB_fromU = %p\n", &debugCB_fromU);
760#endif
761
762 debugCtx1 = debugCB_openContext();
763 flagCtx = flagCB_fromU_openContext();
764 debugCtx2 = debugCB_openContext();
765
766 debugCtx1->subCallback = flagCB_fromU; /* debug1 -> flag */
767 debugCtx1->subContext = flagCtx;
768
769 flagCtx->subCallback = debugCB_fromU; /* flag -> debug2 */
770 flagCtx->subContext = debugCtx2;
771
772 debugCtx2->subCallback = UCNV_FROM_U_CALLBACK_SUBSTITUTE;
773 debugCtx2->subContext = NULL;
774
775 /* Set our special callback */
776
777 ucnv_setFromUCallBack(conv,
778 debugCB_fromU,
779 debugCtx1,
780 &(debugCtx2->subCallback),
781 &(debugCtx2->subContext),
782 &status);
783
784 U_ASSERT(status);
785
786#if DEBUG_TMI
787 printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
788 conv, debugCtx1, debugCtx1->subCallback,
789 debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback);
790#endif
791
792 cloneLen = 1; /* but passing in null so it will clone */
793 cloneCnv = ucnv_safeClone(conv, NULL, &cloneLen, &status);
794
795 U_ASSERT(status);
796
797#if DEBUG_TMI
798 printf("Cloned converter from %p -> %p. Closing %p.\n", conv, cloneCnv, conv);
799#endif
800
801 ucnv_close(conv);
802
803#if DEBUG_TMI
804 printf("%p closed.\n", conv);
805#endif
806
807 U_ASSERT(status);
808 /* Now, we have to extract the context */
809 cloneDebugCtx = NULL;
810 cloneFlagCtx = NULL;
811
812 ucnv_getFromUCallBack(cloneCnv, &junkCB, (const void **)&cloneDebugCtx);
813 if(cloneDebugCtx != NULL) {
814 cloneFlagCtx = (FromUFLAGContext*) cloneDebugCtx -> subContext;
815 }
816
817 printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
818 cloneCnv, cloneDebugCtx, cloneFlagCtx, cloneFlagCtx?cloneFlagCtx->subContext:NULL );
819
820 len2 = ucnv_fromUChars(cloneCnv, bytes, 100, uchars, len, &status);
821 U_ASSERT(status);
822
823 if(cloneFlagCtx != NULL) {
824 flagVal = cloneFlagCtx->flag; /* it's about to go away when we close the cnv */
825 } else {
826 printf("** Warning, couldn't get the subcallback \n");
827 }
828
829 ucnv_close(cloneCnv);
830
831 /* print out the original source */
832 printBytes("bytes", bytes, len2);
833
834 return flagVal; /* true if callback was called */
835}
836
837UErrorCode convsample_21()
838{
839 const char *sample1 = "abc\xdf\xbf";
840 const char *sample2 = "abc_def";
841
842 if(convsample_21_didSubstitute(sample1))
843 {
844 printf("DID substitute.\n******\n");
845 }
846 else
847 {
848 printf("Did NOT substitute.\n*****\n");
849 }
850
851 if(convsample_21_didSubstitute(sample2))
852 {
853 printf("DID substitute.\n******\n");
854 }
855 else
856 {
857 printf("Did NOT substitute.\n*****\n");
858 }
859
860 return U_ZERO_ERROR;
861}
862
863
864// 40- C, cp37 -> UTF16 [data02.bin -> data40.utf16]
865
866#define BUFFERSIZE 17 /* make it interesting :) */
867
868UErrorCode convsample_40()
869{
870 printf("\n\n==============================================\n"
871 "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
872
873 FILE *f;
874 FILE *out;
875 int32_t count;
876 char inBuf[BUFFERSIZE];
877 const char *source;
878 const char *sourceLimit;
879 UChar *uBuf;
880 UChar *target;
881 UChar *targetLimit;
882 int32_t uBufSize = 0;
883 UConverter *conv = NULL;
884 UErrorCode status = U_ZERO_ERROR;
885 uint32_t inbytes=0, total=0;
886
887 f = fopen("data02.bin", "rb");
888 if(!f)
889 {
890 fprintf(stderr, "Couldn't open file 'data02.bin' (cp37 data file).\n");
891 return U_FILE_ACCESS_ERROR;
892 }
893
894 out = fopen("data40.utf16", "wb");
895 if(!out)
896 {
897 fprintf(stderr, "Couldn't create file 'data40.utf16'.\n");
898 return U_FILE_ACCESS_ERROR;
899 }
900
901 // **************************** START SAMPLE *******************
902 conv = ucnv_openCCSID(37, UCNV_IBM, &status);
903 assert(U_SUCCESS(status));
904
905 uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
906 printf("input bytes %d / min chars %d = %d UChars\n",
907 BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
908 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
909 assert(uBuf!=NULL);
910
911 // grab another buffer's worth
912 while((!feof(f)) &&
913 ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
914 {
915 inbytes += count;
916
917 // Convert bytes to unicode
918 source = inBuf;
919 sourceLimit = inBuf + count;
920
921 do
922 {
923 target = uBuf;
924 targetLimit = uBuf + uBufSize;
925
926 ucnv_toUnicode( conv, &target, targetLimit,
927 &source, sourceLimit, NULL,
928 feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
929 /* is true (when no more data will come) */
930 &status);
931
932 if(status == U_BUFFER_OVERFLOW_ERROR)
933 {
934 // simply ran out of space - we'll reset the target ptr the next
935 // time through the loop.
936 status = U_ZERO_ERROR;
937 }
938 else
939 {
940 // Check other errors here.
941 assert(U_SUCCESS(status));
942 // Break out of the loop (by force)
943 }
944
945 // Process the Unicode
946 // Todo: handle UTF-16/surrogates
947 assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
948 (size_t)(target-uBuf));
949 total += (target-uBuf);
950 } while (source < sourceLimit); // while simply out of space
951 }
952
953 printf("%d bytes in, %d UChars out.\n", inbytes, total);
954
955 // ***************************** END SAMPLE ********************
956 ucnv_close(conv);
957
958 fclose(f);
959 fclose(out);
960 printf("\n");
961
962 return U_ZERO_ERROR;
963}
964#undef BUFFERSIZE
965
966
967
968// 46- C, UTF16 -> latin2 [data40.utf16 -> data46.out]
969
970#define BUFFERSIZE 24 /* make it interesting :) */
971
972UErrorCode convsample_46()
973{
974 printf("\n\n==============================================\n"
975 "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");
976
977 FILE *f;
978 FILE *out;
979 int32_t count;
980 UChar inBuf[BUFFERSIZE];
981 const UChar *source;
982 const UChar *sourceLimit;
983 char *buf;
984 char *target;
985 char *targetLimit;
986
987 int32_t bufSize = 0;
988 UConverter *conv = NULL;
989 UErrorCode status = U_ZERO_ERROR;
990 uint32_t inchars=0, total=0;
991
992 f = fopen("data40.utf16", "rb");
993 if(!f)
994 {
995 fprintf(stderr, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
996 return U_FILE_ACCESS_ERROR;
997 }
998
999 out = fopen("data46.out", "wb");
1000 if(!out)
1001 {
1002 fprintf(stderr, "Couldn't create file 'data46.out'.\n");
1003 return U_FILE_ACCESS_ERROR;
1004 }
1005
1006 // **************************** START SAMPLE *******************
1007 conv = ucnv_open( "iso-8859-2", &status);
1008 assert(U_SUCCESS(status));
1009
1010 bufSize = (BUFFERSIZE*ucnv_getMaxCharSize(conv));
1011 printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
1012 BUFFERSIZE, ucnv_getMaxCharSize(conv), bufSize);
1013 buf = (char*)malloc(bufSize * sizeof(char));
1014 assert(buf!=NULL);
1015
1016 // grab another buffer's worth
1017 while((!feof(f)) &&
1018 ((count=fread(inBuf, sizeof(UChar), BUFFERSIZE , f)) > 0) )
1019 {
1020 inchars += count;
1021
1022 // Convert bytes to unicode
1023 source = inBuf;
1024 sourceLimit = inBuf + count;
1025
1026 do
1027 {
1028 target = buf;
1029 targetLimit = buf + bufSize;
1030
1031 ucnv_fromUnicode( conv, &target, targetLimit,
1032 &source, sourceLimit, NULL,
1033 feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
1034 /* is true (when no more data will come) */
1035 &status);
1036
1037 if(status == U_BUFFER_OVERFLOW_ERROR)
1038 {
1039 // simply ran out of space - we'll reset the target ptr the next
1040 // time through the loop.
1041 status = U_ZERO_ERROR;
1042 }
1043 else
1044 {
1045 // Check other errors here.
1046 assert(U_SUCCESS(status));
1047 // Break out of the loop (by force)
1048 }
1049
1050 // Process the Unicode
1051 assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) ==
1052 (size_t)(target-buf));
1053 total += (target-buf);
1054 } while (source < sourceLimit); // while simply out of space
1055 }
1056
1057 printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, inchars * sizeof(UChar), total);
1058
1059 // ***************************** END SAMPLE ********************
1060 ucnv_close(conv);
1061
1062 fclose(f);
1063 fclose(out);
1064 printf("\n");
1065
1066 return U_ZERO_ERROR;
1067}
1068#undef BUFFERSIZE
1069
1070#define BUFFERSIZE 219
1071
1072
1073/* main */
1074
1075int main()
1076{
1077
1078 printf("Default Converter=%s\n", ucnv_getDefaultName() );
1079
1080 convsample_02(); // C , u->koi8r, conv
1081 convsample_03(); // C, iterate
1082
1083 convsample_05(); // C, utf8->u, getNextUChar
1084 convsample_06(); // C freq counter thingy
1085
1086 convsample_12(); // C, sjis->u, conv
1087 convsample_13(); // C, big5->u, getNextU
1088
1089 convsample_20(); // C, callback
1090 convsample_21(); // C, callback debug
1091
1092 convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
1093
1094 convsample_46(); // C, UTF16 -> latin3 [data41.utf16 -> data46.out]
1095
1096 printf("End of converter samples.\n");
1097
1098 fflush(stdout);
1099 fflush(stderr);
1100
1101 return 0;
1102}