1 /**************************************************************************
3 * Copyright (C) 2000-2003, International Business Machines
4 * Corporation and others. All Rights Reserved.
6 ***************************************************************************
7 * file name: convsamp.c
8 * encoding: ASCII (7-bit)
10 * created on: 2000may30
11 * created by: Steven R. Loomis
13 * Sample code for the ICU conversion routines.
15 * Note: Nothing special is needed to build this sample. Link with
16 * the icu UC and icu I18N libraries.
18 * I use 'assert' for error checking; you will probably want
19 * something more flexible. '***BEGIN SAMPLE***' and
20 * '***END SAMPLE***' mark pieces suitable for stand alone
24 * Each test can define its own BUFFERSIZE
28 #define DEBUG_TMI 0 /* define to 1 to enable Too Much Information */
31 #include <ctype.h> /* for isspace, etc. */
34 #include <stdlib.h> /* malloc */
36 #include "unicode/utypes.h" /* Basic ICU data types */
37 #include "unicode/ucnv.h" /* C Converter API */
38 #include "unicode/ustring.h" /* some more string fcns*/
39 #include "unicode/uchar.h" /* char names */
40 #include "unicode/uloc.h"
41 #include "unicode/unistr.h"
45 /* Some utility functions */
47 static const UChar kNone
[] = { 0x0000 };
49 #define U_ASSERT(x) { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }}
51 /* Print a UChar if possible, in seven characters. */
52 void prettyPrintUChar(UChar c
)
56 printf(" '%c' ", (char)(0x00FF&c
));
57 } else if ( c
> 0x007F ) {
59 UErrorCode status
= U_ZERO_ERROR
;
62 o
= u_charName(c
, U_UNICODE_CHAR_NAME
, buf
, 1000, &status
);
63 if(U_SUCCESS(status
) && (o
>0) ) {
67 o
= u_charName(c
, U_UNICODE_10_CHAR_NAME
, buf
, 1000, &status
);
68 if(U_SUCCESS(status
) && (o
>0)) {
77 switch((char)(c
& 0x007F)) {
95 void printUChars(const char *name
= "?",
96 const UChar
*uch
= kNone
,
101 if( (len
== -1) && (uch
) ) {
105 printf("%5s: ", name
);
106 for( i
= 0; i
<len
; i
++) {
111 printf("%5s: ", "uni");
112 for( i
= 0; i
<len
; i
++) {
113 printf("\\u%04X ", (int)uch
[i
]);
117 printf("%5s:", "ch");
118 for( i
= 0; i
<len
; i
++) {
119 prettyPrintUChar(uch
[i
]);
124 void printBytes(const char *name
= "?",
125 const char *uch
= "",
130 if( (len
== -1) && (uch
) ) {
134 printf("%5s: ", name
);
135 for( i
= 0; i
<len
; i
++) {
140 printf("%5s: ", "uni");
141 for( i
= 0; i
<len
; i
++) {
142 printf("\\x%02X ", 0x00FF & (int)uch
[i
]);
146 printf("%5s:", "ch");
147 for( i
= 0; i
<len
; i
++) {
148 if(isgraph(0x00FF & (int)uch
[i
])) {
149 printf(" '%c' ", (char)uch
[i
]);
157 void printUChar(UChar32 ch32
)
160 printf("ch: U+%06X\n", ch32
);
163 UChar ch
= (UChar
)ch32
;
164 printUChars("C", &ch
, 1);
168 /*******************************************************************
169 Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
170 followed by an exclamation mark (!) into the KOI8-R Russian code page.
172 This example first creates a UChar String out of the Unicode chars.
174 targetSize must be set to the amount of space available in the target
175 buffer. After fromUChars is called,
176 len will contain the number of bytes in target[] which were
177 used in the resulting codepage. In this case, there is a 1:1 mapping
178 between the input and output characters. The exclamation mark has the
179 same value in both KOI8-R and Unicode.
182 uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
183 ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL '!'
186 uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
190 Converting FROM unicode
192 You must call ucnv_close to clean up the memory used by the
195 'len' returns the number of OUTPUT bytes resulting from the
199 UErrorCode
convsample_02()
201 printf("\n\n==============================================\n"
202 "Sample 02: C: simple Unicode -> koi8-r conversion\n");
205 // **************************** START SAMPLE *******************
207 UChar source
[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
208 0x0430, 0x0021, 0x0000 };
210 UErrorCode status
= U_ZERO_ERROR
;
214 // set up the converter
215 conv
= ucnv_open("koi8-r", &status
);
216 assert(U_SUCCESS(status
));
219 len
= ucnv_fromUChars(conv
, target
, 100, source
, -1, &status
);
220 assert(U_SUCCESS(status
));
222 // close the converter
225 // ***************************** END SAMPLE ********************
228 printUChars("src", source
);
230 printBytes("targ", target
, len
);
236 UErrorCode
convsample_03()
238 printf("\n\n==============================================\n"
239 "Sample 03: C: print out all converters\n");
244 // **************************** START SAMPLE *******************
245 count
= ucnv_countAvailable();
246 printf("Available converters: %d\n", count
);
250 printf("%s ", ucnv_getAvailableName(i
));
253 // ***************************** END SAMPLE ********************
262 #define BUFFERSIZE 17 /* make it interesting :) */
265 Converting from a codepage to Unicode in bulk.
266 What is the best way to determine the buffer size?
268 The 'buffersize' is in bytes of input.
269 For a given converter, dividing this by the minimum char size
270 gives you the maximum number of Unicode characters that could be
271 expected for a given number of input bytes.
272 see: ucnv_getMinCharSize()
274 For example, a single byte codepage like 'Latin-3' has a
275 minimum char size of 1. (It takes at least 1 byte to represent
276 each Unicode char.) So the unicode buffer has the same number of
277 UChars as the input buffer has bytes.
279 In a strictly double byte codepage such as cp1362 (Windows
280 Korean), the minimum char size is 2. So, only half as many Unicode
281 chars as bytes are needed.
283 This work to calculate the buffer size is an optimization. Any
284 size of input and output buffer can be used, as long as the
285 program handles the following cases: If the input buffer is empty,
286 the source pointer will be equal to sourceLimit. If the output
287 buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
290 UErrorCode
convsample_05()
292 printf("\n\n==============================================\n"
293 "Sample 05: C: count the number of letters in a UTF-8 document\n");
297 char inBuf
[BUFFERSIZE
];
299 const char *sourceLimit
;
304 int32_t uBufSize
= 0;
306 UErrorCode status
= U_ZERO_ERROR
;
307 uint32_t letters
=0, total
=0;
309 f
= fopen("data01.txt", "r");
312 fprintf(stderr
, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
313 return U_FILE_ACCESS_ERROR
;
316 // **************************** START SAMPLE *******************
317 conv
= ucnv_open("utf-8", &status
);
318 assert(U_SUCCESS(status
));
320 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
321 printf("input bytes %d / min chars %d = %d UChars\n",
322 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
323 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
326 // grab another buffer's worth
328 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
330 // Convert bytes to unicode
332 sourceLimit
= inBuf
+ count
;
337 targetLimit
= uBuf
+ uBufSize
;
339 ucnv_toUnicode(conv
, &target
, targetLimit
,
340 &source
, sourceLimit
, NULL
,
341 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
342 /* is true (when no more data will come) */
345 if(status
== U_BUFFER_OVERFLOW_ERROR
)
347 // simply ran out of space - we'll reset the target ptr the next
348 // time through the loop.
349 status
= U_ZERO_ERROR
;
353 // Check other errors here.
354 assert(U_SUCCESS(status
));
355 // Break out of the loop (by force)
358 // Process the Unicode
359 // Todo: handle UTF-16/surrogates
361 for(p
= uBuf
; p
<target
; p
++)
367 } while (source
< sourceLimit
); // while simply out of space
370 printf("%d letters out of %d total UChars.\n", letters
, total
);
372 // ***************************** END SAMPLE ********************
381 #define BUFFERSIZE 1024
388 UErrorCode
convsample_06()
390 printf("\n\n==============================================\n"
391 "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
395 char inBuf
[BUFFERSIZE
];
397 const char *sourceLimit
;
399 int32_t uBufSize
= 0;
401 UErrorCode status
= U_ZERO_ERROR
;
402 uint32_t letters
=0, total
=0;
405 UChar32 charCount
= 0x10000; /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
412 f
= fopen("data06.txt", "r");
415 fprintf(stderr
, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
416 return U_FILE_ACCESS_ERROR
;
419 info
= (CharFreqInfo
*)malloc(sizeof(CharFreqInfo
) * charCount
);
422 fprintf(stderr
, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo
)*charCount
);
425 /* reset frequencies */
426 for(p
=0;p
<charCount
;p
++)
428 info
[p
].codepoint
= p
;
429 info
[p
].frequency
= 0;
432 // **************************** START SAMPLE *******************
433 conv
= ucnv_open("utf-8", &status
);
434 assert(U_SUCCESS(status
));
436 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
437 printf("input bytes %d / min chars %d = %d UChars\n",
438 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
439 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
442 // grab another buffer's worth
444 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
446 // Convert bytes to unicode
448 sourceLimit
= inBuf
+ count
;
450 while(source
< sourceLimit
)
452 p
= ucnv_getNextUChar(conv
, &source
, sourceLimit
, &status
);
453 if(U_FAILURE(status
))
455 fprintf(stderr
, "%s @ %d\n", u_errorName(status
), total
);
456 status
= U_ZERO_ERROR
;
465 if((u_tolower(l
) == 'i') && (u_tolower(p
) == 'e'))
468 if((u_tolower(l
) == 'g') && (u_tolower(p
) == 0x0127))
473 fprintf(stderr
, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p
);
474 return U_UNSUPPORTED_ERROR
;
484 printf("%d letters out of %d total UChars.\n", letters
, total
);
485 printf("%d ie digraphs, %d gh digraphs.\n", ie
, gh
);
487 // now, we could sort it..
489 // qsort(info, charCount, sizeof(info[0]), charfreq_compare);
491 for(p
=0;p
<charCount
;p
++)
493 if(info
[p
].frequency
)
495 printf("% 5d U+%06X ", info
[p
].frequency
, p
);
498 prettyPrintUChar((UChar
)p
);
504 // ***************************** END SAMPLE ********************
513 /******************************************************
514 You must call ucnv_close to clean up the memory used by the
517 'len' returns the number of OUTPUT bytes resulting from the
521 UErrorCode
convsample_12()
523 printf("\n\n==============================================\n"
524 "Sample 12: C: simple sjis -> unicode conversion\n");
527 // **************************** START SAMPLE *******************
529 char source
[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
531 UErrorCode status
= U_ZERO_ERROR
;
535 // set up the converter
536 conv
= ucnv_open("shift_jis", &status
);
537 assert(U_SUCCESS(status
));
539 // convert to Unicode
540 // Note: we can use strlen, we know it's an 8 bit null terminated codepage
542 len
= ucnv_toUChars(conv
, target
, 100, source
, strlen(source
), &status
);
544 // close the converter
547 // ***************************** END SAMPLE ********************
550 printBytes("src", source
, strlen(source
) );
552 printUChars("targ", target
, len
);
557 /******************************************************************
558 C: Convert from codepage to Unicode one at a time.
561 UErrorCode
convsample_13()
563 printf("\n\n==============================================\n"
564 "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
567 const char sourceChars
[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e };
568 // const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
569 const char *source
, *sourceLimit
;
571 UErrorCode status
= U_ZERO_ERROR
;
572 UConverter
*conv
= NULL
;
576 srcCount
= sizeof(sourceChars
);
578 conv
= ucnv_open("Big5", &status
);
581 source
= sourceChars
;
582 sourceLimit
= sourceChars
+ sizeof(sourceChars
);
584 // **************************** START SAMPLE *******************
587 printBytes("src",source
,sourceLimit
-source
);
589 while(source
< sourceLimit
)
592 target
= ucnv_getNextUChar (conv
,
597 // printBytes("src",source,sourceLimit-source);
604 // ************************** END SAMPLE *************************
606 printf("src=%d bytes, dst=%d uchars\n", srcCount
, dstCount
);
615 UBool
convsample_20_didSubstitute(const char *source
)
619 UConverter
*conv
= NULL
;
620 UErrorCode status
= U_ZERO_ERROR
;
624 FromUFLAGContext
* context
= NULL
;
626 printf("\n\n==============================================\n"
627 "Sample 20: C: Test for substitution using callbacks\n");
629 /* print out the original source */
630 printBytes("src", source
);
633 /* First, convert from UTF8 to unicode */
634 conv
= ucnv_open("utf-8", &status
);
637 len
= ucnv_toUChars(conv
, uchars
, 100, source
, strlen(source
), &status
);
640 printUChars("uch", uchars
, len
);
643 /* Now, close the converter */
646 /* Now, convert to windows-1252 */
647 conv
= ucnv_open("windows-1252", &status
);
650 /* Converter starts out with the SUBSTITUTE callback set. */
652 /* initialize our callback */
653 context
= flagCB_fromU_openContext();
655 /* Set our special callback */
656 ucnv_setFromUCallBack(conv
,
659 &(context
->subCallback
),
660 &(context
->subContext
),
665 len2
= ucnv_fromUChars(conv
, bytes
, 100, uchars
, len
, &status
);
668 flagVal
= context
->flag
; /* it's about to go away when we close the cnv */
672 /* print out the converted bytes */
673 printBytes("bytes", bytes
, len2
);
675 return flagVal
; /* true if callback was called */
678 UErrorCode
convsample_20()
680 const char *sample1
= "abc\xdf\xbf";
681 const char *sample2
= "abc_def";
684 if(convsample_20_didSubstitute(sample1
))
686 printf("DID substitute.\n******\n");
690 printf("Did NOT substitute.\n*****\n");
693 if(convsample_20_didSubstitute(sample2
))
695 printf("DID substitute.\n******\n");
699 printf("Did NOT substitute.\n*****\n");
705 // 21 - C, callback, with clone and debug
709 UBool
convsample_21_didSubstitute(const char *source
)
713 UConverter
*conv
= NULL
, *cloneCnv
= NULL
;
714 UErrorCode status
= U_ZERO_ERROR
;
717 UBool flagVal
= FALSE
;
718 UConverterFromUCallback junkCB
;
720 FromUFLAGContext
*flagCtx
= NULL
,
721 *cloneFlagCtx
= NULL
;
723 debugCBContext
*debugCtx1
= NULL
,
725 *cloneDebugCtx
= NULL
;
727 printf("\n\n==============================================\n"
728 "Sample 21: C: Test for substitution w/ callbacks & clones \n");
730 /* print out the original source */
731 printBytes("src", source
);
734 /* First, convert from UTF8 to unicode */
735 conv
= ucnv_open("utf-8", &status
);
738 len
= ucnv_toUChars(conv
, uchars
, 100, source
, strlen(source
), &status
);
741 printUChars("uch", uchars
, len
);
744 /* Now, close the converter */
747 /* Now, convert to windows-1252 */
748 conv
= ucnv_open("windows-1252", &status
);
751 /* Converter starts out with the SUBSTITUTE callback set. */
753 /* initialize our callback */
754 /* from the 'bottom' innermost, out
755 * CNV -> debugCtx1[debug] -> flagCtx[flag] -> debugCtx2[debug] */
758 printf("flagCB_fromU = %p\n", &flagCB_fromU
);
759 printf("debugCB_fromU = %p\n", &debugCB_fromU
);
762 debugCtx1
= debugCB_openContext();
763 flagCtx
= flagCB_fromU_openContext();
764 debugCtx2
= debugCB_openContext();
766 debugCtx1
->subCallback
= flagCB_fromU
; /* debug1 -> flag */
767 debugCtx1
->subContext
= flagCtx
;
769 flagCtx
->subCallback
= debugCB_fromU
; /* flag -> debug2 */
770 flagCtx
->subContext
= debugCtx2
;
772 debugCtx2
->subCallback
= UCNV_FROM_U_CALLBACK_SUBSTITUTE
;
773 debugCtx2
->subContext
= NULL
;
775 /* Set our special callback */
777 ucnv_setFromUCallBack(conv
,
780 &(debugCtx2
->subCallback
),
781 &(debugCtx2
->subContext
),
787 printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
788 conv
, debugCtx1
, debugCtx1
->subCallback
,
789 debugCtx1
->subContext
, flagCtx
, debugCtx2
, debugCtx2
->subCallback
);
792 cloneLen
= 1; /* but passing in null so it will clone */
793 cloneCnv
= ucnv_safeClone(conv
, NULL
, &cloneLen
, &status
);
798 printf("Cloned converter from %p -> %p. Closing %p.\n", conv
, cloneCnv
, conv
);
804 printf("%p closed.\n", conv
);
808 /* Now, we have to extract the context */
809 cloneDebugCtx
= NULL
;
812 ucnv_getFromUCallBack(cloneCnv
, &junkCB
, (const void **)&cloneDebugCtx
);
813 if(cloneDebugCtx
!= NULL
) {
814 cloneFlagCtx
= (FromUFLAGContext
*) cloneDebugCtx
-> subContext
;
817 printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
818 cloneCnv
, cloneDebugCtx
, cloneFlagCtx
, cloneFlagCtx
?cloneFlagCtx
->subContext
:NULL
);
820 len2
= ucnv_fromUChars(cloneCnv
, bytes
, 100, uchars
, len
, &status
);
823 if(cloneFlagCtx
!= NULL
) {
824 flagVal
= cloneFlagCtx
->flag
; /* it's about to go away when we close the cnv */
826 printf("** Warning, couldn't get the subcallback \n");
829 ucnv_close(cloneCnv
);
831 /* print out the converted bytes */
832 printBytes("bytes", bytes
, len2
);
834 return flagVal
; /* true if callback was called */
837 UErrorCode
convsample_21()
839 const char *sample1
= "abc\xdf\xbf";
840 const char *sample2
= "abc_def";
842 if(convsample_21_didSubstitute(sample1
))
844 printf("DID substitute.\n******\n");
848 printf("Did NOT substitute.\n*****\n");
851 if(convsample_21_didSubstitute(sample2
))
853 printf("DID substitute.\n******\n");
857 printf("Did NOT substitute.\n*****\n");
864 // 40- C, cp37 -> UTF16 [data02.bin -> data40.utf16]
866 #define BUFFERSIZE 17 /* make it interesting :) */
868 UErrorCode
convsample_40()
870 printf("\n\n==============================================\n"
871 "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
876 char inBuf
[BUFFERSIZE
];
878 const char *sourceLimit
;
882 int32_t uBufSize
= 0;
883 UConverter
*conv
= NULL
;
884 UErrorCode status
= U_ZERO_ERROR
;
885 uint32_t inbytes
=0, total
=0;
887 f
= fopen("data02.bin", "rb");
890 fprintf(stderr
, "Couldn't open file 'data02.bin' (cp37 data file).\n");
891 return U_FILE_ACCESS_ERROR
;
894 out
= fopen("data40.utf16", "wb");
897 fprintf(stderr
, "Couldn't create file 'data40.utf16'.\n");
898 return U_FILE_ACCESS_ERROR
;
901 // **************************** START SAMPLE *******************
902 conv
= ucnv_openCCSID(37, UCNV_IBM
, &status
);
903 assert(U_SUCCESS(status
));
905 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
906 printf("input bytes %d / min chars %d = %d UChars\n",
907 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
908 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
911 // grab another buffer's worth
913 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
917 // Convert bytes to unicode
919 sourceLimit
= inBuf
+ count
;
924 targetLimit
= uBuf
+ uBufSize
;
926 ucnv_toUnicode( conv
, &target
, targetLimit
,
927 &source
, sourceLimit
, NULL
,
928 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
929 /* is true (when no more data will come) */
932 if(status
== U_BUFFER_OVERFLOW_ERROR
)
934 // simply ran out of space - we'll reset the target ptr the next
935 // time through the loop.
936 status
= U_ZERO_ERROR
;
940 // Check other errors here.
941 assert(U_SUCCESS(status
));
942 // Break out of the loop (by force)
945 // Process the Unicode
946 // Todo: handle UTF-16/surrogates
947 assert(fwrite(uBuf
, sizeof(uBuf
[0]), (target
-uBuf
), out
) ==
948 (size_t)(target
-uBuf
));
949 total
+= (target
-uBuf
);
950 } while (source
< sourceLimit
); // while simply out of space
953 printf("%d bytes in, %d UChars out.\n", inbytes
, total
);
955 // ***************************** END SAMPLE ********************
968 // 46- C, UTF16 -> latin2 [data40.utf16 -> data46.out]
970 #define BUFFERSIZE 24 /* make it interesting :) */
972 UErrorCode
convsample_46()
974 printf("\n\n==============================================\n"
975 "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");
980 UChar inBuf
[BUFFERSIZE
];
982 const UChar
*sourceLimit
;
988 UConverter
*conv
= NULL
;
989 UErrorCode status
= U_ZERO_ERROR
;
990 uint32_t inchars
=0, total
=0;
992 f
= fopen("data40.utf16", "rb");
995 fprintf(stderr
, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
996 return U_FILE_ACCESS_ERROR
;
999 out
= fopen("data46.out", "wb");
1002 fprintf(stderr
, "Couldn't create file 'data46.out'.\n");
1003 return U_FILE_ACCESS_ERROR
;
1006 // **************************** START SAMPLE *******************
1007 conv
= ucnv_open( "iso-8859-2", &status
);
1008 assert(U_SUCCESS(status
));
1010 bufSize
= (BUFFERSIZE
*ucnv_getMaxCharSize(conv
));
1011 printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
1012 BUFFERSIZE
, ucnv_getMaxCharSize(conv
), bufSize
);
1013 buf
= (char*)malloc(bufSize
* sizeof(char));
1016 // grab another buffer's worth
1018 ((count
=fread(inBuf
, sizeof(UChar
), BUFFERSIZE
, f
)) > 0) )
1022 // Convert unicode to bytes
1024 sourceLimit
= inBuf
+ count
;
1029 targetLimit
= buf
+ bufSize
;
1031 ucnv_fromUnicode( conv
, &target
, targetLimit
,
1032 &source
, sourceLimit
, NULL
,
1033 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
1034 /* is true (when no more data will come) */
1037 if(status
== U_BUFFER_OVERFLOW_ERROR
)
1039 // simply ran out of space - we'll reset the target ptr the next
1040 // time through the loop.
1041 status
= U_ZERO_ERROR
;
1045 // Check other errors here.
1046 assert(U_SUCCESS(status
));
1047 // Break out of the loop (by force)
1050 // Write out the converted bytes
1051 assert(fwrite(buf
, sizeof(buf
[0]), (target
-buf
), out
) ==
1052 (size_t)(target
-buf
));
1053 total
+= (target
-buf
);
1054 } while (source
< sourceLimit
); // while simply out of space
1057 printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars
, inchars
* sizeof(UChar
), total
);
1059 // ***************************** END SAMPLE ********************
1066 return U_ZERO_ERROR
;
1070 #define BUFFERSIZE 219
1078 printf("Default Converter=%s\n", ucnv_getDefaultName() );
1080 convsample_02(); // C , u->koi8r, conv
1081 convsample_03(); // C, iterate
1083 convsample_05(); // C, utf8->u, toUnicode
1084 convsample_06(); // C freq counter thingy
1086 convsample_12(); // C, sjis->u, conv
1087 convsample_13(); // C, big5->u, getNextU
1089 convsample_20(); // C, callback
1090 convsample_21(); // C, callback debug
1092 convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
1094 convsample_46(); // C, UTF16 -> latin2 [data40.utf16 -> data46.out]
1096 printf("End of converter samples.\n");