1 /**************************************************************************
3 * Copyright (C) 2000-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
6 ***************************************************************************
7 * file name: convsamp.c
8 * encoding: ASCII (7-bit)
10 * created on: 2000may30
11 * created by: Steven R. Loomis
13 * Sample code for the ICU conversion routines.
15 * Note: Nothing special is needed to build this sample. Link with
16 * the icu UC and icu I18N libraries.
18 * I use 'assert' for error checking; you will probably want
19 * something more flexible. '***BEGIN SAMPLE***' and
20 * '***END SAMPLE***' mark pieces suitable for stand alone
24 * Each test can define its own BUFFERSIZE
28 #define DEBUG_TMI 0 /* define to 1 to enable Too Much Information */
31 #include <ctype.h> /* for isspace, etc. */
34 #include <stdlib.h> /* malloc */
36 #include "unicode/utypes.h" /* Basic ICU data types */
37 #include "unicode/ucnv.h" /* C Converter API */
38 #include "unicode/ustring.h" /* some more string fcns*/
39 #include "unicode/uchar.h" /* char names */
40 #include "unicode/uloc.h"
41 #include "unicode/unistr.h"
45 /* Some utility functions */
/* One-element UChar array containing only U+0000. It is used further down
 * as the default value of the 'uch' parameter of printUChars(). */
47 static const UChar kNone
[] = { 0x0000 };
/* U_ASSERT(x): if x is a failing UErrorCode, flush stdout and stderr, print
 * "<text of x> == <error name>" to stderr, and then trip assert().
 * Note that x is expanded (and therefore evaluated) more than once, so pass
 * a plain variable rather than an expression with side effects. */
49 #define U_ASSERT(x) { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }}
51 /* Print a UChar if possible, in seven characters. */
/*
 * prettyPrintUChar: print one UChar to stdout in a short readable form.
 *
 *   c - the UTF-16 code unit to print.
 *
 * Visible behavior: one branch prints the low byte of c as a quoted
 * character; for c > 0x007F the character name is requested from
 * u_charName() (first with U_UNICODE_CHAR_NAME, then with
 * U_UNICODE_10_CHAR_NAME) into 'buf', passing a capacity of 1000; the
 * last visible statement is a switch on the low seven bits of c.
 *
 * NOTE(review): this listing is missing lines -- the opening brace, the
 * first 'if', the declarations of 'buf' and 'o', and the bodies of the
 * name branches and of the switch -- so what each branch prints cannot be
 * confirmed from here.
 */
52 void prettyPrintUChar(UChar c
)
56 printf(" '%c' ", (char)(0x00FF&c
));
57 } else if ( c
> 0x007F ) {
59 UErrorCode status
= U_ZERO_ERROR
;
// First lookup: the U_UNICODE_CHAR_NAME name of c.
62 o
= u_charName(c
, U_UNICODE_CHAR_NAME
, buf
, 1000, &status
);
63 if(U_SUCCESS(status
) && (o
>0) ) {
// Second lookup: the U_UNICODE_10_CHAR_NAME name of c.
67 o
= u_charName(c
, U_UNICODE_10_CHAR_NAME
, buf
, 1000, &status
);
68 if(U_SUCCESS(status
) && (o
>0)) {
77 switch((char)(c
& 0x007F)) {
/*
 * printUChars: dump a run of UChars to stdout as labelled rows.
 *
 *   name - label printed in front of the first row (default "?").
 *   uch  - the UChars to print (default kNone).
 *   len  - number of UChars to print. The combination len == -1 with a
 *          non-null uch is special-cased; presumably the length is then
 *          computed from a terminated string -- the handling line is not
 *          visible, confirm against the full file.
 *
 * Rows visible here: one labelled with 'name', one labelled "uni" that
 * shows every unit as \uXXXX, and one labelled "ch" that renders every
 * unit through prettyPrintUChar().
 *
 * NOTE(review): the rest of the parameter list, the declaration of 'i',
 * the braces and the body of the first loop are missing from this listing.
 */
95 void printUChars(const char *name
= "?",
96 const UChar
*uch
= kNone
,
101 if( (len
== -1) && (uch
) ) {
105 printf("%5s: ", name
);
106 for( i
= 0; i
<len
; i
++) {
// Row of code unit values in \uXXXX notation.
111 printf("%5s: ", "uni");
112 for( i
= 0; i
<len
; i
++) {
113 printf("\\u%04X ", (int)uch
[i
]);
// Row of per-unit renderings produced by prettyPrintUChar().
117 printf("%5s:", "ch");
118 for( i
= 0; i
<len
; i
++) {
119 prettyPrintUChar(uch
[i
]);
/*
 * printBytes: dump a run of bytes to stdout as labelled rows.
 *
 *   name - label printed in front of the first row (default "?").
 *   uch  - the bytes to print (default "").
 *   len  - number of bytes to print. The combination len == -1 with a
 *          non-null uch is special-cased; presumably the length is then
 *          taken from the terminated string -- the handling line is not
 *          visible, confirm against the full file.
 *
 * Rows visible here: one labelled with 'name', one that shows every byte
 * as \xXX (its label is the literal "uni", the same label printUChars uses),
 * and one labelled "ch" that prints a byte as a quoted character when
 * isgraph() accepts it.
 *
 * NOTE(review): the rest of the parameter list, the declaration of 'i',
 * the braces, the body of the first loop and the non-graphic branch of the
 * last loop are missing from this listing.
 */
124 void printBytes(const char *name
= "?",
125 const char *uch
= "",
130 if( (len
== -1) && (uch
) ) {
134 printf("%5s: ", name
);
135 for( i
= 0; i
<len
; i
++) {
// Row of byte values in \xXX notation; each byte is masked to 0..255 first.
140 printf("%5s: ", "uni");
141 for( i
= 0; i
<len
; i
++) {
142 printf("\\x%02X ", 0x00FF & (int)uch
[i
]);
// Row of printable renderings; the mask keeps the isgraph() argument
// non-negative even where plain char is signed.
146 printf("%5s:", "ch");
147 for( i
= 0; i
<len
; i
++) {
148 if(isgraph(0x00FF & (int)uch
[i
])) {
149 printf(" '%c' ", (char)uch
[i
]);
/*
 * printUChar: print a single code point.
 *
 *   ch32 - the code point to print.
 *
 * Two outputs are visible: a "ch: U+XXXXXX" line, and a dump of the value
 * narrowed to one UChar through printUChars("C", &ch, 1).
 *
 * NOTE(review): lines between the visible statements are missing, so it
 * cannot be told from here which condition selects each output. The cast
 * to UChar keeps only 16 bits -- presumably it is reached only for BMP
 * values; confirm against the full file.
 */
157 void printUChar(UChar32 ch32
)
160 printf("ch: U+%06X\n", ch32
);
163 UChar ch
= (UChar
)ch32
;
164 printUChars("C", &ch
, 1);
168 /*******************************************************************
169 Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
170 followed by an exclamation mark (!) into the KOI8-R Russian code page.
172 This example first creates a UChar String out of the Unicode chars.
174 targetSize must be set to the amount of space available in the target
175 buffer. After fromUChars is called,
176 len will contain the number of bytes in target[] which were
177 used in the resulting codepage. In this case, there is a 1:1 mapping
178 between the input and output characters. The exclamation mark has the
179 same value in both KOI8-R and Unicode.
182 uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
183 ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL '!'
186 uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
190 Converting FROM unicode
192 You must call ucnv_close to clean up the memory used by the
195 'len' returns the number of OUTPUT bytes resulting from the
/*
 * convsample_02: convert a fixed UTF-16 string to KOI8-R in a single call.
 *
 * Opens a "koi8-r" converter, converts the NUL-terminated 'source' array
 * with ucnv_fromUChars() (source length -1, target capacity 100), asserts
 * success after the open and after the conversion, and finally prints the
 * source UChars and the 'len' bytes written to 'target'.
 *
 * NOTE(review): the declarations of 'conv', 'target' and 'len', the close
 * call announced by the "close the converter" comment, the braces and the
 * return statement are missing from this listing.
 */
199 UErrorCode
convsample_02()
201 printf("\n\n==============================================\n"
202 "Sample 02: C: simple Unicode -> koi8-r conversion\n");
205 // **************************** START SAMPLE *******************
// Seven code units followed by a terminating 0x0000.
207 UChar source
[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
208 0x0430, 0x0021, 0x0000 };
210 UErrorCode status
= U_ZERO_ERROR
;
214 // set up the converter
215 conv
= ucnv_open("koi8-r", &status
);
216 assert(U_SUCCESS(status
));
// Source length -1: the terminator in 'source' marks the end of the input.
219 len
= ucnv_fromUChars(conv
, target
, 100, source
, -1, &status
);
220 assert(U_SUCCESS(status
));
222 // close the converter
225 // ***************************** END SAMPLE ********************
228 printUChars("src", source
);
230 printBytes("targ", target
, len
);
/*
 * convsample_03: list the converters that ICU reports as available.
 *
 * Prints the count returned by ucnv_countAvailable() and then the name
 * returned by ucnv_getAvailableName(i).
 *
 * NOTE(review): the declarations of 'count' and 'i', the loop header that
 * presumably drives 'i' from 0 to count, the braces and the return
 * statement are missing from this listing.
 */
236 UErrorCode
convsample_03()
238 printf("\n\n==============================================\n"
239 "Sample 03: C: print out all converters\n");
244 // **************************** START SAMPLE *******************
245 count
= ucnv_countAvailable();
246 printf("Available converters: %d\n", count
);
250 printf("%s ", ucnv_getAvailableName(i
));
253 // ***************************** END SAMPLE ********************
262 #define BUFFERSIZE 17 /* make it interesting :) */
265 Converting from a codepage to Unicode in bulk..
266 What is the best way to determine the buffer size?
268 The 'buffersize' is in bytes of input.
269 For a given converter, dividing this by the minimum char size
270 gives you the maximum number of Unicode characters that could be
271 expected for a given number of input bytes.
272 see: ucnv_getMinCharSize()
274 For example, a single byte codepage like 'Latin-3' has a
275 minimum char size of 1. (It takes at least 1 byte to represent
276 each Unicode char.) So the unicode buffer has the same number of
277 UChars as the input buffer has bytes.
279 In a strictly double byte codepage such as cp1362 (Windows
280 Korean), the minimum char size is 2. So, only half as many Unicode
281 chars as bytes are needed.
283 This work to calculate the buffer size is an optimization. Any
284 size of input and output buffer can be used, as long as the
285 program handles the following cases: If the input buffer is empty,
286 the source pointer will be equal to sourceLimit. If the output
287 buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
/*
 * convsample_05: stream a UTF-8 file through a converter in small chunks.
 *
 * Opens "data01.txt" (returning U_FILE_ACCESS_ERROR with a message on
 * stderr when that fails), opens a "utf-8" converter, and sizes the UChar
 * buffer as BUFFERSIZE divided by the converter's minimum char size.
 * Input is read with fread() in pieces of up to BUFFERSIZE bytes; each
 * piece is fed to ucnv_toUnicode() repeatedly until 'source' reaches
 * 'sourceLimit', treating U_BUFFER_OVERFLOW_ERROR as "output full, go
 * round again". The converted UChars between uBuf and 'target' are then
 * walked, and the 'letters' and 'total' counters are printed at the end.
 *
 * NOTE(review): several declarations ('f', 'conv', 'uBuf', 'source',
 * 'target', 'targetLimit', 'count', 'p'), the outer loop header, the body
 * of the counting loop, all cleanup calls and the return statement are
 * missing from this listing.
 * NOTE(review): 'letters' and 'total' are uint32_t but are printed with
 * %d; an unsigned conversion specifier would match the type.
 */
290 UErrorCode
convsample_05()
292 printf("\n\n==============================================\n"
293 "Sample 05: C: count the number of letters in a UTF-8 document\n");
297 char inBuf
[BUFFERSIZE
];
299 const char *sourceLimit
;
304 int32_t uBufSize
= 0;
306 UErrorCode status
= U_ZERO_ERROR
;
307 uint32_t letters
=0, total
=0;
309 f
= fopen("data01.txt", "r");
312 fprintf(stderr
, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
313 return U_FILE_ACCESS_ERROR
;
316 // **************************** START SAMPLE *******************
317 conv
= ucnv_open("utf-8", &status
);
318 assert(U_SUCCESS(status
));
// Output capacity in UChars: input bytes divided by the minimum bytes per char.
320 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
321 printf("input bytes %d / min chars %d = %d UChars\n",
322 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
323 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
326 // grab another buffer's worth
328 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
330 // Convert bytes to unicode
332 sourceLimit
= inBuf
+ count
;
337 targetLimit
= uBuf
+ uBufSize
;
339 ucnv_toUnicode(conv
, &target
, targetLimit
,
340 &source
, sourceLimit
, NULL
,
341 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
342 /* is true (when no more data will come) */
345 if(status
== U_BUFFER_OVERFLOW_ERROR
)
347 // simply ran out of space - we'll reset the target ptr the next
348 // time through the loop.
349 status
= U_ZERO_ERROR
;
353 // Check other errors here.
354 assert(U_SUCCESS(status
));
355 // Break out of the loop (by force)
358 // Process the Unicode
359 // Todo: handle UTF-16/surrogates
// Walk the UChars produced by this conversion call: [uBuf, target).
361 for(p
= uBuf
; p
<target
; p
++)
367 } while (source
< sourceLimit
); // while simply out of space
370 printf("%d letters out of %d total UChars.\n", letters
, total
);
372 // ***************************** END SAMPLE ********************
383 #define BUFFERSIZE 1024
/*
 * convsample_06: tally code point frequencies in a UTF-8 file.
 *
 * Opens "data06.txt" (returning U_FILE_ACCESS_ERROR with a message on
 * stderr when that fails), allocates one CharFreqInfo per code point below
 * charCount (0x10000) and initialises each entry with its code point and a
 * zero frequency. Input is read with fread() in pieces of up to BUFFERSIZE
 * bytes and decoded one code point at a time with ucnv_getNextUChar();
 * conversion failures are reported on stderr and the status is reset.
 * Two character pairs are tested via u_tolower(): 'i' followed by 'e', and
 * 'g' followed by U+0127. A message mentioning non-BMP characters leads to
 * a return of U_UNSUPPORTED_ERROR. At the end the counters are printed,
 * followed by one line per code point whose frequency is non-zero.
 *
 * NOTE(review): many lines are missing from this listing, including the
 * declarations of 'f', 'conv', 'uBuf', 'source', 'count', 'p', 'l', 'ie',
 * 'gh' and 'info', the statements that update the counters and
 * frequencies, the conditions guarding the error branches, all cleanup
 * calls and the final return.
 * NOTE(review): the allocation-failure message formats a size_t expression
 * (sizeof(CharFreqInfo)*charCount) with %d; the specifier does not match
 * the argument type.
 */
390 UErrorCode
convsample_06()
392 printf("\n\n==============================================\n"
393 "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
397 char inBuf
[BUFFERSIZE
];
399 const char *sourceLimit
;
401 int32_t uBufSize
= 0;
403 UErrorCode status
= U_ZERO_ERROR
;
404 uint32_t letters
=0, total
=0;
407 UChar32 charCount
= 0x10000; /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
414 f
= fopen("data06.txt", "r");
417 fprintf(stderr
, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
418 return U_FILE_ACCESS_ERROR
;
// One frequency record per code point in [0, charCount).
421 info
= (CharFreqInfo
*)malloc(sizeof(CharFreqInfo
) * charCount
);
424 fprintf(stderr
, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo
)*charCount
);
427 /* reset frequencies */
428 for(p
=0;p
<charCount
;p
++)
430 info
[p
].codepoint
= p
;
431 info
[p
].frequency
= 0;
434 // **************************** START SAMPLE *******************
435 conv
= ucnv_open("utf-8", &status
);
436 assert(U_SUCCESS(status
));
438 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
439 printf("input bytes %d / min chars %d = %d UChars\n",
440 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
441 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
444 // grab another buffer's worth
446 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
448 // Convert bytes to unicode
450 sourceLimit
= inBuf
+ count
;
// Decode the chunk one code point at a time.
452 while(source
< sourceLimit
)
454 p
= ucnv_getNextUChar(conv
, &source
, sourceLimit
, &status
);
455 if(U_FAILURE(status
))
457 fprintf(stderr
, "%s @ %d\n", u_errorName(status
), total
);
458 status
= U_ZERO_ERROR
;
// Pair tests: 'l' is compared as the first character and 'p' as the second
// (presumably 'l' holds the previously decoded code point -- its update is
// not visible here).
467 if((u_tolower(l
) == 'i') && (u_tolower(p
) == 'e'))
470 if((u_tolower(l
) == 'g') && (u_tolower(p
) == 0x0127))
475 fprintf(stderr
, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p
);
476 return U_UNSUPPORTED_ERROR
;
486 printf("%d letters out of %d total UChars.\n", letters
, total
);
487 printf("%d ie digraphs, %d gh digraphs.\n", ie
, gh
);
489 // now, we could sort it..
491 // qsort(info, charCount, sizeof(info[0]), charfreq_compare);
// Report every code point that was seen at least once.
493 for(p
=0;p
<charCount
;p
++)
495 if(info
[p
].frequency
)
497 printf("% 5d U+%06X ", info
[p
].frequency
, p
);
500 prettyPrintUChar((UChar
)p
);
506 // ***************************** END SAMPLE ********************
515 /******************************************************
516 You must call ucnv_close to clean up the memory used by the
519 'len' returns the number of OUTPUT bytes resulting from the
/*
 * convsample_12: convert a fixed Shift-JIS byte string to Unicode in a
 * single call.
 *
 * Opens a "shift_jis" converter, converts the NUL-terminated 'source'
 * bytes with ucnv_toUChars() (source length from strlen(), target capacity
 * 100), then prints the source bytes and the 'len' UChars in 'target'.
 *
 * NOTE(review): the declarations of 'conv', 'target' and 'len', the close
 * call announced by the "close the converter" comment, any status check
 * after the conversion, the braces and the return statement are missing
 * from this listing.
 */
523 UErrorCode
convsample_12()
525 printf("\n\n==============================================\n"
526 "Sample 12: C: simple sjis -> unicode conversion\n");
529 // **************************** START SAMPLE *******************
// Bytes above 0x7F are cast to char explicitly; the array ends with 0x00 so
// that strlen() can be used on it below.
531 char source
[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
533 UErrorCode status
= U_ZERO_ERROR
;
537 // set up the converter
538 conv
= ucnv_open("shift_jis", &status
);
539 assert(U_SUCCESS(status
));
541 // convert to Unicode
542 // Note: we can use strlen, we know it's an 8 bit null terminated codepage
544 len
= ucnv_toUChars(conv
, target
, 100, source
, strlen(source
), &status
);
546 // close the converter
549 // ***************************** END SAMPLE ********************
552 printBytes("src", source
, strlen(source
) );
554 printUChars("targ", target
, len
);
559 /******************************************************************
560 C: Convert from codepage to Unicode one at a time.
/*
 * convsample_13: convert a fixed Big5 byte sequence to Unicode one code
 * point at a time.
 *
 * Opens a "Big5" converter, sets 'source'/'sourceLimit' to span the whole
 * 'sourceChars' array (sizeof is used, so there is no terminator
 * involved), prints the source bytes, and calls ucnv_getNextUChar() while
 * 'source' is below 'sourceLimit'. The byte and UChar counts are printed
 * at the end.
 *
 * NOTE(review): the declarations of 'target', 'srcCount' and 'dstCount',
 * the remaining arguments of ucnv_getNextUChar(), the rest of the loop
 * body, any status checks, the close call and the return statement are
 * missing from this listing.
 */
563 UErrorCode
convsample_13()
565 printf("\n\n==============================================\n"
566 "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
569 const char sourceChars
[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e };
570 // const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
571 const char *source
, *sourceLimit
;
573 UErrorCode status
= U_ZERO_ERROR
;
574 UConverter
*conv
= NULL
;
578 srcCount
= sizeof(sourceChars
);
580 conv
= ucnv_open("Big5", &status
);
// The input window covers the entire array.
583 source
= sourceChars
;
584 sourceLimit
= sourceChars
+ sizeof(sourceChars
);
586 // **************************** START SAMPLE *******************
589 printBytes("src",source
,sourceLimit
-source
);
591 while(source
< sourceLimit
)
594 target
= ucnv_getNextUChar (conv
,
599 // printBytes("src",source,sourceLimit-source);
606 // ************************** END SAMPLE *************************
608 printf("src=%d bytes, dst=%d uchars\n", srcCount
, dstCount
);
/*
 * convsample_20_didSubstitute: round-trip a UTF-8 string through UTF-16
 * into windows-1252 and report the flag kept by the flag callback context.
 *
 *   source - NUL-terminated UTF-8 bytes (strlen() is applied to it).
 *
 * Returns the value read from context->flag after the second conversion;
 * per the comment on the return statement it is true when the callback
 * was called.
 *
 * Steps visible here: print the source bytes; open a "utf-8" converter and
 * convert to 'uchars' (capacity 100) with ucnv_toUChars(); print the
 * UChars; open a "windows-1252" converter; create a context with
 * flagCB_fromU_openContext(); install it with ucnv_setFromUCallBack(),
 * passing the addresses of the context's subCallback/subContext fields;
 * convert with ucnv_fromUChars() into 'bytes' (capacity 100); print the
 * result.
 *
 * NOTE(review): the declarations of 'uchars', 'bytes', 'len', 'len2' and
 * 'flagVal', the remaining arguments of ucnv_setFromUCallBack(), every
 * status check and the converter close calls are missing from this
 * listing.
 */
617 UBool
convsample_20_didSubstitute(const char *source
)
621 UConverter
*conv
= NULL
;
622 UErrorCode status
= U_ZERO_ERROR
;
626 FromUFLAGContext
* context
= NULL
;
628 printf("\n\n==============================================\n"
629 "Sample 20: C: Test for substitution using callbacks\n");
631 /* print out the original source */
632 printBytes("src", source
);
635 /* First, convert from UTF8 to unicode */
636 conv
= ucnv_open("utf-8", &status
);
639 len
= ucnv_toUChars(conv
, uchars
, 100, source
, strlen(source
), &status
);
642 printUChars("uch", uchars
, len
);
645 /* Now, close the converter */
648 /* Now, convert to windows-1252 */
649 conv
= ucnv_open("windows-1252", &status
);
652 /* Converter starts out with the SUBSTITUTE callback set. */
654 /* initialize our callback */
655 context
= flagCB_fromU_openContext();
657 /* Set our special callback */
658 ucnv_setFromUCallBack(conv
,
661 &(context
->subCallback
),
662 &(context
->subContext
),
667 len2
= ucnv_fromUChars(conv
, bytes
, 100, uchars
, len
, &status
);
// Read the flag now, while 'context' is still valid.
670 flagVal
= context
->flag
; /* it's about to go away when we close the cnv */
674 /* print out the converted bytes */
675 printBytes("bytes", bytes
, len2
);
677 return flagVal
; /* true if callback was called */
/*
 * convsample_20: run convsample_20_didSubstitute() on two fixed inputs and
 * print whether each one reported a substitution.
 *
 * NOTE(review): the braces, the 'else' keywords that presumably separate
 * the two messages of each test, and the return statement are missing
 * from this listing.
 */
680 UErrorCode
convsample_20()
// "\xdf\xbf" is the two-byte UTF-8 encoding of U+07FF; sample2 is plain ASCII.
682 const char *sample1
= "abc\xdf\xbf";
683 const char *sample2
= "abc_def";
686 if(convsample_20_didSubstitute(sample1
))
688 printf("DID substitute.\n******\n");
692 printf("Did NOT substitute.\n*****\n");
695 if(convsample_20_didSubstitute(sample2
))
697 printf("DID substitute.\n******\n");
701 printf("Did NOT substitute.\n*****\n");
707 // 21 - C, callback, with clone and debug
/*
 * convsample_21_didSubstitute: same round trip as sample 20, but with a
 * chain of three callback contexts and with the conversion performed on a
 * clone of the converter.
 *
 *   source - NUL-terminated UTF-8 bytes (strlen() is applied to it).
 *
 * Returns flagVal: FALSE initially, replaced by cloneFlagCtx->flag when a
 * flag context could be recovered from the clone.
 *
 * Steps visible here:
 *   1. Print the source, convert it from "utf-8" to 'uchars' (capacity
 *      100) and print the UChars.
 *   2. Open a "windows-1252" converter and create three contexts:
 *      debugCtx1 -> flagCtx -> debugCtx2, where debugCtx2 forwards to
 *      UCNV_FROM_U_CALLBACK_SUBSTITUTE with a NULL context.
 *   3. Install the chain with ucnv_setFromUCallBack() and print it.
 *   4. Clone the converter with ucnv_safeClone() (NULL stack buffer).
 *   5. Fetch the clone's callback context with ucnv_getFromUCallBack(),
 *      follow its subContext to the flag context, and print that chain.
 *   6. Convert with the clone into 'bytes' (capacity 100), read the flag,
 *      close the clone and print the converted bytes.
 *
 * NOTE(review): the declarations of 'uchars', 'bytes', 'len', 'len2',
 * 'cloneLen' and 'debugCtx2', the remaining arguments of
 * ucnv_setFromUCallBack(), every status check, the close calls for 'conv'
 * and several braces/'else' lines are missing from this listing.
 */
711 UBool
convsample_21_didSubstitute(const char *source
)
715 UConverter
*conv
= NULL
, *cloneCnv
= NULL
;
716 UErrorCode status
= U_ZERO_ERROR
;
719 UBool flagVal
= FALSE
;
720 UConverterFromUCallback junkCB
;
722 FromUFLAGContext
*flagCtx
= NULL
,
723 *cloneFlagCtx
= NULL
;
725 debugCBContext
*debugCtx1
= NULL
,
727 *cloneDebugCtx
= NULL
;
729 printf("\n\n==============================================\n"
730 "Sample 21: C: Test for substitution w/ callbacks & clones \n");
732 /* print out the original source */
733 printBytes("src", source
);
736 /* First, convert from UTF8 to unicode */
737 conv
= ucnv_open("utf-8", &status
);
740 len
= ucnv_toUChars(conv
, uchars
, 100, source
, strlen(source
), &status
);
743 printUChars("uch", uchars
, len
);
746 /* Now, close the converter */
749 /* Now, convert to windows-1252 */
750 conv
= ucnv_open("windows-1252", &status
);
753 /* Converter starts out with the SUBSTITUTE callback set. */
755 /* initialize our callback */
756 /* from the 'bottom' innermost, out
757 * CNV -> debugCtx1[debug] -> flagCtx[flag] -> debugCtx2[debug] */
760 printf("flagCB_fromU = %p\n", &flagCB_fromU
);
761 printf("debugCB_fromU = %p\n", &debugCB_fromU
);
764 debugCtx1
= debugCB_openContext();
765 flagCtx
= flagCB_fromU_openContext();
766 debugCtx2
= debugCB_openContext();
// Link the chain: each context names the callback and context it forwards to.
768 debugCtx1
->subCallback
= flagCB_fromU
; /* debug1 -> flag */
769 debugCtx1
->subContext
= flagCtx
;
771 flagCtx
->subCallback
= debugCB_fromU
; /* flag -> debug2 */
772 flagCtx
->subContext
= debugCtx2
;
// End of the chain: debugCtx2 forwards to the stock substitute callback.
774 debugCtx2
->subCallback
= UCNV_FROM_U_CALLBACK_SUBSTITUTE
;
775 debugCtx2
->subContext
= NULL
;
777 /* Set our special callback */
779 ucnv_setFromUCallBack(conv
,
782 &(debugCtx2
->subCallback
),
783 &(debugCtx2
->subContext
),
789 printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
790 conv
, debugCtx1
, debugCtx1
->subCallback
,
791 debugCtx1
->subContext
, flagCtx
, debugCtx2
, debugCtx2
->subCallback
);
794 cloneLen
= 1; /* but passing in null so it will clone */
795 cloneCnv
= ucnv_safeClone(conv
, NULL
, &cloneLen
, &status
);
800 printf("Cloned converter from %p -> %p. Closing %p.\n", conv
, cloneCnv
, conv
);
806 printf("%p closed.\n", conv
);
810 /* Now, we have to extract the context */
811 cloneDebugCtx
= NULL
;
// Only the context is wanted here; the callback pointer lands in junkCB.
814 ucnv_getFromUCallBack(cloneCnv
, &junkCB
, (const void **)&cloneDebugCtx
);
815 if(cloneDebugCtx
!= NULL
) {
816 cloneFlagCtx
= (FromUFLAGContext
*) cloneDebugCtx
-> subContext
;
819 printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
820 cloneCnv
, cloneDebugCtx
, cloneFlagCtx
, cloneFlagCtx
?cloneFlagCtx
->subContext
:NULL
);
// The conversion itself runs on the clone, not on the original converter.
822 len2
= ucnv_fromUChars(cloneCnv
, bytes
, 100, uchars
, len
, &status
);
825 if(cloneFlagCtx
!= NULL
) {
826 flagVal
= cloneFlagCtx
->flag
; /* it's about to go away when we close the cnv */
828 printf("** Warning, couldn't get the subcallback \n");
831 ucnv_close(cloneCnv
);
833 /* print out the converted bytes */
834 printBytes("bytes", bytes
, len2
);
836 return flagVal
; /* true if callback was called */
/*
 * convsample_21: run convsample_21_didSubstitute() on two fixed inputs and
 * print whether each one reported a substitution.
 *
 * NOTE(review): the braces, the 'else' keywords that presumably separate
 * the two messages of each test, and the return statement are missing
 * from this listing.
 */
839 UErrorCode
convsample_21()
// "\xdf\xbf" is the two-byte UTF-8 encoding of U+07FF; sample2 is plain ASCII.
841 const char *sample1
= "abc\xdf\xbf";
842 const char *sample2
= "abc_def";
844 if(convsample_21_didSubstitute(sample1
))
846 printf("DID substitute.\n******\n");
850 printf("Did NOT substitute.\n*****\n");
853 if(convsample_21_didSubstitute(sample2
))
855 printf("DID substitute.\n******\n");
859 printf("Did NOT substitute.\n*****\n");
866 // 40- C, cp37 -> UTF16 [data02.bin -> data40.utf16]
868 #define BUFFERSIZE 17 /* make it interesting :) */
/*
 * convsample_40: convert the file "data02.bin" from CCSID 37 to UTF-16 and
 * write the result to "data40.utf16".
 *
 * Both files are opened in binary mode; failure to open either one prints
 * a message on stderr and returns U_FILE_ACCESS_ERROR. The converter is
 * opened with ucnv_openCCSID(37, UCNV_IBM, ...). The UChar buffer holds
 * BUFFERSIZE divided by the converter's minimum char size. Input is read
 * with fread() in pieces of up to BUFFERSIZE bytes; each piece is fed to
 * ucnv_toUnicode() until 'source' reaches 'sourceLimit', with
 * U_BUFFER_OVERFLOW_ERROR treated as "output full, go round again". After
 * every call the UChars in [uBuf, target) are written with fwrite() and
 * added to 'total'. A summary line is printed at the end.
 *
 * NOTE(review): the declarations of 'f', 'out', 'uBuf', 'source',
 * 'target', 'targetLimit' and 'count', the outer loop header, the update
 * of 'inbytes', all cleanup calls and the return statement are missing
 * from this listing.
 * NOTE(review): the fwrite() call is the argument of assert(); when the
 * file is built with NDEBUG the assert expands to nothing and no output
 * would be written.
 */
870 UErrorCode
convsample_40()
872 printf("\n\n==============================================\n"
873 "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
878 char inBuf
[BUFFERSIZE
];
880 const char *sourceLimit
;
884 int32_t uBufSize
= 0;
885 UConverter
*conv
= NULL
;
886 UErrorCode status
= U_ZERO_ERROR
;
887 uint32_t inbytes
=0, total
=0;
889 f
= fopen("data02.bin", "rb");
892 fprintf(stderr
, "Couldn't open file 'data02.bin' (cp37 data file).\n");
893 return U_FILE_ACCESS_ERROR
;
896 out
= fopen("data40.utf16", "wb");
899 fprintf(stderr
, "Couldn't create file 'data40.utf16'.\n");
901 return U_FILE_ACCESS_ERROR
;
904 // **************************** START SAMPLE *******************
905 conv
= ucnv_openCCSID(37, UCNV_IBM
, &status
);
906 assert(U_SUCCESS(status
));
// Output capacity in UChars: input bytes divided by the minimum bytes per char.
908 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
909 printf("input bytes %d / min chars %d = %d UChars\n",
910 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
911 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
914 // grab another buffer's worth
916 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
920 // Convert bytes to unicode
922 sourceLimit
= inBuf
+ count
;
927 targetLimit
= uBuf
+ uBufSize
;
929 ucnv_toUnicode( conv
, &target
, targetLimit
,
930 &source
, sourceLimit
, NULL
,
931 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
932 /* is true (when no more data will come) */
935 if(status
== U_BUFFER_OVERFLOW_ERROR
)
937 // simply ran out of space - we'll reset the target ptr the next
938 // time through the loop.
939 status
= U_ZERO_ERROR
;
943 // Check other errors here.
944 assert(U_SUCCESS(status
));
945 // Break out of the loop (by force)
948 // Process the Unicode
949 // Todo: handle UTF-16/surrogates
950 assert(fwrite(uBuf
, sizeof(uBuf
[0]), (target
-uBuf
), out
) ==
951 (size_t)(target
-uBuf
));
952 total
+= (target
-uBuf
);
953 } while (source
< sourceLimit
); // while simply out of space
956 printf("%d bytes in, %d UChars out.\n", inbytes
, total
);
958 // ***************************** END SAMPLE ********************
971 // 46- C, UTF16 -> latin2 [data40.utf16 -> data46.out]
973 #define BUFFERSIZE 24 /* make it interesting :) */
/*
 * convsample_46: convert the file "data40.utf16" from UTF-16 to
 * ISO-8859-2 and write the result to "data46.out".
 *
 * Both files are opened in binary mode; failure to open either one prints
 * a message on stderr and returns U_FILE_ACCESS_ERROR. The converter is
 * opened with ucnv_open("iso-8859-2", ...). The byte buffer holds
 * BUFFERSIZE times the converter's maximum char size. Input is read with
 * fread() in pieces of up to BUFFERSIZE UChars; each piece is fed to
 * ucnv_fromUnicode() until 'source' reaches 'sourceLimit', with
 * U_BUFFER_OVERFLOW_ERROR treated as "output full, go round again". After
 * every call the bytes in [buf, target) are written with fwrite() and
 * added to 'total'. A summary line is printed and U_ZERO_ERROR returned.
 *
 * NOTE(review): the declarations of 'f', 'out', 'buf', 'bufSize',
 * 'source', 'target', 'targetLimit' and 'count', the outer loop header,
 * the update of 'inchars' and all cleanup calls are missing from this
 * listing.
 * NOTE(review): the fwrite() call is the argument of assert(); when the
 * file is built with NDEBUG the assert expands to nothing and no output
 * would be written.
 * NOTE(review): the summary printf formats inchars * sizeof(UChar), a
 * size_t expression, with %d; the specifier does not match the type.
 */
975 UErrorCode
convsample_46()
977 printf("\n\n==============================================\n"
978 "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");
983 UChar inBuf
[BUFFERSIZE
];
985 const UChar
*sourceLimit
;
991 UConverter
*conv
= NULL
;
992 UErrorCode status
= U_ZERO_ERROR
;
993 uint32_t inchars
=0, total
=0;
995 f
= fopen("data40.utf16", "rb");
998 fprintf(stderr
, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
999 return U_FILE_ACCESS_ERROR
;
1002 out
= fopen("data46.out", "wb");
1005 fprintf(stderr
, "Couldn't create file 'data46.out'.\n");
1007 return U_FILE_ACCESS_ERROR
;
1010 // **************************** START SAMPLE *******************
1011 conv
= ucnv_open( "iso-8859-2", &status
);
1012 assert(U_SUCCESS(status
));
// Output capacity in bytes: input UChars times the maximum bytes per char.
1014 bufSize
= (BUFFERSIZE
*ucnv_getMaxCharSize(conv
));
1015 printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
1016 BUFFERSIZE
, ucnv_getMaxCharSize(conv
), bufSize
);
1017 buf
= (char*)malloc(bufSize
* sizeof(char));
1020 // grab another buffer's worth
1022 ((count
=fread(inBuf
, sizeof(UChar
), BUFFERSIZE
, f
)) > 0) )
1026 // Convert Unicode to bytes
1028 sourceLimit
= inBuf
+ count
;
1033 targetLimit
= buf
+ bufSize
;
1035 ucnv_fromUnicode( conv
, &target
, targetLimit
,
1036 &source
, sourceLimit
, NULL
,
1037 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
1038 /* is true (when no more data will come) */
1041 if(status
== U_BUFFER_OVERFLOW_ERROR
)
1043 // simply ran out of space - we'll reset the target ptr the next
1044 // time through the loop.
1045 status
= U_ZERO_ERROR
;
1049 // Check other errors here.
1050 assert(U_SUCCESS(status
));
1051 // Break out of the loop (by force)
1054 // Write out the converted bytes
1055 assert(fwrite(buf
, sizeof(buf
[0]), (target
-buf
), out
) ==
1056 (size_t)(target
-buf
));
1057 total
+= (target
-buf
);
1058 } while (source
< sourceLimit
); // while simply out of space
1061 printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars
, inchars
* sizeof(UChar
), total
);
1063 // ***************************** END SAMPLE ********************
1070 return U_ZERO_ERROR
;
1074 #define BUFFERSIZE 219
// Driver statements: print the default converter name, run every sample in
// numeric order, then print a closing line. The UErrorCode/UBool results of
// the samples are not examined here.
// NOTE(review): presumably this is the body of the program entry point; the
// enclosing function header and braces are missing from this listing.
1082 printf("Default Converter=%s\n", ucnv_getDefaultName() );
1084 convsample_02(); // C , u->koi8r, conv
1085 convsample_03(); // C, iterate
1087 convsample_05(); // C, utf8->u, toUnicode (letter count)
1088 convsample_06(); // C, utf8->u, getNextUChar (frequency counter)
1090 convsample_12(); // C, sjis->u, conv
1091 convsample_13(); // C, big5->u, getNextU
1093 convsample_20(); // C, callback
1094 convsample_21(); // C, callback debug
1096 convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
1098 convsample_46(); // C, UTF16 -> latin2 [data40.utf16 -> data46.out]
1100 printf("End of converter samples.\n");