1 /**************************************************************************
3 * Copyright (C) 2000-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
6 ***************************************************************************
7 * file name: convsamp.c
8 * encoding: ASCII (7-bit)
10 * created on: 2000may30
11 * created by: Steven R. Loomis
13 * Sample code for the ICU conversion routines.
15 * Note: Nothing special is needed to build this sample. Link with
16 * the icu UC and icu I18N libraries.
18 * I use 'assert' for error checking, you probably will want
19 * something more flexible. '***BEGIN SAMPLE***' and
20 * '***END SAMPLE***' mark pieces suitable for stand alone
24 * Each test can define its own BUFFERSIZE
28 #define DEBUG_TMI 0 /* define to 1 to enable Too Much Information */
31 #include <ctype.h> /* for isspace, etc. */
34 #include <stdlib.h> /* malloc */
36 #include "unicode/utypes.h" /* Basic ICU data types */
37 #include "unicode/ucnv.h" /* C Converter API */
38 #include "unicode/ustring.h" /* some more string fcns*/
39 #include "unicode/uchar.h" /* char names */
40 #include "unicode/uloc.h"
41 #include "unicode/unistr.h"
45 /* Some utility functions */
// Single-element UChar array holding only 0x0000; it is the default value of the
// uch parameter of printUChars further down.
47 static const UChar kNone
[] = { 0x0000 };
// Error-check helper: when U_FAILURE(x) holds, it flushes stdout and stderr, prints the
// stringized argument together with u_errorName(x) on stderr, then asserts U_SUCCESS(x).
// NOTE(review): the expansion is a bare brace block rather than do { } while (0); a
// semicolon written after it in front of an else would detach that else - confirm call sites.
// NOTE(review): x is evaluated several times, so it should be a plain variable.
49 #define U_ASSERT(x) { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }}
51 /* Print a UChar if possible, in seven characters. */
// Prints one UChar to stdout in a short human-readable form.
// Only part of the body is visible in this excerpt; the visible pieces are:
//   - a first branch (its condition is not shown here) that prints the low 8 bits
//     of c as a quoted character,
//   - a branch for c > 0x007F that asks u_charName for the extended character name,
//   - a switch over the low 7 bits of c.
52 void prettyPrintUChar(UChar c
)
56 printf(" '%c' ", (char)(0x00FF&c
));
57 } else if ( c
> 0x007F ) {
59 UErrorCode status
= U_ZERO_ERROR
;
// Look up the character name into buf; 1000 is passed as the buffer capacity and the
// result o is tested for being positive below.
// NOTE(review): the declaration of buf is not visible here - confirm it really holds 1000 bytes.
62 o
= u_charName(c
, U_EXTENDED_CHAR_NAME
, buf
, 1000, &status
);
63 if(U_SUCCESS(status
) && (o
>0) ) {
// Dispatch on the low 7 bits of c (the case labels are not visible in this excerpt).
70 switch((char)(c
& 0x007F)) {
// Dumps a UChar buffer to stdout as three rows: a row labelled with name, a row labelled
// uni showing each code unit as a backslash-u escape with four hex digits, and a row
// labelled ch showing each code unit via prettyPrintUChar.
// Parameters visible here: name (row label, defaults to a question mark) and uch (the
// buffer, defaults to kNone). A length parameter len is used below but its declaration
// is not visible in this excerpt.
88 void printUChars(const char *name
= "?",
89 const UChar
*uch
= kNone
,
// len of -1 with a non-null uch selects a special case whose body is not shown here -
// presumably the length is computed from the terminated string; TODO confirm.
94 if( (len
== -1) && (uch
) ) {
98 printf("%5s: ", name
);
99 for( i
= 0; i
<len
; i
++) {
104 printf("%5s: ", "uni");
105 for( i
= 0; i
<len
; i
++) {
106 printf("\\u%04X ", (int)uch
[i
]);
110 printf("%5s:", "ch");
111 for( i
= 0; i
<len
; i
++) {
112 prettyPrintUChar(uch
[i
]);
// Byte-buffer counterpart of printUChars: dumps a char buffer to stdout as a row labelled
// with name, a row of backslash-x two-digit hex escapes, and a row labelled ch that shows
// printable bytes as quoted characters.
// Parameters visible here: name (row label, defaults to a question mark) and uch (the
// byte buffer, defaults to the empty string). A length parameter len is used below but
// its declaration is not visible in this excerpt.
117 void printBytes(const char *name
= "?",
118 const char *uch
= "",
// len of -1 with a non-null uch selects a special case whose body is not shown here -
// presumably the length is taken from the terminated string; TODO confirm.
123 if( (len
== -1) && (uch
) ) {
127 printf("%5s: ", name
);
128 for( i
= 0; i
<len
; i
++) {
// NOTE(review): this row holds hex byte values but is labelled uni, the same label
// printUChars uses; the sample output documented later in the file shows the same label,
// so changing it would change the documented output.
133 printf("%5s: ", "uni");
134 for( i
= 0; i
<len
; i
++) {
// The 0x00FF mask keeps the printed value in the range 0..255 even if char is signed.
135 printf("\\x%02X ", 0x00FF & (int)uch
[i
]);
139 printf("%5s:", "ch");
140 for( i
= 0; i
<len
; i
++) {
// The same mask is applied before isgraph so that it never receives a negative value.
141 if(isgraph(0x00FF & (int)uch
[i
])) {
142 printf(" '%c' ", (char)uch
[i
]);
// Prints a single code point: first as ch: U+ followed by six hex digits, then as a
// one-element UChar buffer through printUChars with the label C.
150 void printUChar(UChar32 ch32
)
153 printf("ch: U+%06X\n", ch32
);
// NOTE(review): the cast narrows a UChar32 to a UChar; any guard that restricts this to
// values fitting in one UChar is not visible in this excerpt - confirm.
156 UChar ch
= (UChar
)ch32
;
157 printUChars("C", &ch
, 1);
161 /*******************************************************************
162 Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
163 followed by an exclamation mark (!) into the KOI8-R Russian code page.
165 This example first creates a UChar String out of the Unicode chars.
167 targetSize must be set to the amount of space available in the target
168 buffer. After fromUChars is called,
169 len will contain the number of bytes in target[] which were
170 used in the resulting codepage. In this case, there is a 1:1 mapping
171 between the input and output characters. The exclamation mark has the
172 same value in both KOI8-R and Unicode.
175 uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
176 ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL '!'
179 uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
183 Converting FROM unicode
185 You must call ucnv_close to clean up the memory used by the
188 'len' returns the number of OUTPUT bytes resulting from the
// Sample 02: converts a fixed, zero-terminated UChar string to koi8-r bytes with a single
// ucnv_fromUChars call, then prints both the source code units and the resulting bytes.
// Declared to return UErrorCode; the return statement is not visible in this excerpt.
192 UErrorCode
convsample_02()
194 printf("\n\n==============================================\n"
195 "Sample 02: C: simple Unicode -> koi8-r conversion\n");
198 // **************************** START SAMPLE *******************
200 UChar source
[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
201 0x0430, 0x0021, 0x0000 };
203 UErrorCode status
= U_ZERO_ERROR
;
207 // set up the converter
209 conv
= ucnv_open("koi8-r", &status
);
211 assert(U_SUCCESS(status
));
// 100 is passed as the capacity of target; -1 is passed as the source length, relying on
// the trailing 0x0000 in source.
// NOTE(review): the declaration of target is not visible here - confirm it holds 100 bytes.
214 len
= ucnv_fromUChars(conv
, target
, 100, source
, -1, &status
);
215 assert(U_SUCCESS(status
));
217 // close the converter
220 // ***************************** END SAMPLE ********************
223 printUChars("src", source
);
225 printBytes("targ", target
, len
);
// Sample 03: prints the number of available converters reported by ucnv_countAvailable
// and then the names returned by ucnv_getAvailableName(i).
// The loop header that drives i is not visible in this excerpt.
231 UErrorCode
convsample_03()
233 printf("\n\n==============================================\n"
234 "Sample 03: C: print out all converters\n");
239 // **************************** START SAMPLE *******************
240 count
= ucnv_countAvailable();
241 printf("Available converters: %d\n", count
);
245 printf("%s ", ucnv_getAvailableName(i
));
248 // ***************************** END SAMPLE ********************
257 #define BUFFERSIZE 17 /* make it interesting :) */
260 Converting from a codepage to Unicode in bulk..
261 What is the best way to determine the buffer size?
263 The 'buffersize' is in bytes of input.
264 For a given converter, dividing this by the minimum char size
265 gives you the maximum number of Unicode characters that could be
266 expected for a given number of input bytes.
267 see: ucnv_getMinCharSize()
269 For example, a single byte codepage like 'Latin-3' has a
270 minimum char size of 1. (It takes at least 1 byte to represent
271 each Unicode char.) So the unicode buffer has the same number of
272 UChars as the input buffer has bytes.
274 In a strictly double byte codepage such as cp1362 (Windows
275 Korean), the minimum char size is 2. So, only half as many Unicode
276 chars as bytes are needed.
278 This work to calculate the buffer size is an optimization. Any
279 size of input and output buffer can be used, as long as the
280 program handles the following cases: If the input buffer is empty,
281 the source pointer will be equal to sourceLimit. If the output
282 buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
// Sample 05: reads data01.txt in chunks of BUFFERSIZE bytes, converts each chunk from
// UTF-8 to UChars with ucnv_toUnicode, walks the converted UChars, and finally prints the
// letters and total counters.
// Returns U_FILE_ACCESS_ERROR when the input file cannot be opened; the other return
// paths are not visible in this excerpt.
285 UErrorCode
convsample_05()
287 printf("\n\n==============================================\n"
288 "Sample 05: C: count the number of letters in a UTF-8 document\n");
292 char inBuf
[BUFFERSIZE
];
294 const char *sourceLimit
;
299 int32_t uBufSize
= 0;
301 UErrorCode status
= U_ZERO_ERROR
;
302 uint32_t letters
=0, total
=0;
// NOTE(review): the file is opened in text mode here while convsample_40 opens its
// input with rb; on platforms that translate line endings this may alter the bytes - confirm.
304 f
= fopen("data01.txt", "r");
307 fprintf(stderr
, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
308 return U_FILE_ACCESS_ERROR
;
311 // **************************** START SAMPLE *******************
312 conv
= ucnv_open("utf-8", &status
);
313 assert(U_SUCCESS(status
));
// Size the UChar buffer as input bytes divided by the converter minimum char size, as
// explained in the comment block that precedes this function.
315 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
316 printf("input bytes %d / min chars %d = %d UChars\n",
317 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
// NOTE(review): no null check of the malloc result is visible in this excerpt.
318 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
321 // grab another buffer's worth
323 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
325 // Convert bytes to unicode
327 sourceLimit
= inBuf
+ count
;
332 targetLimit
= uBuf
+ uBufSize
;
334 ucnv_toUnicode(conv
, &target
, targetLimit
,
335 &source
, sourceLimit
, NULL
,
336 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
337 /* is true (when no more data will come) */
// An overflow status only means the output buffer filled up; it is cleared so that the
// enclosing do/while can run again for the rest of this input chunk.
340 if(status
== U_BUFFER_OVERFLOW_ERROR
)
342 // simply ran out of space - we'll reset the target ptr the next
343 // time through the loop.
344 status
= U_ZERO_ERROR
;
348 // Check other errors here.
349 assert(U_SUCCESS(status
));
350 // Break out of the loop (by force)
353 // Process the Unicode
354 // Todo: handle UTF-16/surrogates
// Walk the UChars produced by this pass, from uBuf up to target; the loop body that
// updates the counters is not visible in this excerpt.
356 for(p
= uBuf
; p
<target
; p
++)
362 } while (source
< sourceLimit
); // while simply out of space
365 printf("%d letters out of %d total UChars.\n", letters
, total
);
367 // ***************************** END SAMPLE ********************
378 #define BUFFERSIZE 1024
// Sample 06: reads data06.txt in chunks of BUFFERSIZE bytes, pulls one code point at a
// time out of each chunk with ucnv_getNextUChar, keeps a per-code-point table (info) with
// charCount entries, and prints the counters plus every table entry whose frequency is
// non-zero.
// Returns U_FILE_ACCESS_ERROR when the input file cannot be opened and
// U_UNSUPPORTED_ERROR on the path that reports a non-BMP character; other return paths
// are not visible in this excerpt.
385 UErrorCode
convsample_06()
387 printf("\n\n==============================================\n"
388 "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
392 char inBuf
[BUFFERSIZE
];
394 const char *sourceLimit
;
395 int32_t uBufSize
= 0;
397 UErrorCode status
= U_ZERO_ERROR
;
398 uint32_t letters
=0, total
=0;
401 UChar32 charCount
= 0x10000; /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
408 f
= fopen("data06.txt", "r");
411 fprintf(stderr
, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
412 return U_FILE_ACCESS_ERROR
;
415 info
= (CharFreqInfo
*)malloc(sizeof(CharFreqInfo
) * charCount
);
// NOTE(review): the argument below is a size_t product but the format uses %d; %zu (or a
// cast) would match the argument type.
418 fprintf(stderr
, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo
)*charCount
);
421 /* reset frequencies */
422 for(p
=0;p
<charCount
;p
++)
424 info
[p
].codepoint
= p
;
425 info
[p
].frequency
= 0;
428 // **************************** START SAMPLE *******************
429 conv
= ucnv_open("utf-8", &status
);
430 assert(U_SUCCESS(status
));
// uBufSize is computed and printed here; no UChar buffer allocation is visible in this
// function because the conversion below goes one code point at a time.
432 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
433 printf("input bytes %d / min chars %d = %d UChars\n",
434 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
436 // grab another buffer's worth
438 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
440 // Convert bytes to unicode
442 sourceLimit
= inBuf
+ count
;
444 while(source
< sourceLimit
)
446 p
= ucnv_getNextUChar(conv
, &source
, sourceLimit
, &status
);
// A failed status is reported together with the running total and then cleared, so the
// loop keeps going after a conversion error.
447 if(U_FAILURE(status
))
449 fprintf(stderr
, "%s @ %d\n", u_errorName(status
), total
);
450 status
= U_ZERO_ERROR
;
// Digraph checks on the pair (l, p), both lower-cased: i followed by e, and g followed
// by 0x0127. The assignment of l is not visible in this excerpt - presumably the
// previous code point; TODO confirm.
459 if((u_tolower(l
) == 'i') && (u_tolower(p
) == 'e'))
462 if((u_tolower(l
) == 'g') && (u_tolower(p
) == 0x0127))
467 fprintf(stderr
, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p
);
// NOTE(review): this early return happens after f, info and conv were acquired; any
// cleanup before it is not visible in this excerpt - confirm nothing leaks.
471 return U_UNSUPPORTED_ERROR
;
481 printf("%d letters out of %d total UChars.\n", letters
, total
);
482 printf("%d ie digraphs, %d gh digraphs.\n", ie
, gh
);
484 // now, we could sort it..
486 // qsort(info, charCount, sizeof(info[0]), charfreq_compare);
// Print every table entry that was seen at least once.
488 for(p
=0;p
<charCount
;p
++)
490 if(info
[p
].frequency
)
492 printf("% 5d U+%06X ", info
[p
].frequency
, p
);
495 prettyPrintUChar((UChar
)p
);
501 // ***************************** END SAMPLE ********************
510 /******************************************************
511 You must call ucnv_close to clean up the memory used by the
514 'len' returns the number of OUTPUT bytes resulting from the
// Sample 12: converts a fixed, NUL-terminated shift_jis byte string to UChars with a
// single ucnv_toUChars call, then prints the source bytes and the resulting UChars.
// Declared to return UErrorCode; the return statement is not visible in this excerpt.
518 UErrorCode
convsample_12()
520 printf("\n\n==============================================\n"
521 "Sample 12: C: simple sjis -> unicode conversion\n");
524 // **************************** START SAMPLE *******************
526 char source
[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
528 UErrorCode status
= U_ZERO_ERROR
;
532 // set up the converter
533 conv
= ucnv_open("shift_jis", &status
);
534 assert(U_SUCCESS(status
));
536 // convert to Unicode
537 // Note: we can use strlen, we know it's an 8 bit null terminated codepage
// 100 is passed as the capacity of target.
// NOTE(review): unlike convsample_02, no status check is visible after this call, and
// the declaration of target is not visible either - confirm both.
539 len
= ucnv_toUChars(conv
, target
, 100, source
, strlen(source
), &status
);
541 // close the converter
544 // ***************************** END SAMPLE ********************
547 printBytes("src", source
, strlen(source
) );
549 printUChars("targ", target
, len
);
554 /******************************************************************
555 C: Convert from codepage to Unicode one at a time.
// Sample 13: converts a fixed Big5 byte array one code point at a time by calling
// ucnv_getNextUChar in a loop until source reaches sourceLimit, then prints the source
// byte count and the destination count.
// Declared to return UErrorCode; the return statement is not visible in this excerpt.
558 UErrorCode
convsample_13()
560 printf("\n\n==============================================\n"
561 "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
// The array has no terminating zero; its extent is taken with sizeof below.
564 const char sourceChars
[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e };
565 // const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
566 const char *source
, *sourceLimit
;
568 UErrorCode status
= U_ZERO_ERROR
;
569 UConverter
*conv
= NULL
;
573 srcCount
= sizeof(sourceChars
);
// NOTE(review): no status check is visible after this open - confirm.
575 conv
= ucnv_open("Big5", &status
);
578 source
= sourceChars
;
579 sourceLimit
= sourceChars
+ sizeof(sourceChars
);
581 // **************************** START SAMPLE *******************
584 printBytes("src",source
,sourceLimit
-source
);
586 while(source
< sourceLimit
)
589 target
= ucnv_getNextUChar (conv
,
594 // printBytes("src",source,sourceLimit-source);
601 // ************************** END SAMPLE *************************
// dstCount is printed here; the statement that updates it is not visible in this excerpt.
603 printf("src=%d bytes, dst=%d uchars\n", srcCount
, dstCount
);
// Helper for sample 20. Converts source from UTF-8 to UChars, then converts those UChars
// to windows-1252 with a flag callback context installed on the converter, and returns
// the flag field of that context (true if the callback was called, per the comment on
// the return statement).
// Parameter: source - NUL-terminated byte string (strlen is applied to it).
612 UBool
convsample_20_didSubstitute(const char *source
)
616 UConverter
*conv
= NULL
;
617 UErrorCode status
= U_ZERO_ERROR
;
621 FromUFLAGContext
* context
= NULL
;
623 printf("\n\n==============================================\n"
624 "Sample 20: C: Test for substitution using callbacks\n");
626 /* print out the original source */
627 printBytes("src", source
);
630 /* First, convert from UTF8 to unicode */
631 conv
= ucnv_open("utf-8", &status
);
// 100 is passed as the capacity of uchars; its declaration is not visible in this excerpt.
634 len
= ucnv_toUChars(conv
, uchars
, 100, source
, strlen(source
), &status
);
637 printUChars("uch", uchars
, len
);
640 /* Now, close the converter */
643 /* Now, convert to windows-1252 */
644 conv
= ucnv_open("windows-1252", &status
);
647 /* Converter starts out with the SUBSTITUTE callback set. */
649 /* initialize our callback */
650 context
= flagCB_fromU_openContext();
652 /* Set our special callback */
// The addresses of the context subCallback and subContext fields are handed to the
// setter - presumably so the previously installed callback is stored there; TODO confirm
// against the arguments that are not visible in this excerpt.
653 ucnv_setFromUCallBack(conv
,
656 &(context
->subCallback
),
657 &(context
->subContext
),
662 len2
= ucnv_fromUChars(conv
, bytes
, 100, uchars
, len
, &status
);
// The flag is copied out before the converter is closed.
665 flagVal
= context
->flag
; /* it's about to go away when we close the cnv */
669 /* print out the converted bytes */
670 printBytes("bytes", bytes
, len2
);
672 return flagVal
; /* true if callback was called */
// Sample 20 driver: runs convsample_20_didSubstitute on two fixed inputs and prints
// whether each run reported a substitution. sample1 ends in the bytes 0xDF 0xBF while
// sample2 is plain ASCII.
// Declared to return UErrorCode; the return statement is not visible in this excerpt.
675 UErrorCode
convsample_20()
677 const char *sample1
= "abc\xdf\xbf";
678 const char *sample2
= "abc_def";
681 if(convsample_20_didSubstitute(sample1
))
683 printf("DID substitute.\n******\n");
687 printf("Did NOT substitute.\n*****\n");
690 if(convsample_20_didSubstitute(sample2
))
692 printf("DID substitute.\n******\n");
696 printf("Did NOT substitute.\n*****\n");
702 // 21 - C, callback, with clone and debug
// Helper for sample 21. Same overall flow as convsample_20_didSubstitute, but it builds a
// chain of three callback contexts (debug1, flag, debug2), clones the converter with
// ucnv_safeClone, recovers the cloned contexts from the clone, runs the conversion on the
// clone, and returns the flag found in the cloned flag context (FALSE when that context
// could not be recovered).
// Parameter: source - NUL-terminated byte string (strlen is applied to it).
706 UBool
convsample_21_didSubstitute(const char *source
)
710 UConverter
*conv
= NULL
, *cloneCnv
= NULL
;
711 UErrorCode status
= U_ZERO_ERROR
;
714 UBool flagVal
= FALSE
;
715 UConverterFromUCallback junkCB
;
717 FromUFLAGContext
*flagCtx
= NULL
,
718 *cloneFlagCtx
= NULL
;
720 debugCBContext
*debugCtx1
= NULL
,
722 *cloneDebugCtx
= NULL
;
724 printf("\n\n==============================================\n"
725 "Sample 21: C: Test for substitution w/ callbacks & clones \n");
727 /* print out the original source */
728 printBytes("src", source
);
731 /* First, convert from UTF8 to unicode */
732 conv
= ucnv_open("utf-8", &status
);
// 100 is passed as the capacity of uchars; its declaration is not visible in this excerpt.
735 len
= ucnv_toUChars(conv
, uchars
, 100, source
, strlen(source
), &status
);
738 printUChars("uch", uchars
, len
);
741 /* Now, close the converter */
744 /* Now, convert to windows-1252 */
745 conv
= ucnv_open("windows-1252", &status
);
748 /* Converter starts out with the SUBSTITUTE callback set. */
750 /* initialize our callback */
751 /* from the 'bottom' innermost, out
752 * CNV -> debugCtx1[debug] -> flagCtx[flag] -> debugCtx2[debug] */
755 printf("flagCB_fromU = %p\n", &flagCB_fromU
);
756 printf("debugCB_fromU = %p\n", &debugCB_fromU
);
759 debugCtx1
= debugCB_openContext();
760 flagCtx
= flagCB_fromU_openContext();
761 debugCtx2
= debugCB_openContext();
// Wire the chain by hand: each context names the callback and context it forwards to.
763 debugCtx1
->subCallback
= flagCB_fromU
; /* debug1 -> flag */
764 debugCtx1
->subContext
= flagCtx
;
766 flagCtx
->subCallback
= debugCB_fromU
; /* flag -> debug2 */
767 flagCtx
->subContext
= debugCtx2
;
769 debugCtx2
->subCallback
= UCNV_FROM_U_CALLBACK_SUBSTITUTE
;
770 debugCtx2
->subContext
= NULL
;
772 /* Set our special callback */
// The addresses of the debugCtx2 sub fields are handed to the setter - presumably so the
// previously installed callback overwrites the two values assigned just above; TODO
// confirm against the arguments that are not visible in this excerpt.
774 ucnv_setFromUCallBack(conv
,
777 &(debugCtx2
->subCallback
),
778 &(debugCtx2
->subContext
),
784 printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
785 conv
, debugCtx1
, debugCtx1
->subCallback
,
786 debugCtx1
->subContext
, flagCtx
, debugCtx2
, debugCtx2
->subCallback
);
// A null stack buffer is passed, so the clone is allocated by the library (see the
// trailing comment on the next statement).
789 cloneLen
= 1; /* but passing in null so it will clone */
790 cloneCnv
= ucnv_safeClone(conv
, NULL
, &cloneLen
, &status
);
795 printf("Cloned converter from %p -> %p. Closing %p.\n", conv
, cloneCnv
, conv
);
801 printf("%p closed.\n", conv
);
805 /* Now, we have to extract the context */
806 cloneDebugCtx
= NULL
;
// Recover the outermost context from the clone, then follow its subContext to reach the
// cloned flag context. junkCB receives the callback pointer and is not used afterwards
// in the visible code.
809 ucnv_getFromUCallBack(cloneCnv
, &junkCB
, (const void **)&cloneDebugCtx
);
810 if(cloneDebugCtx
!= NULL
) {
811 cloneFlagCtx
= (FromUFLAGContext
*) cloneDebugCtx
-> subContext
;
814 printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
815 cloneCnv
, cloneDebugCtx
, cloneFlagCtx
, cloneFlagCtx
?cloneFlagCtx
->subContext
:NULL
);
817 len2
= ucnv_fromUChars(cloneCnv
, bytes
, 100, uchars
, len
, &status
);
// flagVal keeps its FALSE initial value when the cloned flag context is missing.
820 if(cloneFlagCtx
!= NULL
) {
821 flagVal
= cloneFlagCtx
->flag
; /* it's about to go away when we close the cnv */
823 printf("** Warning, couldn't get the subcallback \n");
826 ucnv_close(cloneCnv
);
828 /* print out the converted bytes */
829 printBytes("bytes", bytes
, len2
);
831 return flagVal
; /* true if callback was called */
// Sample 21 driver: runs convsample_21_didSubstitute on the same two fixed inputs used by
// convsample_20 and prints whether each run reported a substitution.
// Declared to return UErrorCode; the return statement is not visible in this excerpt.
834 UErrorCode
convsample_21()
836 const char *sample1
= "abc\xdf\xbf";
837 const char *sample2
= "abc_def";
839 if(convsample_21_didSubstitute(sample1
))
841 printf("DID substitute.\n******\n");
845 printf("Did NOT substitute.\n*****\n");
848 if(convsample_21_didSubstitute(sample2
))
850 printf("DID substitute.\n******\n");
854 printf("Did NOT substitute.\n*****\n");
861 // 40- C, cp37 -> UTF16 [data02.bin -> data40.utf16]
863 #define BUFFERSIZE 17 /* make it interesting :) */
// Sample 40: reads data02.bin in chunks of BUFFERSIZE bytes, converts each chunk to
// UChars with a converter opened by ucnv_openCCSID(37, UCNV_IBM), and writes the UChars
// to data40.utf16.
// Returns U_FILE_ACCESS_ERROR when either file cannot be opened; other return paths are
// not visible in this excerpt.
865 UErrorCode
convsample_40()
867 printf("\n\n==============================================\n"
868 "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
873 char inBuf
[BUFFERSIZE
];
875 const char *sourceLimit
;
879 int32_t uBufSize
= 0;
880 UConverter
*conv
= NULL
;
881 UErrorCode status
= U_ZERO_ERROR
;
882 uint32_t inbytes
=0, total
=0;
884 f
= fopen("data02.bin", "rb");
887 fprintf(stderr
, "Couldn't open file 'data02.bin' (cp37 data file).\n");
888 return U_FILE_ACCESS_ERROR
;
891 out
= fopen("data40.utf16", "wb");
894 fprintf(stderr
, "Couldn't create file 'data40.utf16'.\n");
896 return U_FILE_ACCESS_ERROR
;
899 // **************************** START SAMPLE *******************
900 conv
= ucnv_openCCSID(37, UCNV_IBM
, &status
);
901 assert(U_SUCCESS(status
));
// Size the UChar buffer as input bytes divided by the converter minimum char size.
903 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
904 printf("input bytes %d / min chars %d = %d UChars\n",
905 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
// NOTE(review): no null check of the malloc result is visible in this excerpt.
906 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
909 // grab another buffer's worth
911 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
915 // Convert bytes to unicode
917 sourceLimit
= inBuf
+ count
;
922 targetLimit
= uBuf
+ uBufSize
;
924 ucnv_toUnicode( conv
, &target
, targetLimit
,
925 &source
, sourceLimit
, NULL
,
926 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
927 /* is true (when no more data will come) */
// An overflow status only means the output buffer filled up; it is cleared so that the
// enclosing do/while can run again for the rest of this input chunk.
930 if(status
== U_BUFFER_OVERFLOW_ERROR
)
932 // simply ran out of space - we'll reset the target ptr the next
933 // time through the loop.
934 status
= U_ZERO_ERROR
;
938 // Check other errors here.
939 assert(U_SUCCESS(status
));
940 // Break out of the loop (by force)
943 // Process the Unicode
944 // Todo: handle UTF-16/surrogates
// NOTE(review): the fwrite call sits inside assert, so a build with NDEBUG defined would
// drop the write entirely and produce an empty output file.
945 assert(fwrite(uBuf
, sizeof(uBuf
[0]), (target
-uBuf
), out
) ==
946 (size_t)(target
-uBuf
));
947 total
+= (target
-uBuf
);
948 } while (source
< sourceLimit
); // while simply out of space
// inbytes is printed here; the statement that updates it is not visible in this excerpt.
951 printf("%d bytes in, %d UChars out.\n", inbytes
, total
);
953 // ***************************** END SAMPLE ********************
966 // 46- C, UTF16 -> latin2 [data40.utf16 -> data46.out]
968 #define BUFFERSIZE 24 /* make it interesting :) */
// Sample 46: reads data40.utf16 in chunks of BUFFERSIZE UChars, converts each chunk to
// iso-8859-2 bytes with ucnv_fromUnicode, and writes the bytes to data46.out.
// Returns U_FILE_ACCESS_ERROR when either file cannot be opened and U_ZERO_ERROR at the
// end of the visible body.
970 UErrorCode
convsample_46()
972 printf("\n\n==============================================\n"
973 "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");
978 UChar inBuf
[BUFFERSIZE
];
980 const UChar
*sourceLimit
;
986 UConverter
*conv
= NULL
;
987 UErrorCode status
= U_ZERO_ERROR
;
988 uint32_t inchars
=0, total
=0;
990 f
= fopen("data40.utf16", "rb");
993 fprintf(stderr
, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
994 return U_FILE_ACCESS_ERROR
;
997 out
= fopen("data46.out", "wb");
1000 fprintf(stderr
, "Couldn't create file 'data46.out'.\n");
1002 return U_FILE_ACCESS_ERROR
;
1005 // **************************** START SAMPLE *******************
1006 conv
= ucnv_open( "iso-8859-2", &status
);
1007 assert(U_SUCCESS(status
));
// Going from UChars to bytes, the output buffer is sized as input UChars multiplied by
// the converter maximum char size.
1009 bufSize
= (BUFFERSIZE
*ucnv_getMaxCharSize(conv
));
1010 printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
1011 BUFFERSIZE
, ucnv_getMaxCharSize(conv
), bufSize
);
// NOTE(review): no null check of the malloc result is visible in this excerpt.
1012 buf
= (char*)malloc(bufSize
* sizeof(char));
1015 // grab another buffer's worth
// fread is asked for BUFFERSIZE items of sizeof(UChar) bytes each, so count is a number
// of UChars, not of bytes.
1017 ((count
=fread(inBuf
, sizeof(UChar
), BUFFERSIZE
, f
)) > 0) )
1021 // Convert Unicode to bytes
1023 sourceLimit
= inBuf
+ count
;
1028 targetLimit
= buf
+ bufSize
;
1030 ucnv_fromUnicode( conv
, &target
, targetLimit
,
1031 &source
, sourceLimit
, NULL
,
1032 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
1033 /* is true (when no more data will come) */
// An overflow status only means the output buffer filled up; it is cleared so that the
// enclosing do/while can run again for the rest of this input chunk.
1036 if(status
== U_BUFFER_OVERFLOW_ERROR
)
1038 // simply ran out of space - we'll reset the target ptr the next
1039 // time through the loop.
1040 status
= U_ZERO_ERROR
;
1044 // Check other errors here.
1045 assert(U_SUCCESS(status
));
1046 // Break out of the loop (by force)
1049 // Write out the converted bytes
// NOTE(review): the fwrite call sits inside assert, so a build with NDEBUG defined would
// drop the write entirely and produce an empty output file.
1050 assert(fwrite(buf
, sizeof(buf
[0]), (target
-buf
), out
) ==
1051 (size_t)(target
-buf
));
1052 total
+= (target
-buf
);
1053 } while (source
< sourceLimit
); // while simply out of space
// NOTE(review): the second argument is a size_t product but its format is %d; %zu (or a
// cast) would match the argument type. The statement that updates inchars is not visible
// in this excerpt.
1056 printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars
, inchars
* sizeof(UChar
), total
);
1058 // ***************************** END SAMPLE ********************
1065 return U_ZERO_ERROR
;
1069 #define BUFFERSIZE 219
// Sample 50: runs ucnv_detectUnicodeSignature on a fixed six-byte input, and when an
// encoding is reported opens a converter for it, converts the input to UChars with
// ucnv_toUnicode, optionally skips the first output UChar, and prints the rest as
// four-digit hex values.
1071 void convsample_50() {
1072 printf("\n\n==============================================\n"
1073 "Sample 50: C: ucnv_detectUnicodeSignature\n");
1075 //! [ucnv_detectUnicodeSignature]
1076 UErrorCode err
= U_ZERO_ERROR
;
1077 UBool discardSignature
= TRUE
; /* set to TRUE to throw away the initial U+FEFF */
// Input: the three bytes EF BB BF followed by the ASCII letters A B C.
1078 char input
[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
1079 int32_t signatureLength
= 0;
1080 const char *encoding
= ucnv_detectUnicodeSignature(input
,sizeof(input
),&signatureLength
,&err
);
1081 UConverter
*conv
= NULL
;
// The declaration of output is not visible in this excerpt; out is declared here without
// an initializer and its assignment is not visible either - presumably it is set to the
// start of output before the loop below; TODO confirm.
1083 UChar
*target
= output
, *out
;
1084 const char *source
= input
;
1085 if(encoding
!=NULL
&& U_SUCCESS(err
)){
1086 // should signature be discarded ?
1087 conv
= ucnv_open(encoding
, &err
);
1088 // do the conversion
// The target limit is output plus its element count (byte size divided by U_SIZEOF_UCHAR).
1089 ucnv_toUnicode(conv
,
1090 &target
, output
+ sizeof(output
)/U_SIZEOF_UCHAR
,
1091 &source
, input
+ sizeof(input
),
1094 if (discardSignature
){
1095 ++out
; // ignore initial U+FEFF
1097 while(out
!= target
) {
1098 printf("%04x ", *out
++);
1102 //! [ucnv_detectUnicodeSignature]
// Driver body: prints the default converter name, runs every sample in order, and prints
// a closing line. The return values of the samples are not used in the visible code.
// convsample_46 reads the file that convsample_40 writes, so 40 has to run before 46.
1113 printf("Default Converter=%s\n", ucnv_getDefaultName() );
1115 convsample_02(); // C , u->koi8r, conv
1116 convsample_03(); // C, iterate
1118 convsample_05(); // C, utf8->u, toUnicode, count letters
1119 convsample_06(); // C freq counter thingy
1121 convsample_12(); // C, sjis->u, conv
1122 convsample_13(); // C, big5->u, getNextU
1124 convsample_20(); // C, callback
1125 convsample_21(); // C, callback debug
1127 convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
1129 convsample_46(); // C, UTF16 -> latin2 [data40.utf16 -> data46.out]
1131 convsample_50(); // C, detect unicode signature
1133 printf("End of converter samples.\n");