1 /**************************************************************************
3 * Copyright (C) 2000-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
6 ***************************************************************************
7 * file name: convsamp.c
8 * encoding: ASCII (7-bit)
10 * created on: 2000may30
11 * created by: Steven R. Loomis
13 * Sample code for the ICU conversion routines.
15 * Note: Nothing special is needed to build this sample. Link with
16 * the icu UC and icu I18N libraries.
18 * I use 'assert' for error checking, you probably will want
19 * something more flexible. '***BEGIN SAMPLE***' and
20 * '***END SAMPLE***' mark pieces suitable for stand alone
24 * Each test can define its own BUFFERSIZE
28 #define DEBUG_TMI 0 /* define to 1 to enable Too Much Information */
31 #include <ctype.h> /* for isspace, etc. */
34 #include <stdlib.h> /* malloc */
37 #include "unicode/utypes.h" /* Basic ICU data types */
38 #include "unicode/ucnv.h" /* C Converter API */
39 #include "unicode/ustring.h" /* some more string fcns*/
40 #include "unicode/uchar.h" /* char names */
41 #include "unicode/uloc.h"
42 #include "unicode/unistr.h"
46 /* Some utility functions */
// Empty (NUL-only) UChar string; used below as the default 'uch' argument of printUChars().
48 static const UChar kNone
[] = { 0x0000 };
// U_ASSERT(x): when x is a failing UErrorCode, flush stdout and stderr, print
// "<expression text> == <ICU error name>" to stderr, then assert on it.
// NOTE(review): this expands to a bare { ... } block rather than do { ... } while (0),
// so "U_ASSERT(s);" placed directly before an 'else' would not parse. The argument is
// also evaluated several times, so pass a plain variable.
50 #define U_ASSERT(x) { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }}
52 /* Print a UChar if possible, in seven characters. */
// Writes one UChar to stdout as a fixed-width cell.
// Three cases are visible in this excerpt: a quoted single character, a lookup of the
// Unicode character name for values above 0x007F, and a switch on the low seven bits.
// The condition selecting the first case is not visible here.
53 void prettyPrintUChar(UChar c
)
// First case: print the low byte as a quoted character.
57 printf(" '%c' ", (char)(0x00FF&c
));
// Second case: values above 0x007F are looked up by name with u_charName().
58 } else if ( c
> 0x007F ) {
60 UErrorCode status
= U_ZERO_ERROR
;
// 1000 is the capacity passed for 'buf'; 'o' receives the returned name length.
63 o
= u_charName(c
, U_EXTENDED_CHAR_NAME
, buf
, 1000, &status
);
// Only use the name when the lookup succeeded and returned a non-empty name.
64 if(U_SUCCESS(status
) && (o
>0) ) {
// Remaining case: dispatch on the low seven bits of the character.
71 switch((char)(c
& 0x007F)) {
// Dumps a UChar buffer to stdout as labelled rows: a row headed by 'name', a "uni" row
// showing each code unit as \uXXXX, and a "ch" row rendered with prettyPrintUChar().
// 'name' defaults to "?" and 'uch' to the empty kNone string. Default arguments mean
// this file has to be compiled as C++.
89 void printUChars(const char *name
= "?",
90 const UChar
*uch
= kNone
,
// len == -1 with a non-NULL buffer is special-cased here. The handling is not visible
// in this excerpt; presumably the length is taken from the NUL terminator (TODO confirm).
95 if( (len
== -1) && (uch
) ) {
// Row 1: the caller-supplied label, right-aligned in five columns.
99 printf("%5s: ", name
);
100 for( i
= 0; i
<len
; i
++) {
// Row 2: each code unit as a \uXXXX escape.
105 printf("%5s: ", "uni");
106 for( i
= 0; i
<len
; i
++) {
107 printf("\\u%04X ", (int)uch
[i
]);
// Row 3: each code unit rendered by prettyPrintUChar().
111 printf("%5s:", "ch");
112 for( i
= 0; i
<len
; i
++) {
113 prettyPrintUChar(uch
[i
]);
// Dumps a byte buffer to stdout as labelled rows: a row headed by 'name', a row of \xHH
// escapes, and a "ch" row that prints a byte as a quoted character when isgraph() accepts it.
// 'name' defaults to "?" and 'uch' to the empty string.
// The hex row is labelled "uni", the same label printUChars() uses.
118 void printBytes(const char *name
= "?",
119 const char *uch
= "",
// len == -1 with a non-NULL buffer is special-cased here. The handling is not visible
// in this excerpt; presumably the length is taken from the NUL terminator (TODO confirm).
124 if( (len
== -1) && (uch
) ) {
// Row 1: the caller-supplied label.
128 printf("%5s: ", name
);
129 for( i
= 0; i
<len
; i
++) {
// Row 2: each byte as \xHH. The 0x00FF mask keeps a negative char from sign-extending.
134 printf("%5s: ", "uni");
135 for( i
= 0; i
<len
; i
++) {
136 printf("\\x%02X ", 0x00FF & (int)uch
[i
]);
// Row 3: bytes accepted by isgraph() are shown as quoted characters.
140 printf("%5s:", "ch");
141 for( i
= 0; i
<len
; i
++) {
142 if(isgraph(0x00FF & (int)uch
[i
])) {
143 printf(" '%c' ", (char)uch
[i
]);
// Prints a single code point. Two paths are visible: one prints "ch: U+XXXXXX", the
// other narrows the value to one UChar and dumps it with printUChars().
// The condition choosing between them is not visible in this excerpt.
151 void printUChar(UChar32 ch32
)
154 printf("ch: U+%06X\n", ch32
);
// Narrowing cast to UChar. Presumably this path is taken only for values that fit in
// one UChar (TODO confirm against the hidden condition).
157 UChar ch
= (UChar
)ch32
;
158 printUChars("C", &ch
, 1);
162 /*******************************************************************
163 Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
164 followed by an exclamation mark (!) into the KOI8-R Russian code page.
166 This example first creates a UChar String out of the Unicode chars.
168 targetSize must be set to the amount of space available in the target
169 buffer. After fromUChars is called,
170 len will contain the number of bytes in target[] which were
171 used in the resulting codepage. In this case, there is a 1:1 mapping
172 between the input and output characters. The exclamation mark has the
173 same value in both KOI8-R and Unicode.
176 uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
177 ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL '!'
180 uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
184 Converting FROM unicode
186 You must call ucnv_close to clean up the memory used by the
189 'len' returns the number of OUTPUT bytes resulting from the
// Sample 02: converts a fixed UTF-16 string to KOI8-R with ucnv_fromUChars() and
// dumps both the source code units and the resulting bytes.
193 UErrorCode
convsample_02()
195 printf("\n\n==============================================\n"
196 "Sample 02: C: simple Unicode -> koi8-r conversion\n");
199 // **************************** START SAMPLE *******************
// Six Cyrillic code units, then '!' (0x0021) and a terminating NUL.
201 UChar source
[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
202 0x0430, 0x0021, 0x0000 };
204 UErrorCode status
= U_ZERO_ERROR
;
208 // set up the converter
210 conv
= ucnv_open("koi8-r", &status
);
212 assert(U_SUCCESS(status
));
// 100 is the capacity passed for 'target' (its declaration is not visible here).
// The source length of -1 tells ICU the input is NUL-terminated.
215 len
= ucnv_fromUChars(conv
, target
, 100, source
, -1, &status
);
216 assert(U_SUCCESS(status
));
218 // close the converter
221 // ***************************** END SAMPLE ********************
// Show the input code units and the 'len' converted bytes.
224 printUChars("src", source
);
226 printBytes("targ", target
, len
);
// Sample 03: prints the number of converters ICU reports as available and then each
// converter name, separated by spaces. The loop header is not visible in this excerpt.
232 UErrorCode
convsample_03()
234 printf("\n\n==============================================\n"
235 "Sample 03: C: print out all converters\n");
240 // **************************** START SAMPLE *******************
241 count
= ucnv_countAvailable();
242 printf("Available converters: %d\n", count
);
// Name of the i-th available converter.
246 printf("%s ", ucnv_getAvailableName(i
));
249 // ***************************** END SAMPLE ********************
258 #define BUFFERSIZE 17 /* make it interesting :) */
261 Converting from a codepage to Unicode in bulk..
262 What is the best way to determine the buffer size?
264 The 'buffersize' is in bytes of input.
265 For a given converter, dividing this by the minimum char size
266 gives you the maximum number of Unicode characters that could be
267 expected for a given number of input bytes.
268 see: ucnv_getMinCharSize()
270 For example, a single byte codepage like 'Latin-3' has a
271 minimum char size of 1. (It takes at least 1 byte to represent
272 each Unicode char.) So the unicode buffer has the same number of
273 UChars as the input buffer has bytes.
275 In a strictly double byte codepage such as cp1362 (Windows
276 Korean), the minimum char size is 2. So, only half as many Unicode
277 chars as bytes are needed.
279 This work to calculate the buffer size is an optimization. Any
280 size of input and output buffer can be used, as long as the
281 program handles the following cases: If the input buffer is empty,
282 the source pointer will be equal to sourceLimit. If the output
283 buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
// Sample 05: reads 'data01.txt' in BUFFERSIZE-byte chunks, converts each chunk from
// UTF-8 to UChars with ucnv_toUnicode(), walks the converted UChars, and finally
// prints the letter count and the total UChar count.
// Returns U_FILE_ACCESS_ERROR on the path that reports the input file could not be opened.
286 UErrorCode
convsample_05()
288 printf("\n\n==============================================\n"
289 "Sample 05: C: count the number of letters in a UTF-8 document\n");
293 char inBuf
[BUFFERSIZE
];
295 const char *sourceLimit
;
300 int32_t uBufSize
= 0;
302 UErrorCode status
= U_ZERO_ERROR
;
303 uint32_t letters
=0, total
=0;
305 f
= fopen("data01.txt", "r");
308 fprintf(stderr
, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
309 return U_FILE_ACCESS_ERROR
;
312 // **************************** START SAMPLE *******************
313 conv
= ucnv_open("utf-8", &status
);
314 assert(U_SUCCESS(status
));
// Size the UChar buffer as input bytes / minimum bytes per character.
316 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
317 printf("input bytes %d / min chars %d = %d UChars\n",
318 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
// NOTE(review): no NULL check on this allocation is visible in this excerpt.
319 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
322 // grab another buffer's worth
324 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
326 // Convert bytes to unicode
328 sourceLimit
= inBuf
+ count
;
333 targetLimit
= uBuf
+ uBufSize
;
// Streaming conversion: 'source' and 'target' are passed by address and advanced by ICU.
335 ucnv_toUnicode(conv
, &target
, targetLimit
,
336 &source
, sourceLimit
, NULL
,
337 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
338 /* is true (when no more data will come) */
// A full output buffer is expected here and is not treated as a failure.
341 if(status
== U_BUFFER_OVERFLOW_ERROR
)
343 // simply ran out of space - we'll reset the target ptr the next
344 // time through the loop.
345 status
= U_ZERO_ERROR
;
349 // Check other errors here.
350 assert(U_SUCCESS(status
));
351 // Break out of the loop (by force)
354 // Process the Unicode
355 // Todo: handle UTF-16/surrogates
// Visit every UChar produced by this conversion call (loop body not visible here).
357 for(p
= uBuf
; p
<target
; p
++)
// Repeat while unconverted input remains in the current chunk.
363 } while (source
< sourceLimit
); // while simply out of space
366 printf("%d letters out of %d total UChars.\n", letters
, total
);
368 // ***************************** END SAMPLE ********************
379 #define BUFFERSIZE 1024
// Sample 06: reads 'data06.txt' in BUFFERSIZE-byte chunks, decodes it one code point at
// a time with ucnv_getNextUChar(), keeps a per-code-point frequency table of charCount
// entries, counts "ie" and "gh" digraphs, and prints every code point with a non-zero
// frequency.
// Returns U_FILE_ACCESS_ERROR on the path that reports the input file could not be
// opened, and U_UNSUPPORTED_ERROR on the path that reports a non-BMP code point.
386 UErrorCode
convsample_06()
388 printf("\n\n==============================================\n"
389 "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
393 char inBuf
[BUFFERSIZE
];
395 const char *sourceLimit
;
396 int32_t uBufSize
= 0;
398 UErrorCode status
= U_ZERO_ERROR
;
399 uint32_t letters
=0, total
=0;
402 UChar32 charCount
= 0x10000; /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
409 f
= fopen("data06.txt", "r");
412 fprintf(stderr
, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
413 return U_FILE_ACCESS_ERROR
;
// One CharFreqInfo slot per code point below charCount.
416 info
= (CharFreqInfo
*)malloc(sizeof(CharFreqInfo
) * charCount
);
// sizeof() yields size_t, which must not be passed to %d; cast to int to match the
// format specifier without relying on %zu support.
419 fprintf(stderr
, " Couldn't allocate %d bytes for freq counter\n", (int)(sizeof(CharFreqInfo
)*charCount
));
422 /* reset frequencies */
423 for(p
=0;p
<charCount
;p
++)
425 info
[p
].codepoint
= p
;
426 info
[p
].frequency
= 0;
429 // **************************** START SAMPLE *******************
430 conv
= ucnv_open("utf-8", &status
);
431 assert(U_SUCCESS(status
));
// Computed and printed for information, as in sample 05.
433 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
434 printf("input bytes %d / min chars %d = %d UChars\n",
435 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
437 // grab another buffer's worth
439 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
441 // Convert bytes to unicode
443 sourceLimit
= inBuf
+ count
;
// Decode one code point per iteration until the chunk is consumed.
445 while(source
< sourceLimit
)
447 p
= ucnv_getNextUChar(conv
, &source
, sourceLimit
, &status
);
// Conversion errors are reported with the running total and then cleared.
448 if(U_FAILURE(status
))
450 fprintf(stderr
, "%s @ %d\n", u_errorName(status
), total
);
451 status
= U_ZERO_ERROR
;
// Digraph detection on the case-folded previous ('l') and current ('p') code points.
460 if((u_tolower(l
) == 'i') && (u_tolower(p
) == 'e'))
// 'g' followed by U+0127 is counted as the "gh" digraph.
463 if((u_tolower(l
) == 'g') && (u_tolower(p
) == 0x0127))
468 fprintf(stderr
, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p
);
472 return U_UNSUPPORTED_ERROR
;
482 printf("%d letters out of %d total UChars.\n", letters
, total
);
483 printf("%d ie digraphs, %d gh digraphs.\n", ie
, gh
);
485 // now, we could sort it..
487 // qsort(info, charCount, sizeof(info[0]), charfreq_compare);
// Report every code point that occurred at least once.
489 for(p
=0;p
<charCount
;p
++)
491 if(info
[p
].frequency
)
493 printf("% 5d U+%06X ", info
[p
].frequency
, p
);
496 prettyPrintUChar((UChar
)p
);
502 // ***************************** END SAMPLE ********************
511 /******************************************************
512 You must call ucnv_close to clean up the memory used by the
515 'len' returns the number of OUTPUT bytes resulting from the
// Sample 12: converts a fixed Shift-JIS byte string to UChars with ucnv_toUChars()
// and dumps both the source bytes and the resulting code units.
519 UErrorCode
convsample_12()
521 printf("\n\n==============================================\n"
522 "Sample 12: C: simple sjis -> unicode conversion\n");
525 // **************************** START SAMPLE *******************
// ASCII "cat" (0x63 0x61 0x74), six further bytes, and a terminating NUL.
527 char source
[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
529 UErrorCode status
= U_ZERO_ERROR
;
533 // set up the converter
534 conv
= ucnv_open("shift_jis", &status
);
535 assert(U_SUCCESS(status
));
537 // convert to Unicode
538 // Note: we can use strlen, we know it's an 8 bit null terminated codepage
// 100 is the capacity passed for 'target' (its declaration is not visible here).
540 len
= ucnv_toUChars(conv
, target
, 100, source
, strlen(source
), &status
);
542 // close the converter
545 // ***************************** END SAMPLE ********************
// Show the input bytes and the 'len' converted UChars.
548 printBytes("src", source
, strlen(source
) );
550 printUChars("targ", target
, len
);
555 /******************************************************************
556 C: Convert from codepage to Unicode one at a time.
// Sample 13: decodes a fixed Big5 byte sequence one code point at a time with
// ucnv_getNextUChar(), then prints the source byte count and the destination count.
559 UErrorCode
convsample_13()
561 printf("\n\n==============================================\n"
562 "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
// The input is not NUL-terminated; its length always comes from sizeof(sourceChars).
565 const char sourceChars
[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e };
566 // const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
567 const char *source
, *sourceLimit
;
569 UErrorCode status
= U_ZERO_ERROR
;
570 UConverter
*conv
= NULL
;
574 srcCount
= sizeof(sourceChars
);
576 conv
= ucnv_open("Big5", &status
);
// [source, sourceLimit) delimits the bytes still to be converted.
579 source
= sourceChars
;
580 sourceLimit
= sourceChars
+ sizeof(sourceChars
);
582 // **************************** START SAMPLE *******************
585 printBytes("src",source
,sourceLimit
-source
);
// Each call consumes the bytes of one character and advances 'source'.
587 while(source
< sourceLimit
)
590 target
= ucnv_getNextUChar (conv
,
595 // printBytes("src",source,sourceLimit-source);
602 // ************************** END SAMPLE *************************
604 printf("src=%d bytes, dst=%d uchars\n", srcCount
, dstCount
);
// Converts the NUL-terminated UTF-8 string 'source' to UChars and then to
// windows-1252, with a flag callback installed on the second converter.
// Returns the flag recorded in the callback context, i.e. true if the callback was called.
613 UBool
convsample_20_didSubstitute(const char *source
)
617 UConverter
*conv
= NULL
;
618 UErrorCode status
= U_ZERO_ERROR
;
622 FromUFLAGContext
* context
= NULL
;
624 printf("\n\n==============================================\n"
625 "Sample 20: C: Test for substitution using callbacks\n");
627 /* print out the original source */
628 printBytes("src", source
);
631 /* First, convert from UTF8 to unicode */
632 conv
= ucnv_open("utf-8", &status
);
// 100 is the capacity passed for 'uchars' (its declaration is not visible here).
635 len
= ucnv_toUChars(conv
, uchars
, 100, source
, strlen(source
), &status
);
638 printUChars("uch", uchars
, len
);
641 /* Now, close the converter */
644 /* Now, convert to windows-1252 */
645 conv
= ucnv_open("windows-1252", &status
);
648 /* Converter starts out with the SUBSTITUTE callback set. */
650 /* initialize our callback */
651 context
= flagCB_fromU_openContext();
653 /* Set our special callback */
// The last two pointer arguments visible here receive the converter's previous
// callback and context, so the flag callback can chain to them.
654 ucnv_setFromUCallBack(conv
,
657 &(context
->subCallback
),
658 &(context
->subContext
),
663 len2
= ucnv_fromUChars(conv
, bytes
, 100, uchars
, len
, &status
);
// Copy the flag out before the converter, and with it the context, is closed.
666 flagVal
= context
->flag
; /* it's about to go away when we close the cnv */
670 /* print out the converted bytes */
671 printBytes("bytes", bytes
, len2
);
673 return flagVal
; /* true if callback was called */
// Sample 20 driver: runs convsample_20_didSubstitute() on two inputs and prints
// whether the callback reported a substitution for each.
676 UErrorCode
convsample_20()
// sample1 ends with the UTF-8 bytes DF BF (U+07FF); presumably windows-1252 cannot
// represent it, so a substitution is expected. sample2 is plain ASCII.
678 const char *sample1
= "abc\xdf\xbf";
679 const char *sample2
= "abc_def";
682 if(convsample_20_didSubstitute(sample1
))
684 printf("DID substitute.\n******\n");
688 printf("Did NOT substitute.\n*****\n");
691 if(convsample_20_didSubstitute(sample2
))
693 printf("DID substitute.\n******\n");
697 printf("Did NOT substitute.\n*****\n");
703 // 21 - C, callback, with clone and debug
// Same conversion as sample 20 (UTF-8 to UChars to windows-1252), but with a chain of
// debug and flag callbacks. The converter is cloned with ucnv_safeClone(), and the
// conversion is run on the clone using the contexts read back from the clone.
// Returns the flag read from the clone's flag context; it stays FALSE when that
// context could not be retrieved.
707 UBool
convsample_21_didSubstitute(const char *source
)
711 UConverter
*conv
= NULL
, *cloneCnv
= NULL
;
712 UErrorCode status
= U_ZERO_ERROR
;
715 UBool flagVal
= FALSE
;
716 UConverterFromUCallback junkCB
;
718 FromUFLAGContext
*flagCtx
= NULL
,
719 *cloneFlagCtx
= NULL
;
721 debugCBContext
*debugCtx1
= NULL
,
723 *cloneDebugCtx
= NULL
;
725 printf("\n\n==============================================\n"
726 "Sample 21: C: Test for substitution w/ callbacks & clones \n");
728 /* print out the original source */
729 printBytes("src", source
);
732 /* First, convert from UTF8 to unicode */
733 conv
= ucnv_open("utf-8", &status
);
// 100 is the capacity passed for 'uchars' (its declaration is not visible here).
736 len
= ucnv_toUChars(conv
, uchars
, 100, source
, strlen(source
), &status
);
739 printUChars("uch", uchars
, len
);
742 /* Now, close the converter */
745 /* Now, convert to windows-1252 */
746 conv
= ucnv_open("windows-1252", &status
);
749 /* Converter starts out with the SUBSTITUTE callback set. */
751 /* initialize our callback */
752 /* from the 'bottom' innermost, out
753 * CNV -> debugCtx1[debug] -> flagCtx[flag] -> debugCtx2[debug] */
756 printf("flagCB_fromU = %p\n", &flagCB_fromU
);
757 printf("debugCB_fromU = %p\n", &debugCB_fromU
);
// Allocate the three contexts that make up the chain.
760 debugCtx1
= debugCB_openContext();
761 flagCtx
= flagCB_fromU_openContext();
762 debugCtx2
= debugCB_openContext();
// Link each context to the next callback and context in the chain.
764 debugCtx1
->subCallback
= flagCB_fromU
; /* debug1 -> flag */
765 debugCtx1
->subContext
= flagCtx
;
767 flagCtx
->subCallback
= debugCB_fromU
; /* flag -> debug2 */
768 flagCtx
->subContext
= debugCtx2
;
770 debugCtx2
->subCallback
= UCNV_FROM_U_CALLBACK_SUBSTITUTE
;
771 debugCtx2
->subContext
= NULL
;
773 /* Set our special callback */
// The two debugCtx2 pointers are passed as the out-parameters for the converter's
// previous callback and context, so this call overwrites the values assigned just above.
775 ucnv_setFromUCallBack(conv
,
778 &(debugCtx2
->subCallback
),
779 &(debugCtx2
->subContext
),
785 printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
786 conv
, debugCtx1
, debugCtx1
->subCallback
,
787 debugCtx1
->subContext
, flagCtx
, debugCtx2
, debugCtx2
->subCallback
);
// NULL stack buffer and size are passed to ucnv_safeClone() here.
790 cloneCnv
= ucnv_safeClone(conv
, NULL
, NULL
, &status
);
795 printf("Cloned converter from %p -> %p. Closing %p.\n", conv
, cloneCnv
, conv
);
801 printf("%p closed.\n", conv
);
805 /* Now, we have to extract the context */
806 cloneDebugCtx
= NULL
;
// Only the context is wanted; the callback pointer lands in junkCB.
809 ucnv_getFromUCallBack(cloneCnv
, &junkCB
, (const void **)&cloneDebugCtx
);
810 if(cloneDebugCtx
!= NULL
) {
811 cloneFlagCtx
= (FromUFLAGContext
*) cloneDebugCtx
-> subContext
;
814 printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
815 cloneCnv
, cloneDebugCtx
, cloneFlagCtx
, cloneFlagCtx
?cloneFlagCtx
->subContext
:NULL
);
// Run the conversion on the clone, not on the original converter.
817 len2
= ucnv_fromUChars(cloneCnv
, bytes
, 100, uchars
, len
, &status
);
820 if(cloneFlagCtx
!= NULL
) {
821 flagVal
= cloneFlagCtx
->flag
; /* it's about to go away when we close the cnv */
823 printf("** Warning, couldn't get the subcallback \n");
826 ucnv_close(cloneCnv
);
828 /* print out the converted bytes */
829 printBytes("bytes", bytes
, len2
);
831 return flagVal
; /* true if callback was called */
// Sample 21 driver: runs convsample_21_didSubstitute() on the same two inputs as
// sample 20 and prints whether the callback reported a substitution for each.
834 UErrorCode
convsample_21()
// sample1 ends with the UTF-8 bytes DF BF (U+07FF); sample2 is plain ASCII.
836 const char *sample1
= "abc\xdf\xbf";
837 const char *sample2
= "abc_def";
839 if(convsample_21_didSubstitute(sample1
))
841 printf("DID substitute.\n******\n");
845 printf("Did NOT substitute.\n*****\n");
848 if(convsample_21_didSubstitute(sample2
))
850 printf("DID substitute.\n******\n");
854 printf("Did NOT substitute.\n*****\n");
861 // 40- C, cp37 -> UTF16 [data02.bin -> data40.utf16]
863 #define BUFFERSIZE 17 /* make it interesting :) */
// Sample 40: reads 'data02.bin' in BUFFERSIZE-byte chunks, converts it from CCSID 37
// to UChars with ucnv_toUnicode(), and writes the UChars to 'data40.utf16'.
// Returns U_FILE_ACCESS_ERROR on the paths that report the input file could not be
// opened or the output file could not be created.
865 UErrorCode
convsample_40()
867 printf("\n\n==============================================\n"
868 "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
873 char inBuf
[BUFFERSIZE
];
875 const char *sourceLimit
;
879 int32_t uBufSize
= 0;
880 UConverter
*conv
= NULL
;
881 UErrorCode status
= U_ZERO_ERROR
;
882 uint32_t inbytes
=0, total
=0;
884 f
= fopen("data02.bin", "rb");
887 fprintf(stderr
, "Couldn't open file 'data02.bin' (cp37 data file).\n");
888 return U_FILE_ACCESS_ERROR
;
891 out
= fopen("data40.utf16", "wb");
894 fprintf(stderr
, "Couldn't create file 'data40.utf16'.\n");
896 return U_FILE_ACCESS_ERROR
;
899 // **************************** START SAMPLE *******************
900 conv
= ucnv_openCCSID(37, UCNV_IBM
, &status
);
901 assert(U_SUCCESS(status
));
// Size the UChar buffer as input bytes / minimum bytes per character.
903 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
904 printf("input bytes %d / min chars %d = %d UChars\n",
905 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
906 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
909 // grab another buffer's worth
911 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
915 // Convert bytes to unicode
917 sourceLimit
= inBuf
+ count
;
922 targetLimit
= uBuf
+ uBufSize
;
// Streaming conversion: 'source' and 'target' are passed by address and advanced by ICU.
924 ucnv_toUnicode( conv
, &target
, targetLimit
,
925 &source
, sourceLimit
, NULL
,
926 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
927 /* is true (when no more data will come) */
930 if(status
== U_BUFFER_OVERFLOW_ERROR
)
932 // simply ran out of space - we'll reset the target ptr the next
933 // time through the loop.
934 status
= U_ZERO_ERROR
;
938 // Check other errors here.
939 assert(U_SUCCESS(status
));
940 // Break out of the loop (by force)
943 // Process the Unicode
944 // Todo: handle UTF-16/surrogates
// The fwrite() call is kept outside assert() so the output is still written when
// NDEBUG compiles the assertion away; only the result check lives in the assert.
945 { size_t written = fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out);
946 assert(written == (size_t)(target-uBuf)); (void)written; }
947 total
+= (target
-uBuf
);
948 } while (source
< sourceLimit
); // while simply out of space
951 printf("%d bytes in, %d UChars out.\n", inbytes
, total
);
953 // ***************************** END SAMPLE ********************
966 // 46- C, UTF16 -> latin2 [data40.utf16 -> data46.out]
968 #define BUFFERSIZE 24 /* make it interesting :) */
// Sample 46: reads UChars from 'data40.utf16' (the output of convsample_40) in
// BUFFERSIZE-UChar chunks, converts them to iso-8859-2 with ucnv_fromUnicode(), and
// writes the resulting bytes to 'data46.out'.
// Returns U_FILE_ACCESS_ERROR on the paths that report the input file could not be
// opened or the output file could not be created; U_ZERO_ERROR at the end.
970 UErrorCode
convsample_46()
972 printf("\n\n==============================================\n"
973 "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");
978 UChar inBuf
[BUFFERSIZE
];
980 const UChar
*sourceLimit
;
986 UConverter
*conv
= NULL
;
987 UErrorCode status
= U_ZERO_ERROR
;
988 uint32_t inchars
=0, total
=0;
990 f
= fopen("data40.utf16", "rb");
993 fprintf(stderr
, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
994 return U_FILE_ACCESS_ERROR
;
997 out
= fopen("data46.out", "wb");
1000 fprintf(stderr
, "Couldn't create file 'data46.out'.\n");
1002 return U_FILE_ACCESS_ERROR
;
1005 // **************************** START SAMPLE *******************
1006 conv
= ucnv_open( "iso-8859-2", &status
);
1007 assert(U_SUCCESS(status
));
// Size the byte buffer as input UChars * maximum bytes per character.
1009 bufSize
= (BUFFERSIZE
*ucnv_getMaxCharSize(conv
));
1010 printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
1011 BUFFERSIZE
, ucnv_getMaxCharSize(conv
), bufSize
);
1012 buf
= (char*)malloc(bufSize
* sizeof(char));
1015 // grab another buffer's worth
1017 ((count
=fread(inBuf
, sizeof(UChar
), BUFFERSIZE
, f
)) > 0) )
1021 // Convert Unicode to bytes
1023 sourceLimit
= inBuf
+ count
;
1028 targetLimit
= buf
+ bufSize
;
// Streaming conversion: 'source' and 'target' are passed by address and advanced by ICU.
1030 ucnv_fromUnicode( conv
, &target
, targetLimit
,
1031 &source
, sourceLimit
, NULL
,
1032 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
1033 /* is true (when no more data will come) */
1036 if(status
== U_BUFFER_OVERFLOW_ERROR
)
1038 // simply ran out of space - we'll reset the target ptr the next
1039 // time through the loop.
1040 status
= U_ZERO_ERROR
;
1044 // Check other errors here.
1045 assert(U_SUCCESS(status
));
1046 // Break out of the loop (by force)
1049 // Write out the converted bytes
// The fwrite() call is kept outside assert() so the output is still written when
// NDEBUG compiles the assertion away; only the result check lives in the assert.
1050 { size_t written = fwrite(buf, sizeof(buf[0]), (target-buf), out);
1051 assert(written == (size_t)(target-buf)); (void)written; }
1052 total
+= (target
-buf
);
1053 } while (source
< sourceLimit
); // while simply out of space
// inchars * sizeof(UChar) is a size_t, which must not be passed to %d; cast to int.
1056 printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars
, (int)(inchars
* sizeof(UChar
)), total
);
1058 // ***************************** END SAMPLE ********************
1065 return U_ZERO_ERROR
;
1069 #define BUFFERSIZE 219
// Sample 50: detects the Unicode signature (BOM) at the start of a byte buffer with
// ucnv_detectUnicodeSignature(), opens a converter for the detected encoding,
// converts the buffer, and prints the resulting UChars as hex, optionally skipping
// the leading U+FEFF.
1071 void convsample_50() {
1072 printf("\n\n==============================================\n"
1073 "Sample 50: C: ucnv_detectUnicodeSignature\n");
1075 //! [ucnv_detectUnicodeSignature]
1076 UErrorCode err
= U_ZERO_ERROR
;
1077 UBool discardSignature
= TRUE
; /* set to TRUE to throw away the initial U+FEFF */
// The bytes EF BB BF followed by 0x41 0x42 0x43 ("ABC").
1078 char input
[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
1079 int32_t signatureLength
= 0;
1080 const char *encoding
= ucnv_detectUnicodeSignature(input
,sizeof(input
),&signatureLength
,&err
);
1081 UConverter
*conv
= NULL
;
1083 UChar
*target
= output
, *out
;
// The whole input, signature included, is handed to the converter.
1084 const char *source
= input
;
// Convert only when a signature was recognised and no error was reported.
1085 if(encoding
!=NULL
&& U_SUCCESS(err
)){
1086 // should signature be discarded ?
1087 conv
= ucnv_open(encoding
, &err
);
1088 // do the conversion
1089 ucnv_toUnicode(conv
,
1090 &target
, output
+ UPRV_LENGTHOF(output
),
1091 &source
, input
+ sizeof(input
),
1094 if (discardSignature
){
1095 ++out
; // ignore initial U+FEFF
// Print each converted UChar from 'out' up to the end of the converted data.
1097 while(out
!= target
) {
1098 printf("%04x ", *out
++);
1102 //! [ucnv_detectUnicodeSignature]
// Runs every sample in order. convsample_40() must run before convsample_46(),
// because sample 46 reads the 'data40.utf16' file that sample 40 writes.
1113 printf("Default Converter=%s\n", ucnv_getDefaultName() );
1115 convsample_02(); // C , u->koi8r, conv
1116 convsample_03(); // C, iterate
1118 convsample_05(); // C, utf8->u, toUnicode (letter count)
1119 convsample_06(); // C freq counter thingy
1121 convsample_12(); // C, sjis->u, conv
1122 convsample_13(); // C, big5->u, getNextU
1124 convsample_20(); // C, callback
1125 convsample_21(); // C, callback debug
1127 convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
1129 convsample_46(); // C, UTF16 -> latin2 [data40.utf16 -> data46.out]
1131 convsample_50(); // C, detect unicode signature
1133 printf("End of converter samples.\n");