1 /**************************************************************************
3 * Copyright (C) 2000-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
6 ***************************************************************************
7 * file name: convsamp.c
8 * encoding: ASCII (7-bit)
10 * created on: 2000may30
11 * created by: Steven R. Loomis
13 * Sample code for the ICU conversion routines.
15 * Note: Nothing special is needed to build this sample. Link with
16 * the icu UC and icu I18N libraries.
18 * I use 'assert' for error checking, you probably will want
19 * something more flexible. '***BEGIN SAMPLE***' and
20 * '***END SAMPLE***' mark pieces suitable for stand alone
24 * Each test can define its own BUFFERSIZE
28 #define DEBUG_TMI 0 /* define to 1 to enable Too Much Information */
31 #include <ctype.h> /* for isspace, etc. */
34 #include <stdlib.h> /* malloc */
36 #include "unicode/utypes.h" /* Basic ICU data types */
37 #include "unicode/ucnv.h" /* C Converter API */
38 #include "unicode/ustring.h" /* some more string fcns*/
39 #include "unicode/uchar.h" /* char names */
40 #include "unicode/uloc.h"
41 #include "unicode/unistr.h"
45 /* Some utility functions */
/* Empty (NUL-only) UChar string; printUChars below uses it as the default for uch. */
47 static const UChar kNone
[] = { 0x0000 };
/*
 * U_ASSERT(x): when the UErrorCode x is a failure, flush stdout and stderr,
 * print the stringified expression and its ICU error name to stderr, then
 * trip assert().
 * NOTE(review): the expansion is a bare brace block, not do-while(0), so an
 * if/else written around "U_ASSERT(s);" would not parse; x is also evaluated
 * more than once.
 * NOTE(review): ICU may define a macro with this same name -- confirm there
 * is no clash with the included headers.
 */
49 #define U_ASSERT(x) { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }}
51 /* Print a UChar if possible, in seven characters. */
/*
 * Print one UChar to stdout in a short fixed-width form.
 * Several lines of this function (opening brace, first condition, local
 * declarations, switch body) are not visible in this excerpt.
 */
52 void prettyPrintUChar(UChar c
)
/* First branch (its condition is not visible here): print the low byte of c as a quoted character. */
56 printf(" '%c' ", (char)(0x00FF&c
));
/* Code units above U+007F: ask ICU for the extended character name. */
57 } else if ( c
> 0x007F ) {
59 UErrorCode status
= U_ZERO_ERROR
;
/* NOTE(review): the capacity passed is the literal 1000; the declaration of buf is not visible -- confirm it is at least that large. */
62 o
= u_charName(c
, U_EXTENDED_CHAR_NAME
, buf
, 1000, &status
);
/* Only use the name when the lookup succeeded and returned a non-empty result. */
63 if(U_SUCCESS(status
) && (o
>0) ) {
/* Remaining branch: dispatch on the low seven bits of c (switch body not visible here). */
70 switch((char)(c
& 0x007F)) {
/*
 * Dump a UChar string to stdout as three labelled rows: one headed by name,
 * a row headed uni with each code unit in hex, and a row headed ch rendered
 * through prettyPrintUChar().
 * name defaults to "?" and uch to kNone; the parameter(s) after uch, the loop
 * bodies of the first row and the closing lines are not visible in this excerpt.
 */
88 void printUChars(const char *name
= "?",
89 const UChar
*uch
= kNone
,
/* len == -1 with a non-NULL uch is special-cased; the handling itself is not visible (presumably the length is computed from the string -- confirm). */
94 if( (len
== -1) && (uch
) ) {
/* Row 1: the caller-supplied label, right-aligned in five columns. */
98 printf("%5s: ", name
);
99 for( i
= 0; i
<len
; i
++) {
/* Row 2: each code unit as four hex digits. */
104 printf("%5s: ", "uni");
105 for( i
= 0; i
<len
; i
++) {
106 printf("\\u%04X ", (int)uch
[i
]);
/* Row 3: each code unit in printable form. */
110 printf("%5s:", "ch");
111 for( i
= 0; i
<len
; i
++) {
112 prettyPrintUChar(uch
[i
]);
/*
 * Dump a byte string to stdout as three labelled rows, mirroring printUChars():
 * a row headed by name, a hex row, and a row of printable characters.
 * name defaults to "?" and uch to the empty string; the parameter(s) after
 * uch and several body lines are not visible in this excerpt.
 */
117 void printBytes(const char *name
= "?",
118 const char *uch
= "",
/* len == -1 with a non-NULL uch is special-cased; the handling itself is not visible (presumably strlen -- confirm). */
123 if( (len
== -1) && (uch
) ) {
127 printf("%5s: ", name
);
128 for( i
= 0; i
<len
; i
++) {
/* NOTE(review): this row prints bytes as hex but is still labelled uni, apparently carried over from printUChars(). The label is runtime output, so it is left unchanged here. */
133 printf("%5s: ", "uni");
134 for( i
= 0; i
<len
; i
++) {
/* Mask to 0..255 so a negative char value does not print sign-extended. */
135 printf("\\x%02X ", 0x00FF & (int)uch
[i
]);
139 printf("%5s:", "ch");
140 for( i
= 0; i
<len
; i
++) {
/* The same mask keeps the isgraph() argument in the unsigned char range. */
141 if(isgraph(0x00FF & (int)uch
[i
])) {
142 printf(" '%c' ", (char)uch
[i
]);
/*
 * Print a single code point: first its U+ hex value, then (in the visible
 * path) the value narrowed to one UChar via printUChars().
 * Some lines of this function are not visible in this excerpt.
 */
150 void printUChar(UChar32 ch32
)
/* NOTE(review): ch32 is a signed UChar32 passed to an X conversion, which expects unsigned int. */
153 printf("ch: U+%06X\n", ch32
);
/* NOTE(review): the cast narrows UChar32 to UChar; unless an invisible branch guards it, a code point above U+FFFF would be truncated. */
156 UChar ch
= (UChar
)ch32
;
157 printUChars("C", &ch
, 1);
161 /*******************************************************************
162 Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
163 followed by an exclamation mark (!) into the KOI8-R Russian code page.
165 This example first creates a UChar String out of the Unicode chars.
167 targetSize must be set to the amount of space available in the target
168 buffer. After fromUChars is called,
169 len will contain the number of bytes in target[] which were
170 used in the resulting codepage. In this case, there is a 1:1 mapping
171 between the input and output characters. The exclamation mark has the
172 same value in both KOI8-R and Unicode.
175 uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
176 ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL '!'
179 uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
183 Converting FROM unicode
185 You must call ucnv_close to clean up the memory used by the
188 'len' returns the number of OUTPUT bytes resulting from the
/*
 * Sample 02: convert a fixed NUL-terminated UChar string to koi8-r with
 * ucnv_fromUChars() and print both the source and the resulting bytes.
 * Declarations of conv, target and len, the ucnv_close() call and the return
 * statement are not visible in this excerpt.
 */
192 UErrorCode
convsample_02()
194 printf("\n\n==============================================\n"
195 "Sample 02: C: simple Unicode -> koi8-r conversion\n");
198 // **************************** START SAMPLE *******************
/* Six Cyrillic code units, an exclamation mark, and a terminating 0x0000. */
200 UChar source
[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
201 0x0430, 0x0021, 0x0000 };
203 UErrorCode status
= U_ZERO_ERROR
;
207 // set up the converter
209 conv
= ucnv_open("koi8-r", &status
);
211 assert(U_SUCCESS(status
));
/* Source length -1: the input is taken as NUL-terminated (see the trailing 0x0000 above). */
/* NOTE(review): 100 is the target capacity; the declaration of target is not visible -- confirm it matches. */
214 len
= ucnv_fromUChars(conv
, target
, 100, source
, -1, &status
);
215 assert(U_SUCCESS(status
));
217 // close the converter
220 // ***************************** END SAMPLE ********************
/* Show the input code units, then the len converted bytes. */
223 printUChars("src", source
);
225 printBytes("targ", target
, len
);
/*
 * Sample 03: print the number of available converters and then each
 * converter name. The loop header, declarations and return statement are
 * not visible in this excerpt.
 */
231 UErrorCode
convsample_03()
233 printf("\n\n==============================================\n"
234 "Sample 03: C: print out all converters\n");
239 // **************************** START SAMPLE *******************
240 count
= ucnv_countAvailable();
241 printf("Available converters: %d\n", count
);
/* Body of the (invisible) loop over i: print the i-th converter name followed by a space. */
245 printf("%s ", ucnv_getAvailableName(i
));
248 // ***************************** END SAMPLE ********************
257 #define BUFFERSIZE 17 /* make it interesting :) */
260 Converting from a codepage to Unicode in bulk..
261 What is the best way to determine the buffer size?
263 The 'buffersize' is in bytes of input.
264 For a given converter, dividing this by the minimum char size
265 gives you the maximum number of Unicode characters that could be
266 expected for a given number of input bytes.
267 see: ucnv_getMinCharSize()
269 For example, a single byte codepage like 'Latin-3' has a
270 minimum char size of 1. (It takes at least 1 byte to represent
271 each Unicode char.) So the unicode buffer has the same number of
272 UChars as the input buffer has bytes.
274 In a strictly double byte codepage such as cp1362 (Windows
275 Korean), the minimum char size is 2. So, only half as many Unicode
276 chars as bytes are needed.
278 This work to calculate the buffer size is an optimization. Any
279 size of input and output buffer can be used, as long as the
280 program handles the following cases: If the input buffer is empty,
281 the source pointer will be equal to sourceLimit. If the output
282 buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
/*
 * Sample 05 (per its banner): count the letters in a UTF-8 document.
 * Reads data01.txt in BUFFERSIZE-byte chunks, converts each chunk to UChars
 * with ucnv_toUnicode(), and walks the converted UChars.
 * Many lines (declarations, loop headers, the per-character test, cleanup
 * and return) are not visible in this excerpt.
 */
285 UErrorCode
convsample_05()
287 printf("\n\n==============================================\n"
288 "Sample 05: C: count the number of letters in a UTF-8 document\n");
292 char inBuf
[BUFFERSIZE
];
294 const char *sourceLimit
;
299 int32_t uBufSize
= 0;
301 UErrorCode status
= U_ZERO_ERROR
;
302 uint32_t letters
=0, total
=0;
/* NOTE(review): the file is opened in text mode but consumed as raw bytes with fread(); other samples in this file use rb -- consider the same here. */
304 f
= fopen("data01.txt", "r");
/* Open-failure path (the condition is not visible here): report and bail out. */
307 fprintf(stderr
, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
308 return U_FILE_ACCESS_ERROR
;
311 // **************************** START SAMPLE *******************
312 conv
= ucnv_open("utf-8", &status
);
313 assert(U_SUCCESS(status
));
/* Size the UChar buffer as input bytes divided by the minimum bytes per character of the converter. */
315 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
316 printf("input bytes %d / min chars %d = %d UChars\n",
317 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
/* NOTE(review): no check of the malloc result is visible in this excerpt. */
318 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
321 // grab another buffer's worth
323 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
325 // Convert bytes to unicode
327 sourceLimit
= inBuf
+ count
;
332 targetLimit
= uBuf
+ uBufSize
;
334 ucnv_toUnicode(conv
, &target
, targetLimit
,
335 &source
, sourceLimit
, NULL
,
336 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
337 /* is true (when no more data will come) */
/* A full target buffer is not treated as a failure: the error is cleared and the inner loop runs again. */
340 if(status
== U_BUFFER_OVERFLOW_ERROR
)
342 // simply ran out of space - we'll reset the target ptr the next
343 // time through the loop.
344 status
= U_ZERO_ERROR
;
348 // Check other errors here.
349 assert(U_SUCCESS(status
));
350 // Break out of the loop (by force)
353 // Process the Unicode
354 // Todo: handle UTF-16/surrogates
/* Walk the UChars produced by this conversion call; the loop body is not visible here. */
356 for(p
= uBuf
; p
<target
; p
++)
362 } while (source
< sourceLimit
); // while simply out of space
/* NOTE(review): letters and total are uint32_t but are printed with a signed d conversion. */
365 printf("%d letters out of %d total UChars.\n", letters
, total
);
367 // ***************************** END SAMPLE ********************
378 #define BUFFERSIZE 1024
/*
 * Sample 06 (per its banner): frequency distribution of letters in a UTF-8
 * document. Reads data06.txt in BUFFERSIZE-byte chunks, pulls one code point
 * at a time with ucnv_getNextUChar(), keeps a per-code-point table, and also
 * looks at adjacent pairs (previous l, current p) for two digraphs.
 * Many lines (declarations, conditions, counters, cleanup, return) are not
 * visible in this excerpt.
 */
385 UErrorCode
convsample_06()
387 printf("\n\n==============================================\n"
388 "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
392 char inBuf
[BUFFERSIZE
];
394 const char *sourceLimit
;
395 int32_t uBufSize
= 0;
397 UErrorCode status
= U_ZERO_ERROR
;
398 uint32_t letters
=0, total
=0;
/* Table size: one slot per code point below 0x10000. */
401 UChar32 charCount
= 0x10000; /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
/* NOTE(review): text-mode open for data that is consumed as raw bytes; consider rb. */
408 f
= fopen("data06.txt", "r");
/* Open-failure path (condition not visible here). */
411 fprintf(stderr
, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
412 return U_FILE_ACCESS_ERROR
;
415 info
= (CharFreqInfo
*)malloc(sizeof(CharFreqInfo
) * charCount
);
/* Allocation-failure message (condition not visible here). */
/* NOTE(review): the argument is a size_t product but the conversion is d; that is a format/argument mismatch. */
418 fprintf(stderr
, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo
)*charCount
);
421 /* reset frequencies */
422 for(p
=0;p
<charCount
;p
++)
424 info
[p
].codepoint
= p
;
425 info
[p
].frequency
= 0;
428 // **************************** START SAMPLE *******************
429 conv
= ucnv_open("utf-8", &status
);
430 assert(U_SUCCESS(status
));
432 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
433 printf("input bytes %d / min chars %d = %d UChars\n",
434 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
436 // grab another buffer's worth
438 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
440 // Convert bytes to unicode
442 sourceLimit
= inBuf
+ count
;
/* Consume the chunk one code point at a time. */
444 while(source
< sourceLimit
)
446 p
= ucnv_getNextUChar(conv
, &source
, sourceLimit
, &status
);
/* On a conversion error: report it with the running total, clear the status, and carry on (what follows is not visible here). */
447 if(U_FAILURE(status
))
449 fprintf(stderr
, "%s @ %d\n", u_errorName(status
), total
);
450 status
= U_ZERO_ERROR
;
/* Digraph checks on (previous, current), case-folded with u_tolower(): i then e, and g then U+0127. */
459 if((u_tolower(l
) == 'i') && (u_tolower(p
) == 'e'))
462 if((u_tolower(l
) == 'g') && (u_tolower(p
) == 0x0127))
/* Code point outside the table (condition not visible here): report and give up. */
467 fprintf(stderr
, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p
);
/* NOTE(review): no cleanup of f, info or conv is visible before this early return -- confirm against the full file. */
471 return U_UNSUPPORTED_ERROR
;
481 printf("%d letters out of %d total UChars.\n", letters
, total
);
482 printf("%d ie digraphs, %d gh digraphs.\n", ie
, gh
);
484 // now, we could sort it..
486 // qsort(info, charCount, sizeof(info[0]), charfreq_compare);
/* Report every code point that was seen at least once. */
488 for(p
=0;p
<charCount
;p
++)
490 if(info
[p
].frequency
)
492 printf("% 5d U+%06X ", info
[p
].frequency
, p
);
495 prettyPrintUChar((UChar
)p
);
501 // ***************************** END SAMPLE ********************
510 /******************************************************
511 You must call ucnv_close to clean up the memory used by the
514 'len' returns the number of OUTPUT bytes resulting from the
/*
 * Sample 12: convert a fixed shift_jis byte string to UChars with
 * ucnv_toUChars() and print the source bytes and the resulting code units.
 * Declarations of conv, target and len, the ucnv_close() call and the return
 * statement are not visible in this excerpt.
 */
518 UErrorCode
convsample_12()
520 printf("\n\n==============================================\n"
521 "Sample 12: C: simple sjis -> unicode conversion\n");
524 // **************************** START SAMPLE *******************
/* Three ASCII bytes followed by three two-byte sequences and a terminating 0x00. */
526 char source
[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
528 UErrorCode status
= U_ZERO_ERROR
;
532 // set up the converter
533 conv
= ucnv_open("shift_jis", &status
);
534 assert(U_SUCCESS(status
));
536 // convert to Unicode
537 // Note: we can use strlen, we know it's an 8 bit null terminated codepage
/* NOTE(review): 100 is the target capacity (declaration of target not visible); no status check after this call is visible in this excerpt. */
539 len
= ucnv_toUChars(conv
, target
, 100, source
, strlen(source
), &status
);
541 // close the converter
544 // ***************************** END SAMPLE ********************
547 printBytes("src", source
, strlen(source
) );
549 printUChars("targ", target
, len
);
554 /******************************************************************
555 C: Convert from codepage to Unicode one at a time.
/*
 * Sample 13: convert a fixed Big5 byte sequence to Unicode one code point at
 * a time with ucnv_getNextUChar(), then report byte and UChar counts.
 * The loop body, several declarations, cleanup and return are not visible in
 * this excerpt.
 */
558 UErrorCode
convsample_13()
560 printf("\n\n==============================================\n"
561 "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
/* The array has no terminating NUL, so its extent is always taken with sizeof below. */
564 const char sourceChars
[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e };
565 // const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
566 const char *source
, *sourceLimit
;
568 UErrorCode status
= U_ZERO_ERROR
;
569 UConverter
*conv
= NULL
;
573 srcCount
= sizeof(sourceChars
);
/* NOTE(review): no status check after ucnv_open is visible in this excerpt. */
575 conv
= ucnv_open("Big5", &status
);
/* source walks the input; sourceLimit is one past its last byte. */
578 source
= sourceChars
;
579 sourceLimit
= sourceChars
+ sizeof(sourceChars
);
581 // **************************** START SAMPLE *******************
584 printBytes("src",source
,sourceLimit
-source
);
/* Pull code points until the input is exhausted; the remaining call arguments and loop body are not visible here. */
586 while(source
< sourceLimit
)
589 target
= ucnv_getNextUChar (conv
,
594 // printBytes("src",source,sourceLimit-source);
601 // ************************** END SAMPLE *************************
603 printf("src=%d bytes, dst=%d uchars\n", srcCount
, dstCount
);
/*
 * Sample 20 helper: convert the UTF-8 string source to UChars, then to
 * windows-1252 with a flag callback installed, and return the flag recorded
 * by that callback (true if the callback was called, per the comment on the
 * return line).
 * Buffer declarations, status checks, ucnv_close() calls and the new-callback
 * arguments of ucnv_setFromUCallBack are not visible in this excerpt.
 */
612 UBool
convsample_20_didSubstitute(const char *source
)
616 UConverter
*conv
= NULL
;
617 UErrorCode status
= U_ZERO_ERROR
;
621 FromUFLAGContext
* context
= NULL
;
623 printf("\n\n==============================================\n"
624 "Sample 20: C: Test for substitution using callbacks\n");
626 /* print out the original source */
627 printBytes("src", source
);
630 /* First, convert from UTF8 to unicode */
631 conv
= ucnv_open("utf-8", &status
);
/* 100 is the capacity of uchars (declaration not visible here -- confirm). */
634 len
= ucnv_toUChars(conv
, uchars
, 100, source
, strlen(source
), &status
);
637 printUChars("uch", uchars
, len
);
640 /* Now, close the converter */
643 /* Now, convert to windows-1252 */
644 conv
= ucnv_open("windows-1252", &status
);
647 /* Converter starts out with the SUBSTITUTE callback set. */
649 /* initialize our callback */
650 context
= flagCB_fromU_openContext();
652 /* Set our special callback */
/* The two visible arguments point at the subCallback and subContext fields of the flag context; presumably these are the out-parameters that receive the previous callback and context -- confirm against the ucnv_setFromUCallBack signature. */
653 ucnv_setFromUCallBack(conv
,
656 &(context
->subCallback
),
657 &(context
->subContext
),
662 len2
= ucnv_fromUChars(conv
, bytes
, 100, uchars
, len
, &status
);
/* Copy the flag out now, before the context is released along with the converter. */
665 flagVal
= context
->flag
; /* it's about to go away when we close the cnv */
/* These are the converted windows-1252 bytes, not the original source. */
669 /* print out the original source */
670 printBytes("bytes", bytes
, len2
);
672 return flagVal
; /* true if callback was called */
/*
 * Sample 20 driver: run convsample_20_didSubstitute() on two inputs and
 * print whether a substitution happened for each.
 * Braces, else keywords and the return statement are not visible in this
 * excerpt.
 */
675 UErrorCode
convsample_20()
/* Bytes DF BF are the UTF-8 encoding of U+07FF; presumably chosen because windows-1252 cannot represent it -- confirm. */
677 const char *sample1
= "abc\xdf\xbf";
/* Plain ASCII input. */
678 const char *sample2
= "abc_def";
681 if(convsample_20_didSubstitute(sample1
))
683 printf("DID substitute.\n******\n");
687 printf("Did NOT substitute.\n*****\n");
690 if(convsample_20_didSubstitute(sample2
))
692 printf("DID substitute.\n******\n");
696 printf("Did NOT substitute.\n*****\n");
702 // 21 - C, callback, with clone and debug
/*
 * Sample 21 helper: same conversion as sample 20 (UTF-8 to UChars to
 * windows-1252), but with a chain of callbacks (debug1 to flag to debug2 to
 * substitute) installed, and with the actual conversion done on a clone made
 * by ucnv_safeClone(). Returns the flag read from the flag context of the
 * clone, or the initial FALSE when that context could not be recovered.
 * Buffer declarations, status checks, the closing of the original converter
 * and the new-callback arguments of ucnv_setFromUCallBack are not visible in
 * this excerpt.
 */
706 UBool
convsample_21_didSubstitute(const char *source
)
710 UConverter
*conv
= NULL
, *cloneCnv
= NULL
;
711 UErrorCode status
= U_ZERO_ERROR
;
714 UBool flagVal
= FALSE
;
/* Receives the callback function pointer from ucnv_getFromUCallBack below; only the context is used afterwards. */
715 UConverterFromUCallback junkCB
;
717 FromUFLAGContext
*flagCtx
= NULL
,
718 *cloneFlagCtx
= NULL
;
720 debugCBContext
*debugCtx1
= NULL
,
722 *cloneDebugCtx
= NULL
;
724 printf("\n\n==============================================\n"
725 "Sample 21: C: Test for substitution w/ callbacks & clones \n");
727 /* print out the original source */
728 printBytes("src", source
);
731 /* First, convert from UTF8 to unicode */
732 conv
= ucnv_open("utf-8", &status
);
735 len
= ucnv_toUChars(conv
, uchars
, 100, source
, strlen(source
), &status
);
738 printUChars("uch", uchars
, len
);
741 /* Now, close the converter */
744 /* Now, convert to windows-1252 */
745 conv
= ucnv_open("windows-1252", &status
);
748 /* Converter starts out with the SUBSTITUTE callback set. */
750 /* initialize our callback */
751 /* from the 'bottom' innermost, out
752 * CNV -> debugCtx1[debug] -> flagCtx[flag] -> debugCtx2[debug] */
/* NOTE(review): p conversions expect a pointer to void; passing function addresses (here) and uncast object pointers (further down) is not strictly portable. */
755 printf("flagCB_fromU = %p\n", &flagCB_fromU
);
756 printf("debugCB_fromU = %p\n", &debugCB_fromU
);
/* Allocate the three contexts that make up the chain. */
759 debugCtx1
= debugCB_openContext();
760 flagCtx
= flagCB_fromU_openContext();
761 debugCtx2
= debugCB_openContext();
/* Link each context to the next callback and context in the chain. */
763 debugCtx1
->subCallback
= flagCB_fromU
; /* debug1 -> flag */
764 debugCtx1
->subContext
= flagCtx
;
766 flagCtx
->subCallback
= debugCB_fromU
; /* flag -> debug2 */
767 flagCtx
->subContext
= debugCtx2
;
/* The tail of the chain defaults to the stock substitute callback. */
769 debugCtx2
->subCallback
= UCNV_FROM_U_CALLBACK_SUBSTITUTE
;
770 debugCtx2
->subContext
= NULL
;
772 /* Set our special callback */
/* The two visible arguments point at the fields of debugCtx2 assigned just above; presumably they are the out-parameters receiving the previous callback and context of the converter, overwriting those fields -- confirm against the ucnv_setFromUCallBack signature. */
774 ucnv_setFromUCallBack(conv
,
777 &(debugCtx2
->subCallback
),
778 &(debugCtx2
->subContext
),
784 printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
785 conv
, debugCtx1
, debugCtx1
->subCallback
,
786 debugCtx1
->subContext
, flagCtx
, debugCtx2
, debugCtx2
->subCallback
);
/* Clone the converter; the rest of the function works on the clone only. */
789 cloneCnv
= ucnv_safeClone(conv
, NULL
, NULL
, &status
);
794 printf("Cloned converter from %p -> %p. Closing %p.\n", conv
, cloneCnv
, conv
);
800 printf("%p closed.\n", conv
);
804 /* Now, we have to extract the context */
805 cloneDebugCtx
= NULL
;
/* Fetch the outermost context of the clone, then follow its subContext to reach the flag context. */
808 ucnv_getFromUCallBack(cloneCnv
, &junkCB
, (const void **)&cloneDebugCtx
);
809 if(cloneDebugCtx
!= NULL
) {
810 cloneFlagCtx
= (FromUFLAGContext
*) cloneDebugCtx
-> subContext
;
813 printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
814 cloneCnv
, cloneDebugCtx
, cloneFlagCtx
, cloneFlagCtx
?cloneFlagCtx
->subContext
:NULL
);
816 len2
= ucnv_fromUChars(cloneCnv
, bytes
, 100, uchars
, len
, &status
);
/* Read the flag before the clone is closed; if the flag context was not recovered, flagVal keeps its initial FALSE. */
819 if(cloneFlagCtx
!= NULL
) {
820 flagVal
= cloneFlagCtx
->flag
; /* it's about to go away when we close the cnv */
822 printf("** Warning, couldn't get the subcallback \n");
825 ucnv_close(cloneCnv
);
/* These are the converted windows-1252 bytes, not the original source. */
827 /* print out the original source */
828 printBytes("bytes", bytes
, len2
);
830 return flagVal
; /* true if callback was called */
/*
 * Sample 21 driver: run convsample_21_didSubstitute() on the same two inputs
 * as sample 20 and print whether a substitution happened for each.
 * Braces, else keywords and the return statement are not visible in this
 * excerpt.
 */
833 UErrorCode
convsample_21()
/* Bytes DF BF are the UTF-8 encoding of U+07FF. */
835 const char *sample1
= "abc\xdf\xbf";
/* Plain ASCII input. */
836 const char *sample2
= "abc_def";
838 if(convsample_21_didSubstitute(sample1
))
840 printf("DID substitute.\n******\n");
844 printf("Did NOT substitute.\n*****\n");
847 if(convsample_21_didSubstitute(sample2
))
849 printf("DID substitute.\n******\n");
853 printf("Did NOT substitute.\n*****\n");
860 // 40- C, cp37 -> UTF16 [data02.bin -> data40.utf16]
862 #define BUFFERSIZE 17 /* make it interesting :) */
/*
 * Sample 40: convert data02.bin from CCSID 37 to UTF-16, writing the UChars
 * to data40.utf16. Input is read in BUFFERSIZE-byte chunks and converted
 * with ucnv_toUnicode(); each batch of UChars is written with fwrite().
 * Many lines (declarations, loop headers, counters, cleanup and return) are
 * not visible in this excerpt.
 */
864 UErrorCode
convsample_40()
866 printf("\n\n==============================================\n"
867 "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
872 char inBuf
[BUFFERSIZE
];
874 const char *sourceLimit
;
878 int32_t uBufSize
= 0;
879 UConverter
*conv
= NULL
;
880 UErrorCode status
= U_ZERO_ERROR
;
/* NOTE(review): no update of inbytes is visible in this excerpt, yet it is printed at the end -- confirm it is accumulated in the full file. */
881 uint32_t inbytes
=0, total
=0;
883 f
= fopen("data02.bin", "rb");
/* Open-failure path (condition not visible here). */
886 fprintf(stderr
, "Couldn't open file 'data02.bin' (cp37 data file).\n");
887 return U_FILE_ACCESS_ERROR
;
890 out
= fopen("data40.utf16", "wb");
/* Create-failure path (condition not visible here). */
893 fprintf(stderr
, "Couldn't create file 'data40.utf16'.\n");
895 return U_FILE_ACCESS_ERROR
;
898 // **************************** START SAMPLE *******************
899 conv
= ucnv_openCCSID(37, UCNV_IBM
, &status
);
900 assert(U_SUCCESS(status
));
/* Size the UChar buffer as input bytes divided by the minimum bytes per character of the converter. */
902 uBufSize
= (BUFFERSIZE
/ucnv_getMinCharSize(conv
));
903 printf("input bytes %d / min chars %d = %d UChars\n",
904 BUFFERSIZE
, ucnv_getMinCharSize(conv
), uBufSize
);
/* NOTE(review): no check of the malloc result is visible in this excerpt. */
905 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
908 // grab another buffer's worth
910 ((count
=fread(inBuf
, 1, BUFFERSIZE
, f
)) > 0) )
914 // Convert bytes to unicode
916 sourceLimit
= inBuf
+ count
;
921 targetLimit
= uBuf
+ uBufSize
;
923 ucnv_toUnicode( conv
, &target
, targetLimit
,
924 &source
, sourceLimit
, NULL
,
925 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
926 /* is true (when no more data will come) */
/* A full target buffer is not treated as a failure: the error is cleared and the inner loop runs again. */
929 if(status
== U_BUFFER_OVERFLOW_ERROR
)
931 // simply ran out of space - we'll reset the target ptr the next
932 // time through the loop.
933 status
= U_ZERO_ERROR
;
937 // Check other errors here.
938 assert(U_SUCCESS(status
));
939 // Break out of the loop (by force)
942 // Process the Unicode
943 // Todo: handle UTF-16/surrogates
/* NOTE(review): the fwrite call is an argument of assert(); when the file is compiled with NDEBUG the whole expression is removed and nothing is written. The write should be hoisted out and only its result asserted. */
944 assert(fwrite(uBuf
, sizeof(uBuf
[0]), (target
-uBuf
), out
) ==
945 (size_t)(target
-uBuf
));
946 total
+= (target
-uBuf
);
947 } while (source
< sourceLimit
); // while simply out of space
/* NOTE(review): inbytes and total are uint32_t but are printed with a signed d conversion. */
950 printf("%d bytes in, %d UChars out.\n", inbytes
, total
);
952 // ***************************** END SAMPLE ********************
965 // 46- C, UTF16 -> latin2 [data40.utf16 -> data46.out]
967 #define BUFFERSIZE 24 /* make it interesting :) */
/*
 * Sample 46: convert data40.utf16 (UTF-16 written by sample 40) to
 * iso-8859-2, writing the bytes to data46.out. Input is read in chunks of
 * BUFFERSIZE UChars and converted with ucnv_fromUnicode(); each batch of
 * bytes is written with fwrite().
 * Many lines (declarations, loop headers, counters and cleanup) are not
 * visible in this excerpt.
 */
969 UErrorCode
convsample_46()
971 printf("\n\n==============================================\n"
972 "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");
977 UChar inBuf
[BUFFERSIZE
];
979 const UChar
*sourceLimit
;
985 UConverter
*conv
= NULL
;
986 UErrorCode status
= U_ZERO_ERROR
;
/* NOTE(review): no update of inchars is visible in this excerpt, yet it is printed at the end -- confirm it is accumulated in the full file. */
987 uint32_t inchars
=0, total
=0;
989 f
= fopen("data40.utf16", "rb");
/* Open-failure path (condition not visible here). */
992 fprintf(stderr
, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
993 return U_FILE_ACCESS_ERROR
;
996 out
= fopen("data46.out", "wb");
/* Create-failure path (condition not visible here). */
999 fprintf(stderr
, "Couldn't create file 'data46.out'.\n");
1001 return U_FILE_ACCESS_ERROR
;
1004 // **************************** START SAMPLE *******************
1005 conv
= ucnv_open( "iso-8859-2", &status
);
1006 assert(U_SUCCESS(status
));
/* Size the byte buffer as input UChars times the maximum bytes per character of the converter. */
1008 bufSize
= (BUFFERSIZE
*ucnv_getMaxCharSize(conv
));
1009 printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
1010 BUFFERSIZE
, ucnv_getMaxCharSize(conv
), bufSize
);
/* NOTE(review): no check of the malloc result is visible in this excerpt. */
1011 buf
= (char*)malloc(bufSize
* sizeof(char));
1014 // grab another buffer's worth
/* fread is called with an element size of sizeof(UChar), so count is a number of UChars, not bytes. */
1016 ((count
=fread(inBuf
, sizeof(UChar
), BUFFERSIZE
, f
)) > 0) )
1020 // Convert Unicode to bytes
1022 sourceLimit
= inBuf
+ count
;
1027 targetLimit
= buf
+ bufSize
;
1029 ucnv_fromUnicode( conv
, &target
, targetLimit
,
1030 &source
, sourceLimit
, NULL
,
1031 feof(f
)?TRUE
:FALSE
, /* pass 'flush' when eof */
1032 /* is true (when no more data will come) */
/* A full target buffer is not treated as a failure: the error is cleared and the inner loop runs again. */
1035 if(status
== U_BUFFER_OVERFLOW_ERROR
)
1037 // simply ran out of space - we'll reset the target ptr the next
1038 // time through the loop.
1039 status
= U_ZERO_ERROR
;
1043 // Check other errors here.
1044 assert(U_SUCCESS(status
));
1045 // Break out of the loop (by force)
1048 // Write out the converted bytes
/* NOTE(review): the fwrite call is an argument of assert(); when the file is compiled with NDEBUG the whole expression is removed and nothing is written. The write should be hoisted out and only its result asserted. */
1049 assert(fwrite(buf
, sizeof(buf
[0]), (target
-buf
), out
) ==
1050 (size_t)(target
-buf
));
1051 total
+= (target
-buf
);
1052 } while (source
< sourceLimit
); // while simply out of space
/* NOTE(review): the second argument is a size_t product (inchars times sizeof(UChar)) but is printed with d; that is a format/argument mismatch. */
1055 printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars
, inchars
* sizeof(UChar
), total
);
1057 // ***************************** END SAMPLE ********************
1064 return U_ZERO_ERROR
;
1068 #define BUFFERSIZE 219
/*
 * Sample 50: detect a Unicode signature (BOM) at the start of a byte buffer
 * with ucnv_detectUnicodeSignature(), open a converter for the detected
 * encoding, convert the buffer to UChars and print them in hex, optionally
 * skipping the first UChar.
 * The declaration of output, the tail of the ucnv_toUnicode call, the
 * assignment to out and the cleanup are not visible in this excerpt.
 */
1070 void convsample_50() {
1071 printf("\n\n==============================================\n"
1072 "Sample 50: C: ucnv_detectUnicodeSignature\n");
1074 //! [ucnv_detectUnicodeSignature]
1075 UErrorCode err
= U_ZERO_ERROR
;
1076 UBool discardSignature
= TRUE
; /* set to TRUE to throw away the initial U+FEFF */
/* Bytes EF BB BF followed by the ASCII letters A, B, C; the array is not NUL-terminated, so sizeof is used for its length. */
1077 char input
[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
1078 int32_t signatureLength
= 0;
1079 const char *encoding
= ucnv_detectUnicodeSignature(input
,sizeof(input
),&signatureLength
,&err
);
1080 UConverter
*conv
= NULL
;
/* NOTE(review): out is declared here without an initialiser; its assignment is not visible in this excerpt -- confirm it is set before the increment and loop below. */
1082 UChar
*target
= output
, *out
;
1083 const char *source
= input
;
/* Only convert when a signature was recognised and no error was reported. */
1084 if(encoding
!=NULL
&& U_SUCCESS(err
)){
1085 // should signature be discarded ?
/* NOTE(review): no check of err after ucnv_open is visible before conv is used. */
1086 conv
= ucnv_open(encoding
, &err
);
1087 // do the conversion
/* Target limit is output plus its element count (byte size divided by U_SIZEOF_UCHAR); source limit is one past the last input byte. */
1088 ucnv_toUnicode(conv
,
1089 &target
, output
+ sizeof(output
)/U_SIZEOF_UCHAR
,
1090 &source
, input
+ sizeof(input
),
1093 if (discardSignature
){
1094 ++out
; // ignore initial U+FEFF
/* Print every converted UChar from out up to (not including) target. */
1096 while(out
!= target
) {
1097 printf("%04x ", *out
++);
1101 //! [ucnv_detectUnicodeSignature]
/*
 * Driver statements (presumably the body of main; the function header is not
 * visible in this excerpt): print the default converter name, then run each
 * sample in order.
 * NOTE(review): every sample except convsample_50 is declared to return a
 * UErrorCode, and all of those results are discarded here.
 */
1112 printf("Default Converter=%s\n", ucnv_getDefaultName() );
1114 convsample_02(); // C , u->koi8r, conv
1115 convsample_03(); // C, iterate
1117 convsample_05(); // C, utf8->u, toUnicode
1118 convsample_06(); // C freq counter thingy
1120 convsample_12(); // C, sjis->u, conv
1121 convsample_13(); // C, big5->u, getNextU
1123 convsample_20(); // C, callback
1124 convsample_21(); // C, callback debug
1126 convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
1128 convsample_46(); // C, UTF16 -> latin2 [data40.utf16 -> data46.out]
1130 convsample_50(); // C, detect unicode signature
1132 printf("End of converter samples.\n");