/**************************************************************************
*
-* Copyright (C) 2000-2010, International Business Machines
+* Copyright (C) 2000-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
***************************************************************************
char buf[1000];
UErrorCode status = U_ZERO_ERROR;
int32_t o;
-
- o = u_charName(c, U_UNICODE_CHAR_NAME, buf, 1000, &status);
+
+ o = u_charName(c, U_EXTENDED_CHAR_NAME, buf, 1000, &status);
if(U_SUCCESS(status) && (o>0) ) {
buf[6] = 0;
printf("%7s", buf);
} else {
- o = u_charName(c, U_UNICODE_10_CHAR_NAME, buf, 1000, &status);
- if(U_SUCCESS(status) && (o>0)) {
- buf[5] = 0;
- printf("~%6s", buf);
- }
- else {
- printf(" ??????");
- }
+ printf(" ??????");
}
} else {
switch((char)(c & 0x007F)) {
int32_t len;
// set up the converter
+ //! [ucnv_open]
conv = ucnv_open("koi8-r", &status);
+ //! [ucnv_open]
assert(U_SUCCESS(status));
// convert to koi8-r
char inBuf[BUFFERSIZE];
const char *source;
const char *sourceLimit;
- UChar *uBuf;
int32_t uBufSize = 0;
UConverter *conv;
UErrorCode status = U_ZERO_ERROR;
uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
printf("input bytes %d / min chars %d = %d UChars\n",
BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
- uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
- assert(uBuf!=NULL);
// grab another buffer's worth
while((!feof(f)) &&
if(p>charCount)
{
fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
+ free(info);
+ fclose(f);
+ ucnv_close(conv);
return U_UNSUPPORTED_ERROR;
}
info[p].frequency++;
#define BUFFERSIZE 219
+/**
+ * Sample 50: detect a Unicode signature (byte order mark) at the start of a
+ * byte buffer with ucnv_detectUnicodeSignature(), open a converter for the
+ * detected encoding, convert the buffer to UChars and print every resulting
+ * code unit as four hex digits.
+ *
+ * When discardSignature is TRUE, a leading U+FEFF in the converted output is
+ * not printed. Takes no parameters and returns nothing; failures of
+ * ucnv_open()/ucnv_toUnicode() are reported on stderr.
+ */
+void convsample_50() {
+  printf("\n\n==============================================\n"
+         "Sample 50: C: ucnv_detectUnicodeSignature\n");
+
+  //! [ucnv_detectUnicodeSignature]
+  UErrorCode err = U_ZERO_ERROR;
+  UBool discardSignature = TRUE; /* set to TRUE to throw away the initial U+FEFF */
+  /* UTF-8 signature (EF BB BF) followed by the bytes for "ABC" */
+  char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
+  int32_t signatureLength = 0;
+  const char *encoding = ucnv_detectUnicodeSignature(input,sizeof(input),&signatureLength,&err);
+  UConverter *conv = NULL;
+  UChar output[100];
+  UChar *target = output, *out;
+  const char *source = input;
+  if(encoding!=NULL && U_SUCCESS(err)){
+    conv = ucnv_open(encoding, &err);
+    if(U_SUCCESS(err)){
+      // do the conversion
+      ucnv_toUnicode(conv,
+                     &target, output + sizeof(output)/sizeof(output[0]),
+                     &source, input + sizeof(input),
+                     NULL, TRUE, &err);
+      // only walk the output when the conversion succeeded; otherwise
+      // target may not have advanced and there is nothing valid to print
+      if(U_SUCCESS(err)){
+        out = output;
+        // should signature be discarded ?
+        // skip it only if it is really there, so that no real character is
+        // lost and out can never step past target
+        if (discardSignature && out != target && *out == 0xFEFF){
+          ++out; // ignore initial U+FEFF
+        }
+        while(out != target) {
+          printf("%04x ", (unsigned int)*out++);
+        }
+        puts("");
+      }
+      // release the converter opened above
+      ucnv_close(conv);
+    }
+    if(U_FAILURE(err)){
+      fprintf(stderr, "Sample 50: conversion failed: %s\n", u_errorName(err));
+    }
+  }
+  //! [ucnv_detectUnicodeSignature]
+  puts("");
+}
+
+
+
+
/* main */
convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
convsample_46(); // C, UTF16 -> latin3 [data41.utf16 -> data46.out]
+
+ convsample_50(); // C, detect unicode signature
printf("End of converter samples.\n");