X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..249c4c5ea9376c24572daf9c2effa7484a282f14:/icuSources/samples/ucnv/convsamp.cpp diff --git a/icuSources/samples/ucnv/convsamp.cpp b/icuSources/samples/ucnv/convsamp.cpp index 506a1b64..a692b77b 100644 --- a/icuSources/samples/ucnv/convsamp.cpp +++ b/icuSources/samples/ucnv/convsamp.cpp @@ -1,6 +1,12 @@ -/************************************************************************** +/************************************************************************* * -* Copyright (C) 2000-2010, International Business Machines +* © 2016 and later: Unicode, Inc. and others. +* License & terms of use: http://www.unicode.org/copyright.html#License +* +************************************************************************** +************************************************************************** +* +* Copyright (C) 2000-2016, International Business Machines * Corporation and others. All Rights Reserved. * *************************************************************************** @@ -43,6 +49,9 @@ #include "flagcb.h" /* Some utility functions */ +#ifndef UPRV_LENGTHOF +#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) +#endif static const UChar kNone[] = { 0x0000 }; @@ -58,20 +67,13 @@ void prettyPrintUChar(UChar c) char buf[1000]; UErrorCode status = U_ZERO_ERROR; int32_t o; - - o = u_charName(c, U_UNICODE_CHAR_NAME, buf, 1000, &status); + + o = u_charName(c, U_EXTENDED_CHAR_NAME, buf, 1000, &status); if(U_SUCCESS(status) && (o>0) ) { buf[6] = 0; printf("%7s", buf); } else { - o = u_charName(c, U_UNICODE_10_CHAR_NAME, buf, 1000, &status); - if(U_SUCCESS(status) && (o>0)) { - buf[5] = 0; - printf("~%6s", buf); - } - else { - printf(" ??????"); - } + printf(" ??????"); } } else { switch((char)(c & 0x007F)) { @@ -128,7 +130,7 @@ void printBytes(const char *name = "?", int32_t i; if( (len == -1) && (uch) ) { - len = strlen(uch); + len = static_cast(strlen(uch)); } printf("%5s: ", name); @@ -212,7 +214,9 @@ UErrorCode convsample_02() int32_t len; // set up the converter + //! [ucnv_open] conv = ucnv_open("koi8-r", &status); + //! [ucnv_open] assert(U_SUCCESS(status)); // convert to koi8-r @@ -325,7 +329,7 @@ UErrorCode convsample_05() // grab another buffer's worth while((!feof(f)) && - ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) ) + ((count=static_cast(fread(inBuf, 1, BUFFERSIZE , f))) > 0) ) { // Convert bytes to unicode source = inBuf; @@ -397,7 +401,6 @@ UErrorCode convsample_06() char inBuf[BUFFERSIZE]; const char *source; const char *sourceLimit; - UChar *uBuf; int32_t uBufSize = 0; UConverter *conv; UErrorCode status = U_ZERO_ERROR; @@ -421,7 +424,7 @@ UErrorCode convsample_06() info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount); if(!info) { - fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo)*charCount); + fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", static_cast(sizeof(CharFreqInfo)*charCount)); } /* reset frequencies */ @@ -438,12 +441,10 @@ UErrorCode convsample_06() uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv)); printf("input bytes %d / min chars %d = %d UChars\n", BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize); - uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); - assert(uBuf!=NULL); // grab another buffer's worth while((!feof(f)) && - ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) ) + ((count=static_cast(fread(inBuf, 1, BUFFERSIZE , f))) > 0) ) { // Convert bytes to unicode source = inBuf; @@ -473,6 +474,9 @@ UErrorCode convsample_06() if(p>charCount) { fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p); + free(info); + fclose(f); + ucnv_close(conv); return U_UNSUPPORTED_ERROR; } info[p].frequency++; @@ -541,7 +545,7 @@ UErrorCode convsample_12() // convert to Unicode // Note: we can use strlen, we know it's an 8 bit null terminated codepage target[6] = 0xFDCA; - len = ucnv_toUChars(conv, target, 100, source, strlen(source), &status); + len = ucnv_toUChars(conv, target, 100, source, static_cast(strlen(source)), &status); U_ASSERT(status); // close the converter ucnv_close(conv); @@ -549,7 +553,7 @@ UErrorCode convsample_12() // ***************************** END SAMPLE ******************** // Print it out - printBytes("src", source, strlen(source) ); + printBytes("src", source, static_cast(strlen(source)) ); printf("\n"); printUChars("targ", target, len); @@ -586,7 +590,7 @@ UErrorCode convsample_13() // **************************** START SAMPLE ******************* - printBytes("src",source,sourceLimit-source); + printBytes("src", source, static_cast(sourceLimit - source)); while(source < sourceLimit) { @@ -636,7 +640,7 @@ UBool convsample_20_didSubstitute(const char *source) conv = ucnv_open("utf-8", &status); U_ASSERT(status); - len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status); + len = ucnv_toUChars(conv, uchars, 100, source, static_cast(strlen(source)), &status); U_ASSERT(status); printUChars("uch", uchars, len); @@ -715,7 +719,6 @@ UBool convsample_21_didSubstitute(const char *source) UConverter *conv = NULL, *cloneCnv = NULL; UErrorCode status = U_ZERO_ERROR; uint32_t len, len2; - int32_t cloneLen; UBool flagVal = FALSE; UConverterFromUCallback junkCB; @@ -737,7 +740,7 @@ UBool convsample_21_didSubstitute(const char *source) conv = ucnv_open("utf-8", &status); U_ASSERT(status); - len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status); + len = ucnv_toUChars(conv, uchars, 100, source, static_cast(strlen(source)), &status); U_ASSERT(status); printUChars("uch", uchars, len); @@ -791,8 +794,7 @@ UBool convsample_21_didSubstitute(const char *source) debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback); #endif - cloneLen = 1; /* but passing in null so it will clone */ - cloneCnv = ucnv_safeClone(conv, NULL, &cloneLen, &status); + cloneCnv = ucnv_safeClone(conv, NULL, NULL, &status); U_ASSERT(status); @@ -913,7 +915,7 @@ UErrorCode convsample_40() // grab another buffer's worth while((!feof(f)) && - ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) ) + ((count=static_cast(fread(inBuf, 1, BUFFERSIZE , f))) > 0) ) { inbytes += count; @@ -947,9 +949,8 @@ UErrorCode convsample_40() // Process the Unicode // Todo: handle UTF-16/surrogates - assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) == - (size_t)(target-uBuf)); - total += (target-uBuf); + assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) == (size_t)(target-uBuf)); + total += static_cast((target-uBuf)); } while (source < sourceLimit); // while simply out of space } @@ -1019,7 +1020,7 @@ UErrorCode convsample_46() // grab another buffer's worth while((!feof(f)) && - ((count=fread(inBuf, sizeof(UChar), BUFFERSIZE , f)) > 0) ) + ((count=static_cast(fread(inBuf, sizeof(UChar), BUFFERSIZE , f))) > 0) ) { inchars += count; @@ -1052,13 +1053,12 @@ UErrorCode convsample_46() } // Process the Unicode - assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) == - (size_t)(target-buf)); - total += (target-buf); + assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) == (size_t)(target-buf)); + total += static_cast((target-buf)); } while (source < sourceLimit); // while simply out of space } - printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, inchars * sizeof(UChar), total); + printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, static_cast(inchars * sizeof(UChar)), total); // ***************************** END SAMPLE ******************** ucnv_close(conv); @@ -1073,6 +1073,42 @@ UErrorCode convsample_46() #define BUFFERSIZE 219 +void convsample_50() { + printf("\n\n==============================================\n" + "Sample 50: C: ucnv_detectUnicodeSignature\n"); + + //! [ucnv_detectUnicodeSignature] + UErrorCode err = U_ZERO_ERROR; + UBool discardSignature = TRUE; /* set to TRUE to throw away the initial U+FEFF */ + char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' }; + int32_t signatureLength = 0; + const char *encoding = ucnv_detectUnicodeSignature(input,sizeof(input),&signatureLength,&err); + UConverter *conv = NULL; + UChar output[100]; + UChar *target = output, *out; + const char *source = input; + if(encoding!=NULL && U_SUCCESS(err)){ + // should signature be discarded ? + conv = ucnv_open(encoding, &err); + // do the conversion + ucnv_toUnicode(conv, + &target, output + UPRV_LENGTHOF(output), + &source, input + sizeof(input), + NULL, TRUE, &err); + out = output; + if (discardSignature){ + ++out; // ignore initial U+FEFF + } + while(out != target) { + printf("%04x ", *out++); + } + puts(""); + } + //! [ucnv_detectUnicodeSignature] + puts(""); +} + + /* main */ @@ -1096,6 +1132,8 @@ int main() convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16] convsample_46(); // C, UTF16 -> latin3 [data41.utf16 -> data46.out] + + convsample_50(); // C, detect unicode signature printf("End of converter samples.\n");