-/**************************************************************************
+/*************************************************************************
*
-* Copyright (C) 2000-2010, International Business Machines
+* © 2016 and later: Unicode, Inc. and others.
+* License & terms of use: http://www.unicode.org/copyright.html#License
+*
+**************************************************************************
+**************************************************************************
+*
+* Copyright (C) 2000-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
***************************************************************************
#include "flagcb.h"
/* Some utility functions */
+#ifndef UPRV_LENGTHOF /* define only when no earlier definition is visible */
+#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) /* element count as int32_t; pass a real array, not a pointer */
+#endif /* UPRV_LENGTHOF */
static const UChar kNone[] = { 0x0000 };
char buf[1000];
UErrorCode status = U_ZERO_ERROR;
int32_t o;
-
- o = u_charName(c, U_UNICODE_CHAR_NAME, buf, 1000, &status);
+
+ o = u_charName(c, U_EXTENDED_CHAR_NAME, buf, 1000, &status);
if(U_SUCCESS(status) && (o>0) ) {
buf[6] = 0;
printf("%7s", buf);
} else {
- o = u_charName(c, U_UNICODE_10_CHAR_NAME, buf, 1000, &status);
- if(U_SUCCESS(status) && (o>0)) {
- buf[5] = 0;
- printf("~%6s", buf);
- }
- else {
- printf(" ??????");
- }
+ printf(" ??????");
}
} else {
switch((char)(c & 0x007F)) {
int32_t i;
if( (len == -1) && (uch) ) {
- len = strlen(uch);
+ len = static_cast<int32_t>(strlen(uch));
}
printf("%5s: ", name);
int32_t len;
// set up the converter
+ //! [ucnv_open]
conv = ucnv_open("koi8-r", &status);
+ //! [ucnv_open]
assert(U_SUCCESS(status));
// convert to koi8-r
// grab another buffer's worth
while((!feof(f)) &&
- ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
+ ((count=static_cast<int32_t>(fread(inBuf, 1, BUFFERSIZE , f))) > 0) )
{
// Convert bytes to unicode
source = inBuf;
char inBuf[BUFFERSIZE];
const char *source;
const char *sourceLimit;
- UChar *uBuf;
int32_t uBufSize = 0;
UConverter *conv;
UErrorCode status = U_ZERO_ERROR;
info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
if(!info)
{
- fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo)*charCount);
+ fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", static_cast<int>(sizeof(CharFreqInfo)*charCount));
}
/* reset frequencies */
uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
printf("input bytes %d / min chars %d = %d UChars\n",
BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
- uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
- assert(uBuf!=NULL);
// grab another buffer's worth
while((!feof(f)) &&
- ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
+ ((count=static_cast<int32_t>(fread(inBuf, 1, BUFFERSIZE , f))) > 0) )
{
// Convert bytes to unicode
source = inBuf;
if(p>charCount)
{
fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
+ free(info);
+ fclose(f);
+ ucnv_close(conv);
return U_UNSUPPORTED_ERROR;
}
info[p].frequency++;
// convert to Unicode
// Note: we can use strlen, we know it's an 8 bit null terminated codepage
target[6] = 0xFDCA;
- len = ucnv_toUChars(conv, target, 100, source, strlen(source), &status);
+ len = ucnv_toUChars(conv, target, 100, source, static_cast<int32_t>(strlen(source)), &status);
U_ASSERT(status);
// close the converter
ucnv_close(conv);
// ***************************** END SAMPLE ********************
// Print it out
- printBytes("src", source, strlen(source) );
+ printBytes("src", source, static_cast<int32_t>(strlen(source)) );
printf("\n");
printUChars("targ", target, len);
// **************************** START SAMPLE *******************
- printBytes("src",source,sourceLimit-source);
+ printBytes("src", source, static_cast<int32_t>(sourceLimit - source));
while(source < sourceLimit)
{
conv = ucnv_open("utf-8", &status);
U_ASSERT(status);
- len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
+ len = ucnv_toUChars(conv, uchars, 100, source, static_cast<int32_t>(strlen(source)), &status);
U_ASSERT(status);
printUChars("uch", uchars, len);
UConverter *conv = NULL, *cloneCnv = NULL;
UErrorCode status = U_ZERO_ERROR;
uint32_t len, len2;
- int32_t cloneLen;
UBool flagVal = FALSE;
UConverterFromUCallback junkCB;
conv = ucnv_open("utf-8", &status);
U_ASSERT(status);
- len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
+ len = ucnv_toUChars(conv, uchars, 100, source, static_cast<int32_t>(strlen(source)), &status);
U_ASSERT(status);
printUChars("uch", uchars, len);
debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback);
#endif
- cloneLen = 1; /* but passing in null so it will clone */
- cloneCnv = ucnv_safeClone(conv, NULL, &cloneLen, &status);
+ cloneCnv = ucnv_safeClone(conv, NULL, NULL, &status);
U_ASSERT(status);
// grab another buffer's worth
while((!feof(f)) &&
- ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
+ ((count=static_cast<int32_t>(fread(inBuf, 1, BUFFERSIZE , f))) > 0) )
{
inbytes += count;
// Process the Unicode
// Todo: handle UTF-16/surrogates
- assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
- (size_t)(target-uBuf));
- total += (target-uBuf);
+ assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) == (size_t)(target-uBuf));
+ total += static_cast<uint32_t>((target-uBuf));
} while (source < sourceLimit); // while simply out of space
}
// grab another buffer's worth
while((!feof(f)) &&
- ((count=fread(inBuf, sizeof(UChar), BUFFERSIZE , f)) > 0) )
+ ((count=static_cast<int32_t>(fread(inBuf, sizeof(UChar), BUFFERSIZE , f))) > 0) )
{
inchars += count;
}
// Process the Unicode
- assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) ==
- (size_t)(target-buf));
- total += (target-buf);
+ assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) == (size_t)(target-buf));
+ total += static_cast<uint32_t>((target-buf));
} while (source < sourceLimit); // while simply out of space
}
- printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, inchars * sizeof(UChar), total);
+ printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, static_cast<int>(inchars * sizeof(UChar)), total);
// ***************************** END SAMPLE ********************
ucnv_close(conv);
#define BUFFERSIZE 219
+/*
+ * Sample 50: detect a Unicode signature (BOM) at the start of a byte
+ * buffer with ucnv_detectUnicodeSignature(), open the converter it names,
+ * convert the bytes to UChars and print them as hex code units.
+ *
+ * Takes no arguments and returns nothing; all results go to stdout and
+ * conversion failures are reported on stderr.
+ */
+void convsample_50() {
+  printf("\n\n==============================================\n"
+         "Sample 50: C: ucnv_detectUnicodeSignature\n");
+
+  //! [ucnv_detectUnicodeSignature]
+  UErrorCode err = U_ZERO_ERROR;
+  UBool discardSignature = TRUE; /* set to TRUE to throw away the initial U+FEFF */
+  char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
+  int32_t signatureLength = 0;
+  const char *encoding = ucnv_detectUnicodeSignature(input, static_cast<int32_t>(sizeof(input)),
+                                                     &signatureLength, &err);
+  UConverter *conv = NULL;
+  UChar output[100];
+  UChar *target = output, *out;
+  const char *source = input;
+  if(encoding!=NULL && U_SUCCESS(err)){
+    conv = ucnv_open(encoding, &err);
+    // do the conversion
+    ucnv_toUnicode(conv,
+                   &target, output + UPRV_LENGTHOF(output),
+                   &source, input + sizeof(input),
+                   NULL, TRUE, &err);
+    if (U_SUCCESS(err)) {
+      out = output;
+      // should the signature be discarded? Only skip it when something was
+      // actually written, so 'out' can never step past 'target'.
+      if (discardSignature && out < target){
+        ++out; // ignore initial U+FEFF
+      }
+      while(out < target) {
+        printf("%04x ", *out++);
+      }
+      puts("");
+    } else {
+      fprintf(stderr, "Sample 50: conversion failed: %s\n", u_errorName(err));
+    }
+    // release the converter on every path
+    ucnv_close(conv);
+  }
+  //! [ucnv_detectUnicodeSignature]
+  puts("");
+}
+
+
/* main */
convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
convsample_46(); // C, UTF16 -> latin3 [data41.utf16 -> data46.out]
+
+ convsample_50(); // C, detect unicode signature
printf("End of converter samples.\n");