/*
******************************************************************************
*
-* Copyright (C) 2002, International Business Machines
+* Copyright (C) 2002-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* This is the reference implementation of BOCU-1,
* the MIME-friendly form of the Binary Ordered Compression for Unicode,
-* taken directly from ### http://oss.software.ibm.com/cvs/icu/icuhtml/design/conversion/bocu1/
+* taken directly from ### http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/bocu1/
* The files bocu1.h and bocu1.c from the design folder are taken
* verbatim (minus copyright and #include) and copied together into this file.
* The reference code and some of the reference bocu1tst.c
#define BOCU1_MIN 0x21
#define BOCU1_MIDDLE 0x90
#define BOCU1_MAX_LEAD 0xfe
-#define BOCU1_MAX_TRAIL 0xff
+
+/* add the L suffix to make computations with BOCU1_MAX_TRAIL work on 16-bit compilers */
+#define BOCU1_MAX_TRAIL 0xffL
#define BOCU1_RESET 0xff
/* number of lead bytes */
* to trail byte values 0..19 (0..0x13) as used in the difference calculation.
* External byte values that are illegal as trail bytes are mapped to -1.
*/
-static int8_t
+static const int8_t
bocu1ByteToTrail[BOCU1_MIN]={
/* 0 1 2 3 4 5 6 7 */
-1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
* from trail byte values 0..19 (0..0x13) as used in the difference calculation
* to external byte values 0x00..0x20.
*/
-static int8_t
+static const int8_t
bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
/* 0 1 2 3 4 5 6 7 */
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
/* CJK Unihan */
return 0x4e00-BOCU1_REACH_NEG_2;
} else if(0xac00<=c && c<=0xd7a3) {
- /* Korean Hangul */
- return (0xd7a3+0xac00)/2;
+ /* Korean Hangul (cast to int32_t to avoid wraparound on 16-bit compilers) */
+ return ((int32_t)0xd7a3+(int32_t)0xac00)/2;
} else {
/* mostly small scripts */
return (c&~0x7f)+BOCU1_ASCII_PREV;
UTF_NEXT_CHAR(s, i, length, c);
p+=writePacked(encodeBocu1(&prev, c), p);
}
- return p-p0;
+ return (int32_t)(p-p0);
}
/**
/* cintltst code ------------------------------------------------------------ */
+/*
+ * Capacity of each scratch buffer that the BOCU-1 tests allocate with malloc():
+ * used as a UChar count for the UTF-16 buffers and as a byte count for the
+ * BOCU-1 byte buffers.
+ */
+static const int32_t DEFAULT_BUFFER_SIZE = 30000;
+
+
/* test one string with the ICU and the reference BOCU-1 implementations */
+/*
+ * Encodes text[0..length) to BOCU-1 with the reference writeString() and with
+ * ucnv_fromUChars(), decodes both byte sequences again with readString() and
+ * ucnv_toUChars(), and reports failures through log_err().
+ *
+ * bocu1   converter passed to the ucnv_* calls
+ * number  index of the test text, used only in log messages
+ * text    UTF-16 input
+ * length  number of UChars in text
+ *
+ * The four scratch buffers (DEFAULT_BUFFER_SIZE units each) are owned by this
+ * function. Every exit, including error exits, must leave through the
+ * cleanup label so that none of them is leaked.
+ */
static void
roundtripBOCU1(UConverter *bocu1, int32_t number, const UChar *text, int32_t length) {
- static UChar roundtripRef[30000], roundtripICU[30000];
- static char bocu1Ref[30000], bocu1ICU[30000];
+ UChar *roundtripRef, *roundtripICU;
+ char *bocu1Ref, *bocu1ICU;
int32_t bocu1RefLength, bocu1ICULength, roundtripRefLength, roundtripICULength;
UErrorCode errorCode;
+ roundtripRef = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
+ roundtripICU = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
+ bocu1Ref = malloc(DEFAULT_BUFFER_SIZE);
+ bocu1ICU = malloc(DEFAULT_BUFFER_SIZE);
+ if(roundtripRef==NULL || roundtripICU==NULL || bocu1Ref==NULL || bocu1ICU==NULL) {
+ /* free(NULL) is a no-op, so the shared cleanup handles partial allocation */
+ log_err("roundtripBOCU1(text(%d)[%d]): out of memory\n", number, length);
+ goto cleanup;
+ }
+
/* Unicode -> BOCU-1 */
bocu1RefLength=writeString(text, length, (uint8_t *)bocu1Ref);
errorCode=U_ZERO_ERROR;
- bocu1ICULength=ucnv_fromUChars(bocu1, bocu1ICU, sizeof(bocu1ICU), text, length, &errorCode);
+ bocu1ICULength=ucnv_fromUChars(bocu1, bocu1ICU, DEFAULT_BUFFER_SIZE, text, length, &errorCode);
if(U_FAILURE(errorCode)) {
log_err("ucnv_fromUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode));
- return;
+ goto cleanup;
/* BOCU-1 -> Unicode */
roundtripRefLength=readString((uint8_t *)bocu1Ref, bocu1RefLength, roundtripRef);
if(roundtripRefLength<0) {
- return; /* readString() found an error and reported it */
+ goto cleanup; /* readString() found an error and reported it */
}
- roundtripICULength=ucnv_toUChars(bocu1, roundtripICU, sizeof(roundtripICU)/U_SIZEOF_UCHAR, bocu1ICU, bocu1ICULength, &errorCode);
+ roundtripICULength=ucnv_toUChars(bocu1, roundtripICU, DEFAULT_BUFFER_SIZE, bocu1ICU, bocu1ICULength, &errorCode);
if(U_FAILURE(errorCode)) {
log_err("ucnv_toUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode));
- return;
+ goto cleanup;
log_err("BOCU-1 -> Unicode: reference(%d)[%d]!=ICU[%d]\n", number, roundtripRefLength, roundtripICULength);
- return;
+ goto cleanup;
}
+cleanup:
+ /* single release point for all scratch buffers */
+ free(roundtripRef);
+ free(roundtripICU);
+ free(bocu1Ref);
+ free(bocu1ICU);
}
static const UChar feff[]={ 0xfeff };
*/
static void
TestBOCU1(void) {
- UChar text[30000];
+ UChar *text;
int32_t i, length;
UConverter *bocu1;
return;
}
+ /* heap buffer of DEFAULT_BUFFER_SIZE UChars replaces the former 30000-element stack array */
+ text = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
+ if(text==NULL) {
+ /* cannot build any test text; release the converter as the normal exit does */
+ log_err("TestBOCU1: out of memory\n");
+ ucnv_close(bocu1);
+ return;
+ }
+
/* text 1: each of strings[] once */
length=0;
for(i=0; i<LENGTHOF(strings); ++i) {
roundtripBOCU1(bocu1, 3, text, length);
ucnv_close(bocu1);
+ free(text);
}
U_CFUNC void addBOCU1Tests(TestNode** root);