X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..1a147d096ae81f4c8262f7bfc56bd19fc2dee932:/icuSources/test/cintltst/bocu1tst.c diff --git a/icuSources/test/cintltst/bocu1tst.c b/icuSources/test/cintltst/bocu1tst.c index 6f60b47c..a8edc180 100644 --- a/icuSources/test/cintltst/bocu1tst.c +++ b/icuSources/test/cintltst/bocu1tst.c @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * -* Copyright (C) 2002, International Business Machines +* Copyright (C) 2002-2015, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * file name: bocu1tst.c -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -15,7 +17,7 @@ * * This is the reference implementation of BOCU-1, * the MIME-friendly form of the Binary Ordered Compression for Unicode, -* taken directly from ### http://oss.software.ibm.com/cvs/icu/icuhtml/design/conversion/bocu1/ +* taken directly from ### http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/bocu1/ * The files bocu1.h and bocu1.c from the design folder are taken * verbatim (minus copyright and #include) and copied together into this file. * The reference code and some of the reference bocu1tst.c @@ -31,11 +33,10 @@ #include "unicode/utypes.h" #include "unicode/ustring.h" #include "unicode/ucnv.h" +#include "unicode/utf16.h" #include "cmemory.h" #include "cintltst.h" -#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) - /* icuhtml/design/conversion/bocu1/bocu1.h ---------------------------------- */ /* BOCU-1 constants and macros ---------------------------------------------- */ @@ -66,7 +67,9 @@ #define BOCU1_MIN 0x21 #define BOCU1_MIDDLE 0x90 #define BOCU1_MAX_LEAD 0xfe -#define BOCU1_MAX_TRAIL 0xff + +/* add the L suffix to make computations with BOCU1_MAX_TRAIL work on 16-bit compilers */ +#define BOCU1_MAX_TRAIL 0xffL #define BOCU1_RESET 0xff /* number of lead bytes */ @@ -161,7 +164,7 @@ * to trail byte values 0..19 (0..0x13) as used in the difference calculation. * External byte values that are illegal as trail bytes are mapped to -1. */ -static int8_t +static const int8_t bocu1ByteToTrail[BOCU1_MIN]={ /* 0 1 2 3 4 5 6 7 */ -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1, @@ -184,7 +187,7 @@ bocu1ByteToTrail[BOCU1_MIN]={ * from trail byte values 0..19 (0..0x13) as used in the difference calculation * to external byte values 0x00..0x20. */ -static int8_t +static const int8_t bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={ /* 0 1 2 3 4 5 6 7 */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11, @@ -248,7 +251,7 @@ decodeBocu1(Bocu1Rx *pRx, uint8_t b); * @param c current code point, 0..0x10ffff * @return "previous code point" state value */ -static U_INLINE int32_t +static int32_t bocu1Prev(int32_t c) { /* compute new prev */ if(0x3040<=c && c<=0x309f) { @@ -258,8 +261,8 @@ bocu1Prev(int32_t c) { /* CJK Unihan */ return 0x4e00-BOCU1_REACH_NEG_2; } else if(0xac00<=c && c<=0xd7a3) { - /* Korean Hangul */ - return (0xd7a3+0xac00)/2; + /* Korean Hangul (cast to int32_t to avoid wraparound on 16-bit compilers) */ + return ((int32_t)0xd7a3+(int32_t)0xac00)/2; } else { /* mostly small scripts */ return (c&~0x7f)+BOCU1_ASCII_PREV; @@ -341,7 +344,7 @@ packDiff(int32_t diff) { } while(--count>0); /* add lead byte */ - result|=(lead+diff)<=0) { - UTF_APPEND_CHAR_UNSAFE(s, sLength, c); + U16_APPEND_UNSAFE(s, sLength, c); } } return sLength; } -static U_INLINE char +static char hexDigit(uint8_t digit) { return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit); } @@ -869,50 +872,63 @@ TestBOCU1RefDiff(void) { /* cintltst code ------------------------------------------------------------ */ +static const int32_t DEFAULT_BUFFER_SIZE = 30000; + + /* test one string with the ICU and the reference BOCU-1 implementations */ static void roundtripBOCU1(UConverter *bocu1, int32_t number, const UChar *text, int32_t length) { - static UChar roundtripRef[30000], roundtripICU[30000]; - static char bocu1Ref[30000], bocu1ICU[30000]; + UChar *roundtripRef, *roundtripICU; + char *bocu1Ref, *bocu1ICU; int32_t bocu1RefLength, bocu1ICULength, roundtripRefLength, roundtripICULength; UErrorCode errorCode; + roundtripRef = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar)); + roundtripICU = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar)); + bocu1Ref = malloc(DEFAULT_BUFFER_SIZE); + bocu1ICU = malloc(DEFAULT_BUFFER_SIZE); + /* Unicode -> BOCU-1 */ bocu1RefLength=writeString(text, length, (uint8_t *)bocu1Ref); errorCode=U_ZERO_ERROR; - bocu1ICULength=ucnv_fromUChars(bocu1, bocu1ICU, sizeof(bocu1ICU), text, length, &errorCode); + bocu1ICULength=ucnv_fromUChars(bocu1, bocu1ICU, DEFAULT_BUFFER_SIZE, text, length, &errorCode); if(U_FAILURE(errorCode)) { log_err("ucnv_fromUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode)); - return; + goto cleanup; } if(bocu1RefLength!=bocu1ICULength || 0!=uprv_memcmp(bocu1Ref, bocu1ICU, bocu1RefLength)) { log_err("Unicode(%d)[%d] -> BOCU-1: reference[%d]!=ICU[%d]\n", number, length, bocu1RefLength, bocu1ICULength); - return; + goto cleanup; } /* BOCU-1 -> Unicode */ roundtripRefLength=readString((uint8_t *)bocu1Ref, bocu1RefLength, roundtripRef); if(roundtripRefLength<0) { - return; /* readString() found an error and reported it */ + goto cleanup; /* readString() found an error and reported it */ } - roundtripICULength=ucnv_toUChars(bocu1, roundtripICU, sizeof(roundtripICU)/U_SIZEOF_UCHAR, bocu1ICU, bocu1ICULength, &errorCode); + roundtripICULength=ucnv_toUChars(bocu1, roundtripICU, DEFAULT_BUFFER_SIZE, bocu1ICU, bocu1ICULength, &errorCode); if(U_FAILURE(errorCode)) { log_err("ucnv_toUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode)); - return; + goto cleanup; } if(length!=roundtripRefLength || 0!=u_memcmp(text, roundtripRef, length)) { log_err("BOCU-1 -> Unicode: original(%d)[%d]!=reference[%d]\n", number, length, roundtripRefLength); - return; + goto cleanup; } if(roundtripRefLength!=roundtripICULength || 0!=u_memcmp(roundtripRef, roundtripICU, roundtripRefLength)) { log_err("BOCU-1 -> Unicode: reference(%d)[%d]!=ICU[%d]\n", number, roundtripRefLength, roundtripICULength); - return; + goto cleanup; } +cleanup: + free(roundtripRef); + free(roundtripICU); + free(bocu1Ref); + free(bocu1ICU); } static const UChar feff[]={ 0xfeff }; @@ -935,21 +951,21 @@ static const struct { const UChar *s; int32_t length; } strings[]={ - { feff, LENGTHOF(feff) }, - { ascii, LENGTHOF(ascii) }, - { crlf, LENGTHOF(crlf) }, - { nul, LENGTHOF(nul) }, - { latin, LENGTHOF(latin) }, - { devanagari, LENGTHOF(devanagari) }, - { hiragana, LENGTHOF(hiragana) }, - { unihan, LENGTHOF(unihan) }, - { hangul, LENGTHOF(hangul) }, - { surrogates, LENGTHOF(surrogates) }, - { plane1, LENGTHOF(plane1) }, - { plane2, LENGTHOF(plane2) }, - { plane15, LENGTHOF(plane15) }, - { plane16, LENGTHOF(plane16) }, - { c0, LENGTHOF(c0) } + { feff, UPRV_LENGTHOF(feff) }, + { ascii, UPRV_LENGTHOF(ascii) }, + { crlf, UPRV_LENGTHOF(crlf) }, + { nul, UPRV_LENGTHOF(nul) }, + { latin, UPRV_LENGTHOF(latin) }, + { devanagari, UPRV_LENGTHOF(devanagari) }, + { hiragana, UPRV_LENGTHOF(hiragana) }, + { unihan, UPRV_LENGTHOF(unihan) }, + { hangul, UPRV_LENGTHOF(hangul) }, + { surrogates, UPRV_LENGTHOF(surrogates) }, + { plane1, UPRV_LENGTHOF(plane1) }, + { plane2, UPRV_LENGTHOF(plane2) }, + { plane15, UPRV_LENGTHOF(plane15) }, + { plane16, UPRV_LENGTHOF(plane16) }, + { c0, UPRV_LENGTHOF(c0) } }; /* @@ -960,7 +976,7 @@ static const struct { */ static void TestBOCU1(void) { - UChar text[30000]; + UChar *text; int32_t i, length; UConverter *bocu1; @@ -969,13 +985,15 @@ TestBOCU1(void) { errorCode=U_ZERO_ERROR; bocu1=ucnv_open("BOCU-1", &errorCode); if(U_FAILURE(errorCode)) { - log_err("error: unable to open BOCU-1 converter: %s\n", u_errorName(errorCode)); + log_data_err("error: unable to open BOCU-1 converter: %s\n", u_errorName(errorCode)); return; } + text = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar)); + /* text 1: each of strings[] once */ length=0; - for(i=0; i=LENGTHOF(strings)) { - i-=LENGTHOF(strings); + if(i>=UPRV_LENGTHOF(strings)) { + i-=UPRV_LENGTHOF(strings); } u_memcpy(text+length, strings[i].s, strings[i].length); length+=strings[i].length; @@ -1003,6 +1021,7 @@ TestBOCU1(void) { roundtripBOCU1(bocu1, 3, text, length); ucnv_close(bocu1); + free(text); } U_CFUNC void addBOCU1Tests(TestNode** root);