+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
-* Copyright (C) 2000-2012, International Business Machines
+* Copyright (C) 2000-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv2022.cpp
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
#include "cmemory.h"
#include "uassert.h"
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
#ifdef U_ENABLE_GENERIC_ISO_2022
/*
* I am disabling the generic ISO-2022 converter after proposing to do so on
*/
#endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
static const char SHIFT_IN_STR[] = "\x0F";
// static const char SHIFT_OUT_STR[] = "\x0E";
+#endif
#define CR 0x0D
#define LF 0x0A
} StateEnum;
/* is the StateEnum charset value for a DBCS charset? */
+#if UCONFIG_ONLY_HTML_CONVERSION
+#define IS_JP_DBCS(cs) (JISX208==(cs))
+#else
#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
+#endif
#define CSM(cs) ((uint16_t)1<<(cs))
* all versions, not just JIS7 and JIS8.
* - ICU does not distinguish between different versions of JIS X 0208.
*/
+#if UCONFIG_ONLY_HTML_CONVERSION
+enum { MAX_JA_VERSION=0 };
+#else
enum { MAX_JA_VERSION=4 };
+#endif
static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
+#if !UCONFIG_ONLY_HTML_CONVERSION
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
+#endif
};
typedef enum {
/* ISO-2022 ----------------------------------------------------------------- */
/*Forward declaration */
-U_CFUNC void
+U_CFUNC void U_CALLCONV
ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
UErrorCode * err);
-U_CFUNC void
+U_CFUNC void U_CALLCONV
ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
UErrorCode * err);
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
};
-
/* Type def for refactoring changeState_2022 code*/
typedef enum{
#ifdef U_ENABLE_GENERIC_ISO_2022
ISO_2022=0,
#endif
ISO_2022_JP=1,
+#if !UCONFIG_ONLY_HTML_CONVERSION
ISO_2022_KR=2,
ISO_2022_CN=3
+#endif
} Variant2022;
/*********** ISO 2022 Converter Protos ***********/
-static void
+static void U_CALLCONV
_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
-static void
+static void U_CALLCONV
_ISO2022Close(UConverter *converter);
-static void
+static void U_CALLCONV
_ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
-static const char*
+U_CDECL_BEGIN
+static const char * U_CALLCONV
_ISO2022getName(const UConverter* cnv);
+U_CDECL_END
-static void
+static void U_CALLCONV
_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);
-static UConverter *
+U_CDECL_BEGIN
+static UConverter * U_CALLCONV
_ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);
+U_CDECL_END
+
#ifdef U_ENABLE_GENERIC_ISO_2022
-static void
+static void U_CALLCONV
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
#endif
/*const UConverterSharedData _ISO2022Data;*/
extern const UConverterSharedData _ISO2022JPData;
+
+#if !UCONFIG_ONLY_HTML_CONVERSION
extern const UConverterSharedData _ISO2022KRData;
extern const UConverterSharedData _ISO2022CNData;
+#endif
} // namespace
}
}
-static void
+static void U_CALLCONV
_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
char myLocale[6]={' ',' ',' ',' ',' ',' '};
if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
(myLocale[2]=='_' || myLocale[2]=='\0'))
{
- size_t len=0;
/* open the required converters and cache them */
if(version>MAX_JA_VERSION) {
- /* prevent indexing beyond jpCharsetMasks[] */
- myConverterData->version = version = 0;
+ // Since ICU 55, opening a converter with an unsupported version fails.
+ // Earlier releases fell back to version 0 instead, but that would yield
+ // unexpected behavior.
+ *errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
}
if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
myConverterData->myConverterArray[ISO8859_7] =
uprv_strcpy(myConverterData->locale,"ja");
(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
- len = uprv_strlen(myConverterData->name);
+ size_t len = uprv_strlen(myConverterData->name);
myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
myConverterData->name[len+1]='\0';
}
+#if !UCONFIG_ONLY_HTML_CONVERSION
else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
(myLocale[2]=='_' || myLocale[2]=='\0'))
{
+ if(version>1) {
+ // Since ICU 55, opening a converter with an unsupported version fails.
+ // Earlier releases fell back to version 0 instead, but that would yield
+ // unexpected behavior.
+ *errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
const char *cnvName;
if(version==1) {
cnvName="icu-internal-25546";
else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
(myLocale[2]=='_' || myLocale[2]=='\0'))
{
+ if(version>2) {
+ // Since ICU 55, opening a converter with an unsupported version fails.
+ // Earlier releases fell back to version 0 instead, but that would yield
+ // unexpected behavior.
+ *errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
/* open the required converters and cache them */
myConverterData->myConverterArray[GB2312_1] =
(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
}
}
+#endif // !UCONFIG_ONLY_HTML_CONVERSION
else{
#ifdef U_ENABLE_GENERIC_ISO_2022
myConverterData->isFirstBuffer = TRUE;
/* initialize the state variables */
uprv_strcpy(myConverterData->name,"ISO_2022");
#else
- *errorCode = U_UNSUPPORTED_ERROR;
+ *errorCode = U_MISSING_RESOURCE_ERROR;
+ // The error code set above was U_UNSUPPORTED_ERROR before ICU 55; it was
+ // changed to a more standard data loading error code.
return;
#endif
}
}
-static void
+static void U_CALLCONV
_ISO2022Close(UConverter *converter) {
UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
UConverterSharedData **array = myData->myConverterArray;
}
}
-static void
+static void U_CALLCONV
_ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
if(choice<=UCNV_RESET_TO_UNICODE) {
}
}
-static const char*
+U_CDECL_BEGIN
+
+static const char * U_CALLCONV
_ISO2022getName(const UConverter* cnv){
if(cnv->extraInfo){
UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
return NULL;
}
+U_CDECL_END
+
/*************** to unicode *******************/
/****************************************************************************
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
};
+#if !UCONFIG_ONLY_HTML_CONVERSION
/*************** to unicode *******************/
static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
/* 0 1 2 3 4 5 6 7 8 9 */
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
};
+#endif
static UCNV_TableStates_2022
while (hi != low) /*binary search*/{
- register int32_t mid = (hi+low) >> 1; /*Finds median*/
+ int32_t mid = (hi+low) >> 1; /*Finds median*/
if (mid == oldmid)
break;
}
}
break;
+#if !UCONFIG_ONLY_HTML_CONVERSION
case ISO_2022_CN:
{
StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
break;
}
- /*fall through*/
+ U_FALLTHROUGH;
case GB2312_1:
- /*fall through*/
+ U_FALLTHROUGH;
case CNS_11643_1:
myData2022->toU2022State.cs[1]=(int8_t)tempState;
break;
*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
}
break;
+#endif // !UCONFIG_ONLY_HTML_CONVERSION
default:
*err = U_ILLEGAL_ESCAPE_SEQUENCE;
}
}
+#if !UCONFIG_ONLY_HTML_CONVERSION
/*Checks the characters of the buffer against valid 2022 escape sequences
*if the match we return a pointer to the initial start of the sequence otherwise
*we return sourceLimit
return mySource;
#endif
}
-
+#endif
/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
* any future change in _MBCSFromUChar32() function should be reflected here.
*
*/
-static void
+static void U_CALLCONV
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
UErrorCode* err){
const char* mySourceLimit, *realSourceLimit;
ASCII,
JISX201,
ISO8859_1,
- ISO8859_7,
JISX208,
+ ISO8859_7,
JISX212,
GB2312,
KSC5601,
0x212C /* U+FF9F */
};
-static void
+static void U_CALLCONV
UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
UConverter *cnv = args->converter;
UConverterDataISO2022 *converterData;
}
/* try all the other possible charsets */
- for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {
+ for(i = 0; i < UPRV_LENGTHOF(jpCharsetPref); ++i) {
cs = (int8_t)jpCharsetPref[i];
if(CSM(cs) & csm) {
choices[choiceCount++] = cs;
/*************** to unicode *******************/
-static void
+static void U_CALLCONV
UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
UErrorCode* err){
char tempBuf[2];
/* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
case CR:
- /*falls through*/
case LF:
/* automatically reset to single-byte mode */
if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
}
pToU2022State->cs[2] = 0;
pToU2022State->g = 0;
- /* falls through */
+ U_FALLTHROUGH;
default:
/* convert one or two bytes */
myData->isEmptySegment = FALSE;
}
+#if !UCONFIG_ONLY_HTML_CONVERSION
/***************************************************************
* Rules for ISO-2022-KR encoding
* i) The KSC5601 designator sequence should appear only once in a file,
* ii) There are only 2 shifting sequences SO to shift into double byte mode
* and SI to shift into single byte mode
*/
-static void
+static void U_CALLCONV
UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
UConverter* saveConv = args->converter;
args->converter=saveConv;
}
-static void
+static void U_CALLCONV
UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
const UChar *source = args->source;
/************************ To Unicode ***************************************/
-static void
+static void U_CALLCONV
UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
UErrorCode* err){
char const* sourceStart;
}
}
-static void
+static void U_CALLCONV
UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
UErrorCode* err){
char tempBuf[2];
CNS_11643_1992_Plane_7_STR
};
-static void
+static void U_CALLCONV
UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
UConverter *cnv = args->converter;
UConverterDataISO2022 *converterData;
}
-static void
+static void U_CALLCONV
UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
UErrorCode* err){
char tempBuf[3];
/* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
case CR:
- /*falls through*/
case LF:
uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
- /* falls through */
+ U_FALLTHROUGH;
default:
/* convert one or two bytes */
myData->isEmptySegment = FALSE;
args->target = myTarget;
args->source = mySource;
}
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
-static void
+static void U_CALLCONV
_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
UConverter *cnv = args->converter;
UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
};
-static UConverter *
+U_CDECL_BEGIN
+
+static UConverter * U_CALLCONV
_ISO_2022_SafeClone(
const UConverter *cnv,
void *stackBuffer,
return &localClone->cnv;
}
-static void
+U_CDECL_END
+
+static void U_CALLCONV
_ISO_2022_GetUnicodeSet(const UConverter *cnv,
const USetAdder *sa,
UConverterUnicodeSet which,
sa->addRange(sa->set, HWKANA_START, HWKANA_END);
}
break;
+#if !UCONFIG_ONLY_HTML_CONVERSION
case 'c':
case 'z':
/* include ASCII for CN */
cnvData->currentConverter, sa, which, pErrorCode);
/* the loop over myConverterArray[] will simply not find another converter */
break;
+#endif
default:
break;
}
for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
UConverterSetFilter filter;
if(cnvData->myConverterArray[i]!=NULL) {
- if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
- cnvData->version==0 && i==CNS_11643
- ) {
+ if(cnvData->locale[0]=='j' && i==JISX208) {
+ /*
+ * Only add code points that map to Shift-JIS codes
+ * corresponding to JIS X 0208.
+ */
+ filter=UCNV_SET_FILTER_SJIS;
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
+ cnvData->version==0 && i==CNS_11643) {
/*
* Version-specific for CN:
* CN version 0 does not map CNS planes 3..7 although
* The two versions create different Unicode sets.
*/
filter=UCNV_SET_FILTER_2022_CN;
- } else if(cnvData->locale[0]=='j' && i==JISX208) {
- /*
- * Only add code points that map to Shift-JIS codes
- * corresponding to JIS X 0208.
- */
- filter=UCNV_SET_FILTER_SJIS;
} else if(i==KSC5601) {
/*
* Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
* are broader than GR94.
*/
filter=UCNV_SET_FILTER_GR94DBCS;
+#endif
} else {
filter=UCNV_SET_FILTER_NONE;
}
0,
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
-const UConverterSharedData _ISO2022Data={
- sizeof(UConverterSharedData),
- ~((uint32_t) 0),
- NULL,
- NULL,
- &_ISO2022StaticData,
- FALSE,
- &_ISO2022Impl,
- 0, UCNV_MBCS_TABLE_INITIALIZER
-};
+const UConverterSharedData _ISO2022Data=
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022StaticData, &_ISO2022Impl);
/*************JP****************/
static const UConverterImpl _ISO2022JPImpl={
namespace {
-const UConverterSharedData _ISO2022JPData={
- sizeof(UConverterSharedData),
- ~((uint32_t) 0),
- NULL,
- NULL,
- &_ISO2022JPStaticData,
- FALSE,
- &_ISO2022JPImpl,
- 0, UCNV_MBCS_TABLE_INITIALIZER
-};
+const UConverterSharedData _ISO2022JPData=
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022JPImpl);
} // namespace
+#if !UCONFIG_ONLY_HTML_CONVERSION
/************* KR ***************/
static const UConverterImpl _ISO2022KRImpl={
UCNV_ISO_2022,
UCNV_IBM,
UCNV_ISO_2022,
1,
- 3, /* max 3 bytes per UChar: SO+DBCS */
+ 8, /* max 8 bytes per UChar */
{ 0x1a, 0, 0, 0 },
1,
FALSE,
namespace {
-const UConverterSharedData _ISO2022KRData={
- sizeof(UConverterSharedData),
- ~((uint32_t) 0),
- NULL,
- NULL,
- &_ISO2022KRStaticData,
- FALSE,
- &_ISO2022KRImpl,
- 0, UCNV_MBCS_TABLE_INITIALIZER
-};
+const UConverterSharedData _ISO2022KRData=
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022KRStaticData, &_ISO2022KRImpl);
} // namespace
namespace {
-const UConverterSharedData _ISO2022CNData={
- sizeof(UConverterSharedData),
- ~((uint32_t) 0),
- NULL,
- NULL,
- &_ISO2022CNStaticData,
- FALSE,
- &_ISO2022CNImpl,
- 0, UCNV_MBCS_TABLE_INITIALIZER
-};
+const UConverterSharedData _ISO2022CNData=
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl);
} // namespace
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */