/*
******************************************************************************
*
-* Copyright (C) 2000-2008, International Business Machines
+* Copyright (C) 2000-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
/* bit flag for UConverter.options indicating GB 18030 special handling */
#define _MBCS_OPTION_GB18030 0x8000
+/* bit flags for UConverter.options indicating KEIS, JEF, JIPS special handling (converter-specific SI/SO byte sequences) */
+#define _MBCS_OPTION_KEIS 0x01000
+#define _MBCS_OPTION_JEF 0x02000
+#define _MBCS_OPTION_JIPS 0x04000
+
+#define KEIS_SO_CHAR_1 0x0A /* KEIS shift-out sequence, first byte */
+#define KEIS_SO_CHAR_2 0x42 /* KEIS shift-out sequence, second byte */
+#define KEIS_SI_CHAR_1 0x0A /* KEIS shift-in sequence, first byte */
+#define KEIS_SI_CHAR_2 0x41 /* KEIS shift-in sequence, second byte */
+
+#define JEF_SO_CHAR 0x28 /* JEF shift-out, single byte */
+#define JEF_SI_CHAR 0x29 /* JEF shift-in, single byte */
+
+#define JIPS_SO_CHAR_1 0x1A /* JIPS shift-out sequence, first byte */
+#define JIPS_SO_CHAR_2 0x70 /* JIPS shift-out sequence, second byte */
+#define JIPS_SI_CHAR_1 0x1A /* JIPS shift-in sequence, first byte */
+#define JIPS_SI_CHAR_2 0x71 /* JIPS shift-in sequence, second byte */
+
+enum SISO_Option { /* selects which shift sequence getSISOBytes() writes */
+ SI, /* sequence emitted when changing from double-byte mode to single-byte */
+ SO /* sequence emitted when changing from single-byte mode to double-byte */
+};
+typedef enum SISO_Option SISO_Option;
+
+/**
+ * Writes the shift-in or shift-out byte sequence for a converter into value.
+ *
+ * The sequence is chosen from the converter option bits, tested in this
+ * order: _MBCS_OPTION_KEIS, _MBCS_OPTION_JEF, _MBCS_OPTION_JIPS. When none
+ * of them is set, the single byte UCNV_SI or UCNV_SO is used.
+ *
+ * @param option    SI or SO, selecting which sequence to produce
+ * @param cnvOption converter option bits (callers pass cnv->options)
+ * @param value     output buffer; up to two bytes are written
+ * @return the number of bytes written to value (1 or 2), or 0 if option is
+ *         neither SI nor SO, in which case value is left untouched
+ */
+static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) {
+    int32_t wantShiftIn;
+
+    if (option == SI) {
+        wantShiftIn = 1;
+    } else if (option == SO) {
+        wantShiftIn = 0;
+    } else {
+        /* Should never happen. */
+        return 0;
+    }
+
+    /* KEIS: two-byte sequence */
+    if ((cnvOption & _MBCS_OPTION_KEIS) != 0) {
+        value[0] = (uint8_t)(wantShiftIn ? KEIS_SI_CHAR_1 : KEIS_SO_CHAR_1);
+        value[1] = (uint8_t)(wantShiftIn ? KEIS_SI_CHAR_2 : KEIS_SO_CHAR_2);
+        return 2;
+    }
+
+    /* JEF: single byte */
+    if ((cnvOption & _MBCS_OPTION_JEF) != 0) {
+        value[0] = (uint8_t)(wantShiftIn ? JEF_SI_CHAR : JEF_SO_CHAR);
+        return 1;
+    }
+
+    /* JIPS: two-byte sequence */
+    if ((cnvOption & _MBCS_OPTION_JIPS) != 0) {
+        value[0] = (uint8_t)(wantShiftIn ? JIPS_SI_CHAR_1 : JIPS_SO_CHAR_1);
+        value[1] = (uint8_t)(wantShiftIn ? JIPS_SI_CHAR_2 : JIPS_SO_CHAR_2);
+        return 2;
+    }
+
+    /* no special handling requested: the standard single-byte SI/SO */
+    value[0] = (uint8_t)(wantShiftIn ? UCNV_SI : UCNV_SO);
+    return 1;
+}
+
/* Miscellaneous ------------------------------------------------------------ */
/**
/* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
args.size=sizeof(UConverterLoadArgs);
args.nestedLoads=2;
+ args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable;
args.reserved=pArgs->reserved;
args.options=pArgs->options;
args.pkg=pArgs->pkg;
*pErrorCode=U_INVALID_TABLE_FORMAT;
return;
}
+ if(pArgs->onlyTestIsLoadable) {
+ /*
+ * Exit as soon as we know that we can load the converter
+ * and the format is valid and supported.
+ * The worst that can happen in the following code is a memory
+ * allocation error.
+ */
+ ucnv_unload(baseSharedData);
+ return;
+ }
/* copy the base table data */
uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));
*pErrorCode=U_INVALID_TABLE_FORMAT;
return;
}
+ if(pArgs->onlyTestIsLoadable) {
+ /*
+ * Exit as soon as we know that we can load the converter
+ * and the format is valid and supported.
+ * The worst that can happen in the following code is a memory
+ * allocation error.
+ */
+ return;
+ }
mbcsTable->countStates=(uint8_t)header->countStates;
mbcsTable->countToUFallbacks=header->countToUFallbacks;
static void
ucnv_MBCSOpen(UConverter *cnv,
- const char *name,
- const char *locale,
- uint32_t options,
- UErrorCode *pErrorCode) {
+ UConverterLoadArgs *pArgs,
+ UErrorCode *pErrorCode) {
UConverterMBCSTable *mbcsTable;
const int32_t *extIndexes;
uint8_t outputType;
int8_t maxBytesPerUChar;
+ if(pArgs->onlyTestIsLoadable) {
+ return;
+ }
+
mbcsTable=&cnv->sharedData->mbcs;
outputType=mbcsTable->outputType;
if(outputType==MBCS_OUTPUT_DBCS_ONLY) {
/* the swaplfnl option does not apply, remove it */
- cnv->options=options&=~UCNV_OPTION_SWAP_LFNL;
+ cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
}
- if((options&UCNV_OPTION_SWAP_LFNL)!=0) {
+ if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) {
/* do this because double-checked locking is broken */
UBool isCached;
}
/* the option does not apply, remove it */
- cnv->options=options&=~UCNV_OPTION_SWAP_LFNL;
+ cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
}
}
}
- if(uprv_strstr(name, "18030")!=NULL) {
- if(uprv_strstr(name, "gb18030")!=NULL || uprv_strstr(name, "GB18030")!=NULL) {
+ if(uprv_strstr(pArgs->name, "18030")!=NULL) {
+ if(uprv_strstr(pArgs->name, "gb18030")!=NULL || uprv_strstr(pArgs->name, "GB18030")!=NULL) {
/* set a flag for GB 18030 mode, which changes the callback behavior */
cnv->options|=_MBCS_OPTION_GB18030;
}
+ } else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) || (uprv_strstr(pArgs->name, "keis")!=NULL)) {
+ /* set a flag for KEIS converter, which changes the SI/SO character sequence */
+ cnv->options|=_MBCS_OPTION_KEIS;
+ } else if((uprv_strstr(pArgs->name, "JEF")!=NULL) || (uprv_strstr(pArgs->name, "jef")!=NULL)) {
+ /* set a flag for JEF converter, which changes the SI/SO character sequence */
+ cnv->options|=_MBCS_OPTION_JEF;
+ } else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) || (uprv_strstr(pArgs->name, "jips")!=NULL)) {
+ /* set a flag for JIPS converter, which changes the SI/SO character sequence */
+ cnv->options|=_MBCS_OPTION_JIPS;
}
/* fix maxBytesPerUChar depending on outputType and options etc. */
#endif
/* conversion loop */
- while(targetCapacity>0) {
+ while(targetCapacity > 0 && source < sourceLimit) {
entry=stateTable[0][*source++];
/* MBCS_ENTRY_IS_FINAL(entry) */
uint32_t stage2Entry;
uint32_t asciiRoundtrips;
uint32_t value;
- int32_t length, prevLength;
+ uint8_t si_value[2] = {0, 0};
+ uint8_t so_value[2] = {0, 0};
+ uint8_t si_value_length, so_value_length;
+ int32_t length = 0, prevLength;
uint8_t unicodeMask;
cnv=pArgs->converter;
sourceIndex= c==0 ? 0 : -1;
nextSourceIndex=0;
+ /* Get the SI/SO character for the converter */
+ si_value_length = getSISOBytes(SI, cnv->options, si_value);
+ so_value_length = getSISOBytes(SO, cnv->options, so_value);
+
/* conversion loop */
/*
* This is another piece of ugly code:
length=1;
} else {
/* change from double-byte mode to single-byte */
- value|=(uint32_t)UCNV_SI<<8;
- length=2;
+ if (si_value_length == 1) {
+ value|=(uint32_t)si_value[0]<<8;
+ length = 2;
+ } else if (si_value_length == 2) {
+ value|=(uint32_t)si_value[1]<<8;
+ value|=(uint32_t)si_value[0]<<16;
+ length = 3;
+ }
prevLength=1;
}
} else {
length=2;
} else {
/* change from single-byte mode to double-byte */
- value|=(uint32_t)UCNV_SO<<16;
- length=3;
+ if (so_value_length == 1) {
+ value|=(uint32_t)so_value[0]<<16;
+ length = 3;
+ } else if (so_value_length == 2) {
+ value|=(uint32_t)so_value[1]<<16;
+ value|=(uint32_t)so_value[0]<<24;
+ length = 4;
+ }
prevLength=2;
}
}
length=1;
} else {
/* change from double-byte mode to single-byte */
- value|=(uint32_t)UCNV_SI<<8;
- length=2;
+ if (si_value_length == 1) {
+ value|=(uint32_t)si_value[0]<<8;
+ length = 2;
+ } else if (si_value_length == 2) {
+ value|=(uint32_t)si_value[1]<<8;
+ value|=(uint32_t)si_value[0]<<16;
+ length = 3;
+ }
prevLength=1;
}
} else {
length=2;
} else {
/* change from single-byte mode to double-byte */
- value|=(uint32_t)UCNV_SO<<16;
- length=3;
+ if (so_value_length == 1) {
+ value|=(uint32_t)so_value[0]<<16;
+ length = 3;
+ } else if (so_value_length == 2) {
+ value|=(uint32_t)so_value[1]<<16;
+ value|=(uint32_t)so_value[0]<<24;
+ length = 4;
+ }
prevLength=2;
}
}
) {
/* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
if(targetCapacity>0) {
- *target++=(uint8_t)UCNV_SI;
+ *target++=(uint8_t)si_value[0];
+ if (si_value_length == 2) {
+ if (targetCapacity<2) {
+ cnv->charErrorBuffer[0]=(uint8_t)si_value[1];
+ cnv->charErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ } else {
+ *target++=(uint8_t)si_value[1];
+ }
+ }
if(offsets!=NULL) {
/* set the last source character's index (sourceIndex points at sourceLimit now) */
*offsets++=prevSourceIndex;
}
} else {
/* target is full */
- cnv->charErrorBuffer[0]=(char)UCNV_SI;
- cnv->charErrorBufferLength=1;
+ cnv->charErrorBuffer[0]=(uint8_t)si_value[0];
+ if (si_value_length == 2) {
+ cnv->charErrorBuffer[1]=(uint8_t)si_value[1];
+ }
+ cnv->charErrorBufferLength=si_value_length;
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
prevLength=1; /* we switched into SBCS */
c=b;
moreBytes:
while(toULength<toULimit) {
- if(source<sourceLimit) {
+ /*
+ * The sourceLimit may have been adjusted before the conversion loop
+ * to stop before a truncated sequence.
+ * Here we need to use the real limit in case we have two truncated
+ * sequences at the end.
+ * See ticket #7492.
+ */
+ if(source<(uint8_t *)pToUArgs->sourceLimit) {
b=*source;
if(U8_IS_TRAIL(b)) {
++source;
c=b;
moreBytes:
while(toULength<toULimit) {
- if(source<sourceLimit) {
+ /*
+ * The sourceLimit may have been adjusted before the conversion loop
+ * to stop before a truncated sequence.
+ * Here we need to use the real limit in case we have two truncated
+ * sequences at the end.
+ * See ticket #7492.
+ */
+ if(source<(uint8_t *)pToUArgs->sourceLimit) {
b=*source;
if(U8_IS_TRAIL(b)) {
++source;