/*
**********************************************************************
-* Copyright (C) 2000-2009, International Business Machines
+* Copyright (C) 2000-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnvisci.c
#include "unicode/utypes.h"
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
#include "cmemory.h"
#include "ucnv_bld.h"
-#include "unicode/ucnv.h"
#include "ucnv_cnv.h"
-#include "unicode/ucnv_cb.h"
-#include "unicode/uset.h"
#include "cstring.h"
+#include "uassert.h"
#define UCNV_OPTIONS_VERSION_MASK 0xf
#define NUKTA 0x093c
#define PNJ_HA 0x0A39
#define PNJ_RRA 0x0A5C
-static USet* PNJ_BINDI_TIPPI_SET= NULL;
-static USet* PNJ_CONSONANT_SET= NULL;
-
typedef enum {
DEVANAGARI =0,
BENGALI,
{ MALAYALAM, MLM_MASK, MLM }
};
-static void initializeSets() {
- /* TODO: Replace the following two lines with PNJ_CONSONANT_SET = uset_openEmpty(); */
- PNJ_CONSONANT_SET = uset_open(0,0);
- uset_clear(PNJ_CONSONANT_SET);
-
- uset_addRange(PNJ_CONSONANT_SET, 0x0A15, 0x0A28);
- uset_addRange(PNJ_CONSONANT_SET, 0x0A2A, 0x0A30);
- uset_addRange(PNJ_CONSONANT_SET, 0x0A35, 0x0A36);
- uset_addRange(PNJ_CONSONANT_SET, 0x0A38, 0x0A39);
-
- PNJ_BINDI_TIPPI_SET = uset_clone(PNJ_CONSONANT_SET);
- uset_add(PNJ_BINDI_TIPPI_SET, 0x0A05);
- uset_add(PNJ_BINDI_TIPPI_SET, 0x0A07);
- uset_add(PNJ_BINDI_TIPPI_SET, 0x0A3F);
- uset_addRange(PNJ_BINDI_TIPPI_SET, 0x0A41, 0x0A42);
-
- uset_compact(PNJ_CONSONANT_SET);
- uset_compact(PNJ_BINDI_TIPPI_SET);
+/*
+ * Lookup table for the special handling of certain Gurmukhi (PNJ) characters.
+ * It is indexed by (code point - 0x0A00) and covers U+0A00..U+0A4F.
+ * Each entry is a small bit set:
+ *   bit 0 (value 1): the code point is a PNJ consonant
+ *   bit 1 (value 2): the code point is in the PNJ Bindi/Tippi set
+ * Only the values 0, 2 and 3 occur: every consonant listed here also has
+ * the Bindi/Tippi bit set.
+ */
+static const uint8_t pnjMap[0x50] = {
+    /* 0A00..0A07 */ 0, 0, 0, 0, 0, 2, 0, 2,
+    /* 0A08..0A0F */ 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0A10..0A17 */ 0, 0, 0, 0, 0, 3, 3, 3,
+    /* 0A18..0A1F */ 3, 3, 3, 3, 3, 3, 3, 3,
+    /* 0A20..0A27 */ 3, 3, 3, 3, 3, 3, 3, 3,
+    /* 0A28..0A2F */ 3, 0, 3, 3, 3, 3, 3, 3,
+    /* 0A30..0A37 */ 3, 0, 0, 0, 0, 3, 3, 0,
+    /* 0A38..0A3F */ 3, 3, 0, 0, 0, 0, 0, 2,
+    /* 0A40..0A47 */ 0, 2, 2, 0, 0, 0, 0, 0,
+    /* 0A48..0A4F */ 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ * Reports whether c is flagged as a PNJ consonant, i.e. whether bit 0 of its
+ * pnjMap entry is set. Code points outside U+0A00..U+0A4F, the range covered
+ * by pnjMap, are never consonants and return FALSE.
+ */
+static UBool
+isPNJConsonant(UChar32 c) {
+    /* Check the range first so that pnjMap is only indexed with 0..0x4F. */
+    if (0xa00 <= c && c < 0xa50) {
+        return (UBool)(pnjMap[c - 0xa00] & 1);
+    }
+    return FALSE;
+}
+
+/*
+ * Reports whether c is flagged as a member of the PNJ Bindi/Tippi set, i.e.
+ * whether bit 1 of its pnjMap entry is set. Code points outside
+ * U+0A00..U+0A4F, the range covered by pnjMap, return FALSE.
+ */
+static UBool
+isPNJBindiTippi(UChar32 c) {
+    /* Check the range first so that pnjMap is only indexed with 0..0x4F. */
+    if (0xa00 <= c && c < 0xa50) {
+        /* Table entries are at most 3, so the shift leaves exactly bit 1 (0 or 1). */
+        return (UBool)(pnjMap[c - 0xa00] >> 1);
+    }
+    return FALSE;
}
static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
return;
}
- /* Ensure that the sets used in special handling of certain Gurmukhi characters are initialized. */
- initializeSets();
-
cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
if (cnv->extraInfo != NULL) {
}
cnv->extraInfo=NULL;
}
- if (PNJ_CONSONANT_SET != NULL) {
- uset_close(PNJ_CONSONANT_SET);
- PNJ_CONSONANT_SET = NULL;
- }
- if (PNJ_BINDI_TIPPI_SET != NULL) {
- uset_close(PNJ_BINDI_TIPPI_SET);
- PNJ_BINDI_TIPPI_SET = NULL;
- }
}
static const char* _ISCIIgetName(const UConverter* cnv) {
converterData->contextCharFromUnicode = 0x00;
break;
}
- if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && uset_contains(PNJ_CONSONANT_SET, (sourceChar + PNJ_DELTA))) {
+ if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
/* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
/* reset context char */
converterData->contextCharFromUnicode = 0x0000;
} else {
/* oops.. the code point is unassigned */
/*check if the char is a First surrogate*/
- if (UTF_IS_SURROGATE(sourceChar)) {
- if (UTF_IS_SURROGATE_FIRST(sourceChar)) {
+ if (U16_IS_SURROGATE(sourceChar)) {
+ if (U16_IS_SURROGATE_LEAD(sourceChar)) {
getTrail:
/*look ahead to find the trail surrogate*/
if (source < sourceLimit) {
/* test the following code unit */
UChar trail= (*source);
- if (UTF_IS_SECOND_SURROGATE(trail)) {
+ if (U16_IS_TRAIL(trail)) {
source++;
- sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
+ sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
*err =U_INVALID_CHAR_FOUND;
/* convert this surrogate code point */
/* exit this condition tree */
targetUniChar = toUnicodeTable[(sourceChar)] ; \
/* is the code point valid in current script? */ \
if(sourceChar> ASCII_END && \
- (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode)==0){ \
+ (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \
/* Vocallic RR is assigne in ISCII Telugu and Unicode */ \
if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \
targetUniChar!=VOCALLIC_RR){ \
/* look at the pre-context and perform special processing */
switch (sourceChar) {
case ISCII_INV:
- case EXT: /*falls through*/
+ case EXT:
case ATR:
*contextCharToUnicode = (UChar)sourceChar;
}
break;
case 0x0A:
- /* fall through */
case 0x0D:
data->resetToDefaultToUnicode = TRUE;
GET_MAPPING(sourceChar,targetUniChar,data)
i=1;
found=FALSE;
for (; i<vowelSignESpecialCases[0][0]; i++) {
+ U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases));
if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
targetUniChar=vowelSignESpecialCases[i][1];
found=TRUE;
/* else fall through to default */
}
/* else fall through to default */
+ U_FALLTHROUGH;
}
default:GET_MAPPING(sourceChar,targetUniChar,data)
;
if (*toUnicodeStatus != missingCharMarker) {
/* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
- if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && uset_contains(PNJ_CONSONANT_SET, data->prevToUnicodeStatus) &&
+ if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
(*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {
/* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
offset = (int)(source-args->source - 3);
/* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
* If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
*/
- if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && uset_contains(PNJ_BINDI_TIPPI_SET, (*toUnicodeStatus + PNJ_DELTA))) {
+ if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
targetUniChar = PNJ_TIPPI - PNJ_DELTA;
WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
- } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && uset_contains(PNJ_CONSONANT_SET, (*toUnicodeStatus + PNJ_DELTA))) {
+ } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
/* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
} else {
};
-const UConverterSharedData _ISCIIData={
- sizeof(UConverterSharedData),
- ~((uint32_t) 0),
- NULL,
- NULL,
- &_ISCIIStaticData,
- FALSE,
- &_ISCIIImpl,
- 0
-};
+/*
+ * Externally visible shared-data object for ISCII. All of its fields are
+ * produced by UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER, which is given the
+ * addresses of _ISCIIStaticData and _ISCIIImpl.
+ */
+const UConverterSharedData _ISCIIData = UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(
+    &_ISCIIStaticData, &_ISCIIImpl);
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */