icuSources/common/ucnvisci.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 **********************************************************************
   5 *   Copyright (C) 2000-2016, International Business Machines
   6 *   Corporation and others.  All Rights Reserved.
   7 **********************************************************************
   8 *   file name:  ucnvisci.c
   9 *   encoding:   UTF-8
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 2001JUN26
  14 *   created by: Ram Viswanadha
  15 *
  16 *   Date        Name        Description
  17 *   24/7/2001   Ram         Added support for EXT character handling
  18 */
  19
  20 #include "unicode/utypes.h"
  21
  22 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  23
  24 #include "unicode/ucnv.h"
  25 #include "unicode/ucnv_cb.h"
  26 #include "unicode/utf16.h"
  27 #include "cmemory.h"
  28 #include "ucnv_bld.h"
  29 #include "ucnv_cnv.h"
  30 #include "cstring.h"
  31 #include "uassert.h"
  32
  33 #define UCNV_OPTIONS_VERSION_MASK 0xf
  34 #define NUKTA               0x093c
  35 #define HALANT              0x094d
  36 #define ZWNJ                0x200c /* Zero Width Non Joiner */
  37 #define ZWJ                 0x200d /* Zero width Joiner */
  38 #define INVALID_CHAR        0xffff
  39 #define ATR                 0xEF   /* Attribute code */
  40 #define EXT                 0xF0   /* Extension code */
  41 #define DANDA               0x0964
  42 #define DOUBLE_DANDA        0x0965
  43 #define ISCII_NUKTA         0xE9
  44 #define ISCII_HALANT        0xE8
  45 #define ISCII_DANDA         0xEA
  46 #define ISCII_INV           0xD9
  47 #define ISCII_VOWEL_SIGN_E  0xE0
  48 #define INDIC_BLOCK_BEGIN   0x0900
  49 #define INDIC_BLOCK_END     0x0D7F
  50 #define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
  51 #define VOCALLIC_RR         0x0931
  52 #define LF                  0x0A
  53 #define ASCII_END           0xA0
  54 #define NO_CHAR_MARKER      0xFFFE
  55 #define TELUGU_DELTA        DELTA * TELUGU
  56 #define DEV_ABBR_SIGN       0x0970
  57 #define DEV_ANUDATTA        0x0952
  58 #define EXT_RANGE_BEGIN     0xA1
  59 #define EXT_RANGE_END       0xEE
  60
  61 #define PNJ_DELTA           0x0100
  62 #define PNJ_BINDI           0x0A02
  63 #define PNJ_TIPPI           0x0A70
  64 #define PNJ_SIGN_VIRAMA     0x0A4D
  65 #define PNJ_ADHAK           0x0A71
  66 #define PNJ_HA              0x0A39
  67 #define PNJ_RRA             0x0A5C
  68
  69 typedef enum {
  70     DEVANAGARI =0,
  71     BENGALI,
  72     GURMUKHI,
  73     GUJARATI,
  74     ORIYA,
  75     TAMIL,
  76     TELUGU,
  77     KANNADA,
  78     MALAYALAM,
  79     DELTA=0x80
  80 }UniLang;
  81
  82 /**
  83  * Enumeration for switching code pages if <ATR>+<one of below values>
  84  * is encountered
  85  */
  86 typedef enum {
  87     DEF = 0x40,
  88     RMN = 0x41,
  89     DEV = 0x42,
  90     BNG = 0x43,
  91     TML = 0x44,
  92     TLG = 0x45,
  93     ASM = 0x46,
  94     ORI = 0x47,
  95     KND = 0x48,
  96     MLM = 0x49,
  97     GJR = 0x4A,
  98     PNJ = 0x4B,
  99     ARB = 0x71,
 100     PES = 0x72,
 101     URD = 0x73,
 102     SND = 0x74,
 103     KSM = 0x75,
 104     PST = 0x76
 105 }ISCIILang;
 106
 107 typedef enum {
 108     DEV_MASK =0x80,
 109     PNJ_MASK =0x40,
 110     GJR_MASK =0x20,
 111     ORI_MASK =0x10,
 112     BNG_MASK =0x08,
 113     KND_MASK =0x04,
 114     MLM_MASK =0x02,
 115     TML_MASK =0x01,
 116     ZERO =0x00
 117 }MaskEnum;
 118
 119 #define ISCII_CNV_PREFIX "ISCII,version="
 120
 121 typedef struct {
 122     UChar contextCharToUnicode;         /* previous Unicode codepoint for contextual analysis */
 123     UChar contextCharFromUnicode;       /* previous Unicode codepoint for contextual analysis */
 124     uint16_t defDeltaToUnicode;         /* delta for switching to default state when DEF is encountered  */
 125     uint16_t currentDeltaFromUnicode;   /* current delta in Indic block */
 126     uint16_t currentDeltaToUnicode;     /* current delta in Indic block */
 127     MaskEnum currentMaskFromUnicode;    /* mask for current state in toUnicode */
 128     MaskEnum currentMaskToUnicode;      /* mask for current state in toUnicode */
 129     MaskEnum defMaskToUnicode;          /* mask for default state in toUnicode */
 130     UBool isFirstBuffer;                /* boolean for fromUnicode to see if we need to announce the first script */
 131     UBool resetToDefaultToUnicode;      /* boolean for reseting to default delta and mask when a newline is encountered*/
 132     char name[sizeof(ISCII_CNV_PREFIX) + 1];
 133     UChar32 prevToUnicodeStatus;        /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
 134 } UConverterDataISCII;
 135
 136 typedef struct LookupDataStruct {
 137     UniLang uniLang;
 138     MaskEnum maskEnum;
 139     ISCIILang isciiLang;
 140 } LookupDataStruct;
 141
 142 static const LookupDataStruct lookupInitialData[]={
 143     { DEVANAGARI, DEV_MASK,  DEV },
 144     { BENGALI,    BNG_MASK,  BNG },
 145     { GURMUKHI,   PNJ_MASK,  PNJ },
 146     { GUJARATI,   GJR_MASK,  GJR },
 147     { ORIYA,      ORI_MASK,  ORI },
 148     { TAMIL,      TML_MASK,  TML },
 149     { TELUGU,     KND_MASK,  TLG },
 150     { KANNADA,    KND_MASK,  KND },
 151     { MALAYALAM,  MLM_MASK,  MLM }
 152 };
 153
 154 /*
 155  * For special handling of certain Gurmukhi characters.
 156  * Bit 0 (value 1): PNJ consonant
 157  * Bit 1 (value 2): PNJ Bindi Tippi
 158  */
 159 static const uint8_t pnjMap[80] = {
 160     /* 0A00..0A0F */
 161     0, 0, 0, 0, 0, 2, 0, 2,  0, 0, 0, 0, 0, 0, 0, 0,
 162     /* 0A10..0A1F */
 163     0, 0, 0, 0, 0, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,
 164     /* 0A20..0A2F */
 165     3, 3, 3, 3, 3, 3, 3, 3,  3, 0, 3, 3, 3, 3, 3, 3,
 166     /* 0A30..0A3F */
 167     3, 0, 0, 0, 0, 3, 3, 0,  3, 3, 0, 0, 0, 0, 0, 2,
 168     /* 0A40..0A4F */
 169     0, 2, 2, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
 170 };
 171
 172 static UBool
 173 isPNJConsonant(UChar32 c) {
 174     if (c < 0xa00 || 0xa50 <= c) {
 175         return FALSE;
 176     } else {
 177         return (UBool)(pnjMap[c - 0xa00] & 1);
 178     }
 179 }
 180
 181 static UBool
 182 isPNJBindiTippi(UChar32 c) {
 183     if (c < 0xa00 || 0xa50 <= c) {
 184         return FALSE;
 185     } else {
 186         return (UBool)(pnjMap[c - 0xa00] >> 1);
 187     }
 188 }
 189 U_CDECL_BEGIN
 190 static void  U_CALLCONV
 191 _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
 192     if(pArgs->onlyTestIsLoadable) {
 193         return;
 194     }
 195
 196     cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
 197
 198     if (cnv->extraInfo != NULL) {
 199         int32_t len=0;
 200         UConverterDataISCII *converterData=
 201                 (UConverterDataISCII *) cnv->extraInfo;
 202         converterData->contextCharToUnicode=NO_CHAR_MARKER;
 203         cnv->toUnicodeStatus = missingCharMarker;
 204         converterData->contextCharFromUnicode=0x0000;
 205         converterData->resetToDefaultToUnicode=FALSE;
 206         /* check if the version requested is supported */
 207         if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) {
 208             /* initialize state variables */
 209             converterData->currentDeltaFromUnicode
 210                     = converterData->currentDeltaToUnicode
 211                             = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
 212
 213             converterData->currentMaskFromUnicode
 214                     = converterData->currentMaskToUnicode
 215                             = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
 216
 217             converterData->isFirstBuffer=TRUE;
 218             (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
 219             len = (int32_t)uprv_strlen(converterData->name);
 220             converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0');
 221             converterData->name[len+1]=0;
 222
 223             converterData->prevToUnicodeStatus = 0x0000;
 224         } else {
 225             uprv_free(cnv->extraInfo);
 226             cnv->extraInfo = NULL;
 227             *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 228         }
 229
 230     } else {
 231         *errorCode =U_MEMORY_ALLOCATION_ERROR;
 232     }
 233 }
 234
 235 static void U_CALLCONV
 236 _ISCIIClose(UConverter *cnv) {
 237     if (cnv->extraInfo!=NULL) {
 238         if (!cnv->isExtraLocal) {
 239             uprv_free(cnv->extraInfo);
 240         }
 241         cnv->extraInfo=NULL;
 242     }
 243 }
 244
 245 static const char*  U_CALLCONV
 246 _ISCIIgetName(const UConverter* cnv) {
 247     if (cnv->extraInfo) {
 248         UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
 249         return myData->name;
 250     }
 251     return NULL;
 252 }
 253
 254 static void U_CALLCONV
 255 _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
 256     UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
 257     if (choice<=UCNV_RESET_TO_UNICODE) {
 258         cnv->toUnicodeStatus = missingCharMarker;
 259         cnv->mode=0;
 260         data->currentDeltaToUnicode=data->defDeltaToUnicode;
 261         data->currentMaskToUnicode = data->defMaskToUnicode;
 262         data->contextCharToUnicode=NO_CHAR_MARKER;
 263         data->prevToUnicodeStatus = 0x0000;
 264     }
 265     if (choice!=UCNV_RESET_TO_UNICODE) {
 266         cnv->fromUChar32=0x0000;
 267         data->contextCharFromUnicode=0x00;
 268         data->currentMaskFromUnicode=data->defMaskToUnicode;
 269         data->currentDeltaFromUnicode=data->defDeltaToUnicode;
 270         data->isFirstBuffer=TRUE;
 271         data->resetToDefaultToUnicode=FALSE;
 272     }
 273 }
 274
 275 /**
 276  * The values in validity table are indexed by the lower bits of Unicode
 277  * range 0x0900 - 0x09ff. The values have a structure like:
 278  *       ---------------------------------------------------------------
 279  *      | DEV   | PNJ   | GJR   | ORI   | BNG   | TLG   | MLM   | TML   |
 280  *      |       |       |       |       | ASM   | KND   |       |       |
 281  *       ---------------------------------------------------------------
 282  * If a code point is valid in a particular script
 283  * then that bit is turned on
 284  *
 285  * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for
 286  * to represent these languages
 287  *
 288  * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
 289  * and combine and use 1 bit to represent these languages.
 290  *
 291  * TODO: It is probably easier to understand and maintain to change this
 292  * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
 293  */
 294
 295 static const uint8_t validityTable[128] = {
 296 /* This state table is tool generated please do not edit unless you know exactly what you are doing */
 297 /* Note: This table was edited to mirror the Windows XP implementation */
 298 /*ISCII:Valid:Unicode */
 299 /*0xa0 : 0x00: 0x900  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 300 /*0xa1 : 0xb8: 0x901  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
 301 /*0xa2 : 0xfe: 0x902  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 302 /*0xa3 : 0xbf: 0x903  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 303 /*0x00 : 0x00: 0x904  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 304 /*0xa4 : 0xff: 0x905  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 305 /*0xa5 : 0xff: 0x906  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 306 /*0xa6 : 0xff: 0x907  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 307 /*0xa7 : 0xff: 0x908  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 308 /*0xa8 : 0xff: 0x909  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 309 /*0xa9 : 0xff: 0x90a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 310 /*0xaa : 0xfe: 0x90b  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 311 /*0x00 : 0x00: 0x90c  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 312 /*0xae : 0x80: 0x90d  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 313 /*0xab : 0x87: 0x90e  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
 314 /*0xac : 0xff: 0x90f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 315 /*0xad : 0xff: 0x910  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 316 /*0xb2 : 0x80: 0x911  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 317 /*0xaf : 0x87: 0x912  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
 318 /*0xb0 : 0xff: 0x913  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 319 /*0xb1 : 0xff: 0x914  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 320 /*0xb3 : 0xff: 0x915  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 321 /*0xb4 : 0xfe: 0x916  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 322 /*0xb5 : 0xfe: 0x917  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 323 /*0xb6 : 0xfe: 0x918  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 324 /*0xb7 : 0xff: 0x919  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 325 /*0xb8 : 0xff: 0x91a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 326 /*0xb9 : 0xfe: 0x91b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 327 /*0xba : 0xff: 0x91c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 328 /*0xbb : 0xfe: 0x91d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 329 /*0xbc : 0xff: 0x91e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 330 /*0xbd : 0xff: 0x91f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 331 /*0xbe : 0xfe: 0x920  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 332 /*0xbf : 0xfe: 0x921  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 333 /*0xc0 : 0xfe: 0x922  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 334 /*0xc1 : 0xff: 0x923  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 335 /*0xc2 : 0xff: 0x924  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 336 /*0xc3 : 0xfe: 0x925  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 337 /*0xc4 : 0xfe: 0x926  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 338 /*0xc5 : 0xfe: 0x927  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 339 /*0xc6 : 0xff: 0x928  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 340 /*0xc7 : 0x81: 0x929  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + TML_MASK ,
 341 /*0xc8 : 0xff: 0x92a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 342 /*0xc9 : 0xfe: 0x92b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 343 /*0xca : 0xfe: 0x92c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 344 /*0xcb : 0xfe: 0x92d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 345 /*0xcc : 0xfe: 0x92e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 346 /*0xcd : 0xff: 0x92f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 347 /*0xcf : 0xff: 0x930  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 348 /*0xd0 : 0x87: 0x931  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
 349 /*0xd1 : 0xff: 0x932  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 350 /*0xd2 : 0xb7: 0x933  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
 351 /*0xd3 : 0x83: 0x934  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
 352 /*0xd4 : 0xff: 0x935  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
 353 /*0xd5 : 0xfe: 0x936  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 354 /*0xd6 : 0xbf: 0x937  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 355 /*0xd7 : 0xff: 0x938  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 356 /*0xd8 : 0xff: 0x939  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 357 /*0x00 : 0x00: 0x93A  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 358 /*0x00 : 0x00: 0x93B  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 359 /*0xe9 : 0xda: 0x93c  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
 360 /*0x00 : 0x00: 0x93d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 361 /*0xda : 0xff: 0x93e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 362 /*0xdb : 0xff: 0x93f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 363 /*0xdc : 0xff: 0x940  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 364 /*0xdd : 0xff: 0x941  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 365 /*0xde : 0xff: 0x942  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 366 /*0xdf : 0xbe: 0x943  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 367 /*0x00 : 0x00: 0x944  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + BNG_MASK + KND_MASK + ZERO     + ZERO     ,
 368 /*0xe3 : 0x80: 0x945  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 369 /*0xe0 : 0x87: 0x946  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
 370 /*0xe1 : 0xff: 0x947  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 371 /*0xe2 : 0xff: 0x948  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 372 /*0xe7 : 0x80: 0x949  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 373 /*0xe4 : 0x87: 0x94a  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
 374 /*0xe5 : 0xff: 0x94b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 375 /*0xe6 : 0xff: 0x94c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 376 /*0xe8 : 0xff: 0x94d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 377 /*0xec : 0x00: 0x94e  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 378 /*0xed : 0x00: 0x94f  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 379 /*0x00 : 0x00: 0x950  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 380 /*0x00 : 0x00: 0x951  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 381 /*0x00 : 0x00: 0x952  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 382 /*0x00 : 0x00: 0x953  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 383 /*0x00 : 0x00: 0x954  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 384 /*0x00 : 0x00: 0x955  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + ZERO     + ZERO     ,
 385 /*0x00 : 0x00: 0x956  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + KND_MASK + ZERO     + ZERO     ,
 386 /*0x00 : 0x00: 0x957  */ ZERO     + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + MLM_MASK + ZERO     ,
 387 /*0x00 : 0x00: 0x958  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 388 /*0x00 : 0x00: 0x959  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 389 /*0x00 : 0x00: 0x95a  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 390 /*0x00 : 0x00: 0x95b  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 391 /*0x00 : 0x00: 0x95c  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
 392 /*0x00 : 0x00: 0x95d  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
 393 /*0x00 : 0x00: 0x95e  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 394 /*0xce : 0x98: 0x95f  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
 395 /*0x00 : 0x00: 0x960  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 396 /*0x00 : 0x00: 0x961  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
 397 /*0x00 : 0x00: 0x962  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
 398 /*0x00 : 0x00: 0x963  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
 399 /*0xea : 0xf8: 0x964  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 400 /*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 401 /*0xf1 : 0xff: 0x966  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 402 /*0xf2 : 0xff: 0x967  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 403 /*0xf3 : 0xff: 0x968  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 404 /*0xf4 : 0xff: 0x969  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 405 /*0xf5 : 0xff: 0x96a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 406 /*0xf6 : 0xff: 0x96b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 407 /*0xf7 : 0xff: 0x96c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 408 /*0xf8 : 0xff: 0x96d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 409 /*0xf9 : 0xff: 0x96e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 410 /*0xfa : 0xff: 0x96f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
 411 /*0x00 : 0x80: 0x970  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
 412 /*
 413  * The length of the array is 128 to provide values for 0x900..0x97f.
 414  * The last 15 entries for 0x971..0x97f of the validity table are all zero
 415  * because no Indic script uses such Unicode code points.
 416  */
 417 /*0x00 : 0x00: 0x9yz  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO
 418 };
 419
 420 static const uint16_t fromUnicodeTable[128]={
 421     0x00a0 ,/* 0x0900 */
 422     0x00a1 ,/* 0x0901 */
 423     0x00a2 ,/* 0x0902 */
 424     0x00a3 ,/* 0x0903 */
 425     0xa4e0 ,/* 0x0904 */
 426     0x00a4 ,/* 0x0905 */
 427     0x00a5 ,/* 0x0906 */
 428     0x00a6 ,/* 0x0907 */
 429     0x00a7 ,/* 0x0908 */
 430     0x00a8 ,/* 0x0909 */
 431     0x00a9 ,/* 0x090a */
 432     0x00aa ,/* 0x090b */
 433     0xA6E9 ,/* 0x090c */
 434     0x00ae ,/* 0x090d */
 435     0x00ab ,/* 0x090e */
 436     0x00ac ,/* 0x090f */
 437     0x00ad ,/* 0x0910 */
 438     0x00b2 ,/* 0x0911 */
 439     0x00af ,/* 0x0912 */
 440     0x00b0 ,/* 0x0913 */
 441     0x00b1 ,/* 0x0914 */
 442     0x00b3 ,/* 0x0915 */
 443     0x00b4 ,/* 0x0916 */
 444     0x00b5 ,/* 0x0917 */
 445     0x00b6 ,/* 0x0918 */
 446     0x00b7 ,/* 0x0919 */
 447     0x00b8 ,/* 0x091a */
 448     0x00b9 ,/* 0x091b */
 449     0x00ba ,/* 0x091c */
 450     0x00bb ,/* 0x091d */
 451     0x00bc ,/* 0x091e */
 452     0x00bd ,/* 0x091f */
 453     0x00be ,/* 0x0920 */
 454     0x00bf ,/* 0x0921 */
 455     0x00c0 ,/* 0x0922 */
 456     0x00c1 ,/* 0x0923 */
 457     0x00c2 ,/* 0x0924 */
 458     0x00c3 ,/* 0x0925 */
 459     0x00c4 ,/* 0x0926 */
 460     0x00c5 ,/* 0x0927 */
 461     0x00c6 ,/* 0x0928 */
 462     0x00c7 ,/* 0x0929 */
 463     0x00c8 ,/* 0x092a */
 464     0x00c9 ,/* 0x092b */
 465     0x00ca ,/* 0x092c */
 466     0x00cb ,/* 0x092d */
 467     0x00cc ,/* 0x092e */
 468     0x00cd ,/* 0x092f */
 469     0x00cf ,/* 0x0930 */
 470     0x00d0 ,/* 0x0931 */
 471     0x00d1 ,/* 0x0932 */
 472     0x00d2 ,/* 0x0933 */
 473     0x00d3 ,/* 0x0934 */
 474     0x00d4 ,/* 0x0935 */
 475     0x00d5 ,/* 0x0936 */
 476     0x00d6 ,/* 0x0937 */
 477     0x00d7 ,/* 0x0938 */
 478     0x00d8 ,/* 0x0939 */
 479     0xFFFF ,/* 0x093A */
 480     0xFFFF ,/* 0x093B */
 481     0x00e9 ,/* 0x093c */
 482     0xEAE9 ,/* 0x093d */
 483     0x00da ,/* 0x093e */
 484     0x00db ,/* 0x093f */
 485     0x00dc ,/* 0x0940 */
 486     0x00dd ,/* 0x0941 */
 487     0x00de ,/* 0x0942 */
 488     0x00df ,/* 0x0943 */
 489     0xDFE9 ,/* 0x0944 */
 490     0x00e3 ,/* 0x0945 */
 491     0x00e0 ,/* 0x0946 */
 492     0x00e1 ,/* 0x0947 */
 493     0x00e2 ,/* 0x0948 */
 494     0x00e7 ,/* 0x0949 */
 495     0x00e4 ,/* 0x094a */
 496     0x00e5 ,/* 0x094b */
 497     0x00e6 ,/* 0x094c */
 498     0x00e8 ,/* 0x094d */
 499     0x00ec ,/* 0x094e */
 500     0x00ed ,/* 0x094f */
 501     0xA1E9 ,/* 0x0950 */ /* OM Symbol */
 502     0xFFFF ,/* 0x0951 */
 503     0xF0B8 ,/* 0x0952 */
 504     0xFFFF ,/* 0x0953 */
 505     0xFFFF ,/* 0x0954 */
 506     0xFFFF ,/* 0x0955 */
 507     0xFFFF ,/* 0x0956 */
 508     0xFFFF ,/* 0x0957 */
 509     0xb3e9 ,/* 0x0958 */
 510     0xb4e9 ,/* 0x0959 */
 511     0xb5e9 ,/* 0x095a */
 512     0xbae9 ,/* 0x095b */
 513     0xbfe9 ,/* 0x095c */
 514     0xC0E9 ,/* 0x095d */
 515     0xc9e9 ,/* 0x095e */
 516     0x00ce ,/* 0x095f */
 517     0xAAe9 ,/* 0x0960 */
 518     0xA7E9 ,/* 0x0961 */
 519     0xDBE9 ,/* 0x0962 */
 520     0xDCE9 ,/* 0x0963 */
 521     0x00ea ,/* 0x0964 */
 522     0xeaea ,/* 0x0965 */
 523     0x00f1 ,/* 0x0966 */
 524     0x00f2 ,/* 0x0967 */
 525     0x00f3 ,/* 0x0968 */
 526     0x00f4 ,/* 0x0969 */
 527     0x00f5 ,/* 0x096a */
 528     0x00f6 ,/* 0x096b */
 529     0x00f7 ,/* 0x096c */
 530     0x00f8 ,/* 0x096d */
 531     0x00f9 ,/* 0x096e */
 532     0x00fa ,/* 0x096f */
 533     0xF0BF ,/* 0x0970 */
 534     0xFFFF ,/* 0x0971 */
 535     0xFFFF ,/* 0x0972 */
 536     0xFFFF ,/* 0x0973 */
 537     0xFFFF ,/* 0x0974 */
 538     0xFFFF ,/* 0x0975 */
 539     0xFFFF ,/* 0x0976 */
 540     0xFFFF ,/* 0x0977 */
 541     0xFFFF ,/* 0x0978 */
 542     0xFFFF ,/* 0x0979 */
 543     0xFFFF ,/* 0x097a */
 544     0xFFFF ,/* 0x097b */
 545     0xFFFF ,/* 0x097c */
 546     0xFFFF ,/* 0x097d */
 547     0xFFFF ,/* 0x097e */
 548     0xFFFF ,/* 0x097f */
 549 };
 550 static const uint16_t toUnicodeTable[256]={
 551     0x0000,/* 0x00 */
 552     0x0001,/* 0x01 */
 553     0x0002,/* 0x02 */
 554     0x0003,/* 0x03 */
 555     0x0004,/* 0x04 */
 556     0x0005,/* 0x05 */
 557     0x0006,/* 0x06 */
 558     0x0007,/* 0x07 */
 559     0x0008,/* 0x08 */
 560     0x0009,/* 0x09 */
 561     0x000a,/* 0x0a */
 562     0x000b,/* 0x0b */
 563     0x000c,/* 0x0c */
 564     0x000d,/* 0x0d */
 565     0x000e,/* 0x0e */
 566     0x000f,/* 0x0f */
 567     0x0010,/* 0x10 */
 568     0x0011,/* 0x11 */
 569     0x0012,/* 0x12 */
 570     0x0013,/* 0x13 */
 571     0x0014,/* 0x14 */
 572     0x0015,/* 0x15 */
 573     0x0016,/* 0x16 */
 574     0x0017,/* 0x17 */
 575     0x0018,/* 0x18 */
 576     0x0019,/* 0x19 */
 577     0x001a,/* 0x1a */
 578     0x001b,/* 0x1b */
 579     0x001c,/* 0x1c */
 580     0x001d,/* 0x1d */
 581     0x001e,/* 0x1e */
 582     0x001f,/* 0x1f */
 583     0x0020,/* 0x20 */
 584     0x0021,/* 0x21 */
 585     0x0022,/* 0x22 */
 586     0x0023,/* 0x23 */
 587     0x0024,/* 0x24 */
 588     0x0025,/* 0x25 */
 589     0x0026,/* 0x26 */
 590     0x0027,/* 0x27 */
 591     0x0028,/* 0x28 */
 592     0x0029,/* 0x29 */
 593     0x002a,/* 0x2a */
 594     0x002b,/* 0x2b */
 595     0x002c,/* 0x2c */
 596     0x002d,/* 0x2d */
 597     0x002e,/* 0x2e */
 598     0x002f,/* 0x2f */
 599     0x0030,/* 0x30 */
 600     0x0031,/* 0x31 */
 601     0x0032,/* 0x32 */
 602     0x0033,/* 0x33 */
 603     0x0034,/* 0x34 */
 604     0x0035,/* 0x35 */
 605     0x0036,/* 0x36 */
 606     0x0037,/* 0x37 */
 607     0x0038,/* 0x38 */
 608     0x0039,/* 0x39 */
 609     0x003A,/* 0x3A */
 610     0x003B,/* 0x3B */
 611     0x003c,/* 0x3c */
 612     0x003d,/* 0x3d */
 613     0x003e,/* 0x3e */
 614     0x003f,/* 0x3f */
 615     0x0040,/* 0x40 */
 616     0x0041,/* 0x41 */
 617     0x0042,/* 0x42 */
 618     0x0043,/* 0x43 */
 619     0x0044,/* 0x44 */
 620     0x0045,/* 0x45 */
 621     0x0046,/* 0x46 */
 622     0x0047,/* 0x47 */
 623     0x0048,/* 0x48 */
 624     0x0049,/* 0x49 */
 625     0x004a,/* 0x4a */
 626     0x004b,/* 0x4b */
 627     0x004c,/* 0x4c */
 628     0x004d,/* 0x4d */
 629     0x004e,/* 0x4e */
 630     0x004f,/* 0x4f */
 631     0x0050,/* 0x50 */
 632     0x0051,/* 0x51 */
 633     0x0052,/* 0x52 */
 634     0x0053,/* 0x53 */
 635     0x0054,/* 0x54 */
 636     0x0055,/* 0x55 */
 637     0x0056,/* 0x56 */
 638     0x0057,/* 0x57 */
 639     0x0058,/* 0x58 */
 640     0x0059,/* 0x59 */
 641     0x005a,/* 0x5a */
 642     0x005b,/* 0x5b */
 643     0x005c,/* 0x5c */
 644     0x005d,/* 0x5d */
 645     0x005e,/* 0x5e */
 646     0x005f,/* 0x5f */
 647     0x0060,/* 0x60 */
 648     0x0061,/* 0x61 */
 649     0x0062,/* 0x62 */
 650     0x0063,/* 0x63 */
 651     0x0064,/* 0x64 */
 652     0x0065,/* 0x65 */
 653     0x0066,/* 0x66 */
 654     0x0067,/* 0x67 */
 655     0x0068,/* 0x68 */
 656     0x0069,/* 0x69 */
 657     0x006a,/* 0x6a */
 658     0x006b,/* 0x6b */
 659     0x006c,/* 0x6c */
 660     0x006d,/* 0x6d */
 661     0x006e,/* 0x6e */
 662     0x006f,/* 0x6f */
 663     0x0070,/* 0x70 */
 664     0x0071,/* 0x71 */
 665     0x0072,/* 0x72 */
 666     0x0073,/* 0x73 */
 667     0x0074,/* 0x74 */
 668     0x0075,/* 0x75 */
 669     0x0076,/* 0x76 */
 670     0x0077,/* 0x77 */
 671     0x0078,/* 0x78 */
 672     0x0079,/* 0x79 */
 673     0x007a,/* 0x7a */
 674     0x007b,/* 0x7b */
 675     0x007c,/* 0x7c */
 676     0x007d,/* 0x7d */
 677     0x007e,/* 0x7e */
 678     0x007f,/* 0x7f */
 679     0x0080,/* 0x80 */
 680     0x0081,/* 0x81 */
 681     0x0082,/* 0x82 */
 682     0x0083,/* 0x83 */
 683     0x0084,/* 0x84 */
 684     0x0085,/* 0x85 */
 685     0x0086,/* 0x86 */
 686     0x0087,/* 0x87 */
 687     0x0088,/* 0x88 */
 688     0x0089,/* 0x89 */
 689     0x008a,/* 0x8a */
 690     0x008b,/* 0x8b */
 691     0x008c,/* 0x8c */
 692     0x008d,/* 0x8d */
 693     0x008e,/* 0x8e */
 694     0x008f,/* 0x8f */
 695     0x0090,/* 0x90 */
 696     0x0091,/* 0x91 */
 697     0x0092,/* 0x92 */
 698     0x0093,/* 0x93 */
 699     0x0094,/* 0x94 */
 700     0x0095,/* 0x95 */
 701     0x0096,/* 0x96 */
 702     0x0097,/* 0x97 */
 703     0x0098,/* 0x98 */
 704     0x0099,/* 0x99 */
 705     0x009a,/* 0x9a */
 706     0x009b,/* 0x9b */
 707     0x009c,/* 0x9c */
 708     0x009d,/* 0x9d */
 709     0x009e,/* 0x9e */
 710     0x009f,/* 0x9f */
 711     0x00A0,/* 0xa0 */
 712     0x0901,/* 0xa1 */
 713     0x0902,/* 0xa2 */
 714     0x0903,/* 0xa3 */
 715     0x0905,/* 0xa4 */
 716     0x0906,/* 0xa5 */
 717     0x0907,/* 0xa6 */
 718     0x0908,/* 0xa7 */
 719     0x0909,/* 0xa8 */
 720     0x090a,/* 0xa9 */
 721     0x090b,/* 0xaa */
 722     0x090e,/* 0xab */
 723     0x090f,/* 0xac */
 724     0x0910,/* 0xad */
 725     0x090d,/* 0xae */
 726     0x0912,/* 0xaf */
 727     0x0913,/* 0xb0 */
 728     0x0914,/* 0xb1 */
 729     0x0911,/* 0xb2 */
 730     0x0915,/* 0xb3 */
 731     0x0916,/* 0xb4 */
 732     0x0917,/* 0xb5 */
 733     0x0918,/* 0xb6 */
 734     0x0919,/* 0xb7 */
 735     0x091a,/* 0xb8 */
 736     0x091b,/* 0xb9 */
 737     0x091c,/* 0xba */
 738     0x091d,/* 0xbb */
 739     0x091e,/* 0xbc */
 740     0x091f,/* 0xbd */
 741     0x0920,/* 0xbe */
 742     0x0921,/* 0xbf */
 743     0x0922,/* 0xc0 */
 744     0x0923,/* 0xc1 */
 745     0x0924,/* 0xc2 */
 746     0x0925,/* 0xc3 */
 747     0x0926,/* 0xc4 */
 748     0x0927,/* 0xc5 */
 749     0x0928,/* 0xc6 */
 750     0x0929,/* 0xc7 */
 751     0x092a,/* 0xc8 */
 752     0x092b,/* 0xc9 */
 753     0x092c,/* 0xca */
 754     0x092d,/* 0xcb */
 755     0x092e,/* 0xcc */
 756     0x092f,/* 0xcd */
 757     0x095f,/* 0xce */
 758     0x0930,/* 0xcf */
 759     0x0931,/* 0xd0 */
 760     0x0932,/* 0xd1 */
 761     0x0933,/* 0xd2 */
 762     0x0934,/* 0xd3 */
 763     0x0935,/* 0xd4 */
 764     0x0936,/* 0xd5 */
 765     0x0937,/* 0xd6 */
 766     0x0938,/* 0xd7 */
 767     0x0939,/* 0xd8 */
 768     0x200D,/* 0xd9 */
 769     0x093e,/* 0xda */
 770     0x093f,/* 0xdb */
 771     0x0940,/* 0xdc */
 772     0x0941,/* 0xdd */
 773     0x0942,/* 0xde */
 774     0x0943,/* 0xdf */
 775     0x0946,/* 0xe0 */
 776     0x0947,/* 0xe1 */
 777     0x0948,/* 0xe2 */
 778     0x0945,/* 0xe3 */
 779     0x094a,/* 0xe4 */
 780     0x094b,/* 0xe5 */
 781     0x094c,/* 0xe6 */
 782     0x0949,/* 0xe7 */
 783     0x094d,/* 0xe8 */
 784     0x093c,/* 0xe9 */
 785     0x0964,/* 0xea */
 786     0xFFFF,/* 0xeb */
 787     0xFFFF,/* 0xec */
 788     0xFFFF,/* 0xed */
 789     0xFFFF,/* 0xee */
 790     0xFFFF,/* 0xef */
 791     0xFFFF,/* 0xf0 */
 792     0x0966,/* 0xf1 */
 793     0x0967,/* 0xf2 */
 794     0x0968,/* 0xf3 */
 795     0x0969,/* 0xf4 */
 796     0x096a,/* 0xf5 */
 797     0x096b,/* 0xf6 */
 798     0x096c,/* 0xf7 */
 799     0x096d,/* 0xf8 */
 800     0x096e,/* 0xf9 */
 801     0x096f,/* 0xfa */
 802     0xFFFF,/* 0xfb */
 803     0xFFFF,/* 0xfc */
 804     0xFFFF,/* 0xfd */
 805     0xFFFF,/* 0xfe */
 806     0xFFFF /* 0xff */
 807 };
 808
 809 static const uint16_t vowelSignESpecialCases[][2]={
 810         { 2 /*length of array*/    , 0      },
 811         { 0xA4 , 0x0904 },
 812 };
 813
 814 static const uint16_t nuktaSpecialCases[][2]={
 815     { 16 /*length of array*/   , 0      },
 816     { 0xA6 , 0x090c },
 817     { 0xEA , 0x093D },
 818     { 0xDF , 0x0944 },
 819     { 0xA1 , 0x0950 },
 820     { 0xb3 , 0x0958 },
 821     { 0xb4 , 0x0959 },
 822     { 0xb5 , 0x095a },
 823     { 0xba , 0x095b },
 824     { 0xbf , 0x095c },
 825     { 0xC0 , 0x095d },
 826     { 0xc9 , 0x095e },
 827     { 0xAA , 0x0960 },
 828     { 0xA7 , 0x0961 },
 829     { 0xDB , 0x0962 },
 830     { 0xDC , 0x0963 },
 831 };
 832
 833
 834 #define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){      \
 835     int32_t offset = (int32_t)(source - args->source-1);                                        \
 836       /* write the targetUniChar  to target */                                                  \
 837     if(target < targetLimit){                                                                   \
 838         if(targetByteUnit <= 0xFF){                                                             \
 839             *(target)++ = (uint8_t)(targetByteUnit);                                            \
 840             if(offsets){                                                                        \
 841                 *(offsets++) = offset;                                                          \
 842             }                                                                                   \
 843         }else{                                                                                  \
 844             if (targetByteUnit > 0xFFFF) {                                                      \
 845                 *(target)++ = (uint8_t)(targetByteUnit>>16);                                    \
 846                 if (offsets) {                                                                  \
 847                     --offset;                                                                   \
 848                     *(offsets++) = offset;                                                      \
 849                 }                                                                               \
 850             }                                                                                   \
 851             if (!(target < targetLimit)) {                                                      \
 852                 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
 853                                 (uint8_t)(targetByteUnit >> 8);                                 \
 854                 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
 855                                 (uint8_t)targetByteUnit;                                        \
 856                 *err = U_BUFFER_OVERFLOW_ERROR;                                                 \
 857             } else {                                                                            \
 858                 *(target)++ = (uint8_t)(targetByteUnit>>8);                                     \
 859                 if(offsets){                                                                    \
 860                     *(offsets++) = offset;                                                      \
 861                 }                                                                               \
 862                 if(target < targetLimit){                                                       \
 863                     *(target)++ = (uint8_t)  targetByteUnit;                                    \
 864                     if(offsets){                                                                \
 865                         *(offsets++) = offset                            ;                      \
 866                     }                                                                           \
 867                 }else{                                                                          \
 868                     args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\
 869                                 (uint8_t) (targetByteUnit);                                     \
 870                     *err = U_BUFFER_OVERFLOW_ERROR;                                             \
 871                 }                                                                               \
 872             }                                                                                   \
 873         }                                                                                       \
 874     }else{                                                                                      \
 875         if (targetByteUnit & 0xFF0000) {                                                        \
 876             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
 877                         (uint8_t) (targetByteUnit >>16);                                        \
 878         }                                                                                       \
 879         if(targetByteUnit & 0xFF00){                                                            \
 880             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
 881                         (uint8_t) (targetByteUnit >>8);                                         \
 882         }                                                                                       \
 883         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =            \
 884                         (uint8_t) (targetByteUnit);                                             \
 885         *err = U_BUFFER_OVERFLOW_ERROR;                                                         \
 886     }                                                                                           \
 887 }
 888
 889 /* Rules:
 890  *    Explicit Halant :
 891  *                      <HALANT> + <ZWNJ>
 892  *    Soft Halant :
 893  *                      <HALANT> + <ZWJ>
 894  */
 895 static void U_CALLCONV
 896 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
 897         UConverterFromUnicodeArgs * args, UErrorCode * err) {
 898     const UChar *source = args->source;
 899     const UChar *sourceLimit = args->sourceLimit;
 900     unsigned char *target = (unsigned char *) args->target;
 901     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
 902     int32_t* offsets = args->offsets;
 903     uint32_t targetByteUnit = 0x0000;
 904     UChar32 sourceChar = 0x0000;
 905     UChar32 tempContextFromUnicode = 0x0000;    /* For special handling of the Gurmukhi script. */
 906     UConverterDataISCII *converterData;
 907     uint16_t newDelta=0;
 908     uint16_t range = 0;
 909     UBool deltaChanged = FALSE;
 910
 911     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
 912         *err = U_ILLEGAL_ARGUMENT_ERROR;
 913         return;
 914     }
 915     /* initialize data */
 916     converterData=(UConverterDataISCII*)args->converter->extraInfo;
 917     newDelta=converterData->currentDeltaFromUnicode;
 918     range = (uint16_t)(newDelta/DELTA);
 919
 920     if ((sourceChar = args->converter->fromUChar32)!=0) {
 921         goto getTrail;
 922     }
 923
 924     /*writing the char to the output stream */
 925     while (source < sourceLimit) {
 926         /* Write the language code following LF only if LF is not the last character. */
 927         if (args->converter->fromUnicodeStatus == LF) {
 928             targetByteUnit = ATR<<8;
 929             targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
 930             args->converter->fromUnicodeStatus = 0x0000;
 931             /* now append ATR and language code */
 932             WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
 933             if (U_FAILURE(*err)) {
 934                 break;
 935             }
 936         }
 937
 938         sourceChar = *source++;
 939         tempContextFromUnicode = converterData->contextCharFromUnicode;
 940
 941         targetByteUnit = missingCharMarker;
 942
 943         /*check if input is in ASCII and C0 control codes range*/
 944         if (sourceChar <= ASCII_END) {
 945             args->converter->fromUnicodeStatus = sourceChar;
 946             WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
 947             if (U_FAILURE(*err)) {
 948                 break;
 949             }
 950             continue;
 951         }
 952         switch (sourceChar) {
 953         case ZWNJ:
 954             /* contextChar has HALANT */
 955             if (converterData->contextCharFromUnicode) {
 956                 converterData->contextCharFromUnicode = 0x00;
 957                 targetByteUnit = ISCII_HALANT;
 958             } else {
 959                 /* consume ZWNJ and continue */
 960                 converterData->contextCharFromUnicode = 0x00;
 961                 continue;
 962             }
 963             break;
 964         case ZWJ:
 965             /* contextChar has HALANT */
 966             if (converterData->contextCharFromUnicode) {
 967                 targetByteUnit = ISCII_NUKTA;
 968             } else {
 969                 targetByteUnit =ISCII_INV;
 970             }
 971             converterData->contextCharFromUnicode = 0x00;
 972             break;
 973         default:
 974             /* is the sourceChar in the INDIC_RANGE? */
 975             if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
 976                 /* Danda and Double Danda are valid in Northern scripts.. since Unicode
 977                  * does not include these codepoints in all Northern scrips we need to
 978                  * filter them out
 979                  */
 980                 if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
 981                     /* find out to which block the souceChar belongs*/
 982                     range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
 983                     newDelta =(uint16_t)(range*DELTA);
 984
 985                     /* Now are we in the same block as the previous? */
 986                     if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
 987                         converterData->currentDeltaFromUnicode = newDelta;
 988                         converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
 989                         deltaChanged =TRUE;
 990                         converterData->isFirstBuffer=FALSE;
 991                     }
 992
 993                     if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
 994                         if (sourceChar == PNJ_TIPPI) {
 995                             /* Make sure Tippi is converterd to Bindi. */
 996                             sourceChar = PNJ_BINDI;
 997                         } else if (sourceChar == PNJ_ADHAK) {
 998                             /* This is for consonant cluster handling. */
 999                             converterData->contextCharFromUnicode = PNJ_ADHAK;
1000                         }
1001
1002                     }
1003                     /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
1004                     /* now subtract the new delta from sourceChar*/
1005                     sourceChar -= converterData->currentDeltaFromUnicode;
1006                 }
1007
1008                 /* get the target byte unit */
1009                 targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
1010
1011                 /* is the code point valid in current script? */
1012                 if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
1013                     /* Vocallic RR is assigned in ISCII Telugu and Unicode */
1014                     if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
1015                         targetByteUnit=missingCharMarker;
1016                     }
1017                 }
1018
1019                 if (deltaChanged) {
1020                     /* we are in a script block which is different than
1021                      * previous sourceChar's script block write ATR and language codes
1022                      */
1023                     uint32_t temp=0;
1024                     temp =(uint16_t)(ATR<<8);
1025                     temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
1026                     /* reset */
1027                     deltaChanged=FALSE;
1028                     /* now append ATR and language code */
1029                     WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
1030                     if (U_FAILURE(*err)) {
1031                         break;
1032                     }
1033                 }
1034
1035                 if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
1036                     continue;
1037                 }
1038             }
1039             /* reset context char */
1040             converterData->contextCharFromUnicode = 0x00;
1041             break;
1042         }
1043         if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
1044             /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
1045             /* reset context char */
1046             converterData->contextCharFromUnicode = 0x0000;
1047             targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
1048             /* write targetByteUnit to target */
1049             WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
1050             if (U_FAILURE(*err)) {
1051                 break;
1052             }
1053         } else if (targetByteUnit != missingCharMarker) {
1054             if (targetByteUnit==ISCII_HALANT) {
1055                 converterData->contextCharFromUnicode = (UChar)targetByteUnit;
1056             }
1057             /* write targetByteUnit to target*/
1058             WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
1059             if (U_FAILURE(*err)) {
1060                 break;
1061             }
1062         } else {
1063             /* oops.. the code point is unassigned */
1064             /*check if the char is a First surrogate*/
1065             if (U16_IS_SURROGATE(sourceChar)) {
1066                 if (U16_IS_SURROGATE_LEAD(sourceChar)) {
1067 getTrail:
1068                     /*look ahead to find the trail surrogate*/
1069                     if (source < sourceLimit) {
1070                         /* test the following code unit */
1071                         UChar trail= (*source);
1072                         if (U16_IS_TRAIL(trail)) {
1073                             source++;
1074                             sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
1075                             *err =U_INVALID_CHAR_FOUND;
1076                             /* convert this surrogate code point */
1077                             /* exit this condition tree */
1078                         } else {
1079                             /* this is an unmatched lead code unit (1st surrogate) */
1080                             /* callback(illegal) */
1081                             *err=U_ILLEGAL_CHAR_FOUND;
1082                         }
1083                     } else {
1084                         /* no more input */
1085                         *err = U_ZERO_ERROR;
1086                     }
1087                 } else {
1088                     /* this is an unmatched trail code unit (2nd surrogate) */
1089                     /* callback(illegal) */
1090                     *err=U_ILLEGAL_CHAR_FOUND;
1091                 }
1092             } else {
1093                 /* callback(unassigned) for a BMP code point */
1094                 *err = U_INVALID_CHAR_FOUND;
1095             }
1096
1097             args->converter->fromUChar32=sourceChar;
1098             break;
1099         }
1100     }/* end while(mySourceIndex<mySourceLength) */
1101
1102     /*save the state and return */
1103     args->source = source;
1104     args->target = (char*)target;
1105 }
1106
1107 static const uint16_t lookupTable[][2]={
1108     { ZERO,       ZERO     },     /*DEFALT*/
1109     { ZERO,       ZERO     },     /*ROMAN*/
1110     { DEVANAGARI, DEV_MASK },
1111     { BENGALI,    BNG_MASK },
1112     { TAMIL,      TML_MASK },
1113     { TELUGU,     KND_MASK },
1114     { BENGALI,    BNG_MASK },
1115     { ORIYA,      ORI_MASK },
1116     { KANNADA,    KND_MASK },
1117     { MALAYALAM,  MLM_MASK },
1118     { GUJARATI,   GJR_MASK },
1119     { GURMUKHI,   PNJ_MASK }
1120 };
1121
/*
 * WRITE_TO_TARGET_TO_U - store one UTF-16 unit produced by the ISCII decoder.
 *
 * targetUniChar is first shifted by delta into the current Indic block,
 * unless it is <= ASCII_END or one of ZWJ, ZWNJ, DANDA, DOUBLE_DANDA.
 * The shift is applied to targetUniChar itself, so it must be a modifiable
 * lvalue and keeps the shifted value afterwards.
 *
 * If target < args->targetLimit the unit is written through target (which is
 * advanced) and, when offsets is non-NULL, offset is stored through offsets
 * (also advanced). Otherwise the unit is appended to
 * args->converter->UCharErrorBuffer and *err becomes U_BUFFER_OVERFLOW_ERROR.
 *
 * The source parameter is not used by the expansion; it is kept so existing
 * call sites stay valid. All parameters are parenthesized so pointer
 * arguments may be arbitrary expressions (e.g. &argsStruct, &status).
 * Arguments are evaluated more than once: pass side-effect-free expressions.
 */
#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){ \
    /* move the code point into the current Indic block */ \
    if((targetUniChar) > ASCII_END && \
           (targetUniChar) != ZWJ && \
           (targetUniChar) != ZWNJ && \
           (targetUniChar) != DANDA && \
           (targetUniChar) != DOUBLE_DANDA){ \
        (targetUniChar) += (uint16_t)(delta); \
    } \
    /* write it, or buffer it when the target is full */ \
    if((target) < (args)->targetLimit){ \
        *(target)++ = (UChar)(targetUniChar); \
        if(offsets){ \
            *(offsets)++ = (int32_t)(offset); \
        } \
    }else{ \
        (args)->converter->UCharErrorBuffer[(args)->converter->UCharErrorBufferLength++] = \
            (UChar)(targetUniChar); \
        *(err) = U_BUFFER_OVERFLOW_ERROR; \
    } \
}
1144
/*
 * GET_MAPPING - look up the Unicode value for one ISCII byte.
 *
 * Sets targetUniChar to toUnicodeTable[sourceChar]. For bytes above ASCII_END
 * the result is replaced by missingCharMarker when validityTable (indexed by
 * the low 7 bits of the mapped value) has no bit in common with
 * data->currentMaskToUnicode. The single exception is VOCALLIC_RR while
 * data->currentDeltaToUnicode is TELUGU_DELTA.
 *
 * targetUniChar must be a modifiable lvalue; data is a pointer expression.
 * All parameters are parenthesized so data may be e.g. &state.
 * Arguments are evaluated more than once: pass side-effect-free expressions.
 */
#define GET_MAPPING(sourceChar,targetUniChar,data){ \
    (targetUniChar) = toUnicodeTable[(sourceChar)]; \
    /* is the code point valid in the current script? */ \
    if((sourceChar) > ASCII_END && \
            (validityTable[((targetUniChar) & 0x7F)] & (data)->currentMaskToUnicode)==0){ \
        /* Vocallic RR is assigned in ISCII Telugu and Unicode */ \
        if((data)->currentDeltaToUnicode != (TELUGU_DELTA) || \
                    (targetUniChar) != VOCALLIC_RR){ \
            (targetUniChar) = missingCharMarker; \
        } \
    } \
}
1157
/***********
 *  Rules for the ISCII to Unicode converter
 *  ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
 *  which has both precomposed and decomposed forms of characters,
 *  pre-context and post-context need to be considered.
 *
 *  Post context
 *  i)  ATR : Attribute code is used to declare font and script switching.
 *      Currently we only switch scripts; font codes are consumed without generating an error
 *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
 *      obsolete characters
 *  Pre context
 *  i)  Halant: if preceded by a halant then it is an explicit halant
 *  ii) Nukta :
 *       a) if preceded by a halant then it is a soft halant
 *       b) if preceded by specific consonants and the ligatures have pre-composed
 *          characters in Unicode then convert to pre-composed characters
 *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
 *
 */
1178
1179 static void U_CALLCONV
1180 UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
1181     const char *source = ( char *) args->source;
1182     UChar *target = args->target;
1183     const char *sourceLimit = args->sourceLimit;
1184     const UChar* targetLimit = args->targetLimit;
1185     uint32_t targetUniChar = 0x0000;
1186     uint8_t sourceChar = 0x0000;
1187     UConverterDataISCII* data;
1188     UChar32* toUnicodeStatus=NULL;
1189     UChar32 tempTargetUniChar = 0x0000;
1190     UChar* contextCharToUnicode= NULL;
1191     UBool found;
1192     int i;
1193     int offset = 0;
1194
1195     if ((args->converter == NULL) || (target < args->target) || (source < args->source)) {
1196         *err = U_ILLEGAL_ARGUMENT_ERROR;
1197         return;
1198     }
1199
1200     data = (UConverterDataISCII*)(args->converter->extraInfo);
1201     contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
1202     toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
1203
1204     while (U_SUCCESS(*err) && source<sourceLimit) {
1205
1206         targetUniChar = missingCharMarker;
1207
1208         if (target < targetLimit) {
1209             sourceChar = (unsigned char)*(source)++;
1210
1211             /* look at the post-context preform special processing */
1212             if (*contextCharToUnicode==ATR) {
1213
1214                 /* If we have ATR in *contextCharToUnicode then we need to change our
1215                  * state to the Indic Script specified by sourceChar
1216                  */
1217
1218                 /* check if the sourceChar is supported script range*/
1219                 if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
1220                     data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
1221                     data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
1222                 } else if (sourceChar==DEF) {
1223                     /* switch back to default */
1224                     data->currentDeltaToUnicode = data->defDeltaToUnicode;
1225                     data->currentMaskToUnicode = data->defMaskToUnicode;
1226                 } else {
1227                     if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
1228                         /* these are display codes consume and continue */
1229                     } else {
1230                         *err =U_ILLEGAL_CHAR_FOUND;
1231                         /* reset */
1232                         *contextCharToUnicode=NO_CHAR_MARKER;
1233                         goto CALLBACK;
1234                     }
1235                 }
1236
1237                 /* reset */
1238                 *contextCharToUnicode=NO_CHAR_MARKER;
1239
1240                 continue;
1241
1242             } else if (*contextCharToUnicode==EXT) {
1243                 /* check if sourceChar is in 0xA1-0xEE range */
1244                 if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
1245                     /* We currently support only Anudatta and Devanagari abbreviation sign */
1246                     if (sourceChar==0xBF || sourceChar == 0xB8) {
1247                         targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
1248
1249                         /* find out if the mapping is valid in this state */
1250                         if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1251                             *contextCharToUnicode= NO_CHAR_MARKER;
1252
1253                             /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1254                             if (data->prevToUnicodeStatus) {
1255                                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1256                                 data->prevToUnicodeStatus = 0x0000;
1257                             }
1258                             /* write to target */
1259                             WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1260
1261                             continue;
1262                         }
1263                     }
1264                     /* byte unit is unassigned */
1265                     targetUniChar = missingCharMarker;
1266                     *err= U_INVALID_CHAR_FOUND;
1267                 } else {
1268                     /* only 0xA1 - 0xEE are legal after EXT char */
1269                     *contextCharToUnicode= NO_CHAR_MARKER;
1270                     *err = U_ILLEGAL_CHAR_FOUND;
1271                 }
1272                 goto CALLBACK;
1273             } else if (*contextCharToUnicode==ISCII_INV) {
1274                 if (sourceChar==ISCII_HALANT) {
1275                     targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
1276                 } else {
1277                     targetUniChar = ZWJ;
1278                 }
1279
1280                 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1281                 if (data->prevToUnicodeStatus) {
1282                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1283                     data->prevToUnicodeStatus = 0x0000;
1284                 }
1285                 /* write to target */
1286                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1287                 /* reset */
1288                 *contextCharToUnicode=NO_CHAR_MARKER;
1289             }
1290
1291             /* look at the pre-context and perform special processing */
1292             switch (sourceChar) {
1293             case ISCII_INV:
1294             case EXT:
1295             case ATR:
1296                 *contextCharToUnicode = (UChar)sourceChar;
1297
1298                 if (*toUnicodeStatus != missingCharMarker) {
1299                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1300                     if (data->prevToUnicodeStatus) {
1301                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1302                         data->prevToUnicodeStatus = 0x0000;
1303                     }
1304                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1305                     *toUnicodeStatus = missingCharMarker;
1306                 }
1307                 continue;
1308             case ISCII_DANDA:
1309                 /* handle double danda*/
1310                 if (*contextCharToUnicode== ISCII_DANDA) {
1311                     targetUniChar = DOUBLE_DANDA;
1312                     /* clear the context */
1313                     *contextCharToUnicode = NO_CHAR_MARKER;
1314                     *toUnicodeStatus = missingCharMarker;
1315                 } else {
1316                     GET_MAPPING(sourceChar,targetUniChar,data);
1317                     *contextCharToUnicode = sourceChar;
1318                 }
1319                 break;
1320             case ISCII_HALANT:
1321                 /* handle explicit halant */
1322                 if (*contextCharToUnicode == ISCII_HALANT) {
1323                     targetUniChar = ZWNJ;
1324                     /* clear the context */
1325                     *contextCharToUnicode = NO_CHAR_MARKER;
1326                 } else {
1327                     GET_MAPPING(sourceChar,targetUniChar,data);
1328                     *contextCharToUnicode = sourceChar;
1329                 }
1330                 break;
1331             case 0x0A:
1332             case 0x0D:
1333                 data->resetToDefaultToUnicode = TRUE;
1334                 GET_MAPPING(sourceChar,targetUniChar,data)
1335                 ;
1336                 *contextCharToUnicode = sourceChar;
1337                 break;
1338
1339             case ISCII_VOWEL_SIGN_E:
1340                 i=1;
1341                 found=FALSE;
1342                 for (; i<vowelSignESpecialCases[0][0]; i++) {
1343                     U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases));
1344                     if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
1345                         targetUniChar=vowelSignESpecialCases[i][1];
1346                         found=TRUE;
1347                         break;
1348                     }
1349                 }
1350                 if (found) {
1351                     /* find out if the mapping is valid in this state */
1352                     if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1353                         /*targetUniChar += data->currentDeltaToUnicode ;*/
1354                         *contextCharToUnicode= NO_CHAR_MARKER;
1355                         *toUnicodeStatus = missingCharMarker;
1356                         break;
1357                     }
1358                 }
1359                 GET_MAPPING(sourceChar,targetUniChar,data);
1360                 *contextCharToUnicode = sourceChar;
1361                 break;
1362
1363             case ISCII_NUKTA:
1364                 /* handle soft halant */
1365                 if (*contextCharToUnicode == ISCII_HALANT) {
1366                     targetUniChar = ZWJ;
1367                     /* clear the context */
1368                     *contextCharToUnicode = NO_CHAR_MARKER;
1369                     break;
1370                 } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
1371                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1372                     if (data->prevToUnicodeStatus) {
1373                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1374                         data->prevToUnicodeStatus = 0x0000;
1375                     }
1376                     /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
1377                      * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
1378                      */
1379                     targetUniChar = PNJ_RRA;
1380                     WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1381                     if (U_SUCCESS(*err)) {
1382                         targetUniChar = PNJ_SIGN_VIRAMA;
1383                         WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1384                         if (U_SUCCESS(*err)) {
1385                             targetUniChar = PNJ_HA;
1386                             WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1387                         } else {
1388                             args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
1389                         }
1390                     } else {
1391                         args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
1392                         args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
1393                     }
1394                     *toUnicodeStatus = missingCharMarker;
1395                     data->contextCharToUnicode = NO_CHAR_MARKER;
1396                     continue;
1397                 } else {
1398                     /* try to handle <CHAR> + ISCII_NUKTA special mappings */
1399                     i=1;
1400                     found =FALSE;
1401                     for (; i<nuktaSpecialCases[0][0]; i++) {
1402                         if (nuktaSpecialCases[i][0]==(uint8_t)
1403                                 *contextCharToUnicode) {
1404                             targetUniChar=nuktaSpecialCases[i][1];
1405                             found =TRUE;
1406                             break;
1407                         }
1408                     }
1409                     if (found) {
1410                         /* find out if the mapping is valid in this state */
1411                         if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1412                             /*targetUniChar += data->currentDeltaToUnicode ;*/
1413                             *contextCharToUnicode= NO_CHAR_MARKER;
1414                             *toUnicodeStatus = missingCharMarker;
1415                             if (data->currentDeltaToUnicode == PNJ_DELTA) {
1416                                 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1417                                 if (data->prevToUnicodeStatus) {
1418                                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1419                                     data->prevToUnicodeStatus = 0x0000;
1420                                 }
1421                                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1422                                 continue;
1423                             }
1424                             break;
1425                         }
1426                         /* else fall through to default */
1427                     }
1428                     /* else fall through to default */
1429                     U_FALLTHROUGH;
1430                 }
1431             default:GET_MAPPING(sourceChar,targetUniChar,data)
1432                 ;
1433                 *contextCharToUnicode = sourceChar;
1434                 break;
1435             }
1436
1437             if (*toUnicodeStatus != missingCharMarker) {
1438                 /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
1439                 if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
1440                         (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && ((UChar32)(targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus)) {
1441                     /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
1442                     offset = (int)(source-args->source - 3);
1443                     tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
1444                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
1445                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
1446                     data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
1447                     *toUnicodeStatus = missingCharMarker;
1448                     continue;
1449                 } else {
1450                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1451                     if (data->prevToUnicodeStatus) {
1452                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1453                         data->prevToUnicodeStatus = 0x0000;
1454                     }
1455                     /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
1456                      * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
1457                      */
1458                     if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
1459                         targetUniChar = PNJ_TIPPI - PNJ_DELTA;
1460                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
1461                     } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
1462                         /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
1463                         data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
1464                     } else {
1465                         /* write the previously mapped codepoint */
1466                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1467                     }
1468                 }
1469                 *toUnicodeStatus = missingCharMarker;
1470             }
1471
1472             if (targetUniChar != missingCharMarker) {
1473                 /* now save the targetUniChar for delayed write */
1474                 *toUnicodeStatus = (UChar) targetUniChar;
1475                 if (data->resetToDefaultToUnicode==TRUE) {
1476                     data->currentDeltaToUnicode = data->defDeltaToUnicode;
1477                     data->currentMaskToUnicode = data->defMaskToUnicode;
1478                     data->resetToDefaultToUnicode=FALSE;
1479                 }
1480             } else {
1481
1482                 /* we reach here only if targetUniChar == missingCharMarker
1483                  * so assign codes to reason and err
1484                  */
1485                 *err = U_INVALID_CHAR_FOUND;
1486 CALLBACK:
1487                 args->converter->toUBytes[0] = (uint8_t) sourceChar;
1488                 args->converter->toULength = 1;
1489                 break;
1490             }
1491
1492         } else {
1493             *err =U_BUFFER_OVERFLOW_ERROR;
1494             break;
1495         }
1496     }
1497
1498     if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
1499         /* end of the input stream */
1500         UConverter *cnv = args->converter;
1501
1502         if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
1503             /* set toUBytes[] */
1504             cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
1505             cnv->toULength = 1;
1506
1507             /* avoid looping on truncated sequences */
1508             *contextCharToUnicode = NO_CHAR_MARKER;
1509         } else {
1510             cnv->toULength = 0;
1511         }
1512
1513         if (*toUnicodeStatus != missingCharMarker) {
1514             /* output a remaining target character */
1515             WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1516             *toUnicodeStatus = missingCharMarker;
1517         }
1518     }
1519
1520     args->target = target;
1521     args->source = source;
1522 }
1523
1524 /* structure for SafeClone calculations */
1525 struct cloneISCIIStruct {
1526     UConverter cnv;
1527     UConverterDataISCII mydata;
1528 };
1529
1530 static UConverter * U_CALLCONV
1531 _ISCII_SafeClone(const UConverter *cnv,
1532               void *stackBuffer,
1533               int32_t *pBufferSize,
1534               UErrorCode *status)
1535 {
1536     struct cloneISCIIStruct * localClone;
1537     int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
1538
1539     if (U_FAILURE(*status)) {
1540         return 0;
1541     }
1542
1543     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
1544         *pBufferSize = bufferSizeNeeded;
1545         return 0;
1546     }
1547
1548     localClone = (struct cloneISCIIStruct *)stackBuffer;
1549     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
1550
1551     uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
1552     localClone->cnv.extraInfo = &localClone->mydata;
1553     localClone->cnv.isExtraLocal = TRUE;
1554
1555     return &localClone->cnv;
1556 }
1557
1558 static void U_CALLCONV
1559 _ISCIIGetUnicodeSet(const UConverter *cnv,
1560                     const USetAdder *sa,
1561                     UConverterUnicodeSet which,
1562                     UErrorCode *pErrorCode)
1563 {
1564     (void)cnv;
1565     (void)which;
1566     (void)pErrorCode;
1567     int32_t idx, script;
1568     uint8_t mask;
1569
1570     /* Since all ISCII versions allow switching to other ISCII
1571     scripts, we add all roundtrippable characters to this set. */
1572     sa->addRange(sa->set, 0, ASCII_END);
1573     for (script = DEVANAGARI; script <= MALAYALAM; script++) {
1574         mask = (uint8_t)(lookupInitialData[script].maskEnum);
1575         for (idx = 0; idx < DELTA; idx++) {
1576             /* added check for TELUGU character */
1577             if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {
1578                 sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
1579             }
1580         }
1581     }
1582     sa->add(sa->set, DANDA);
1583     sa->add(sa->set, DOUBLE_DANDA);
1584     sa->add(sa->set, ZWNJ);
1585     sa->add(sa->set, ZWJ);
1586 }
1587 U_CDECL_END
1588 static const UConverterImpl _ISCIIImpl={
1589
1590     UCNV_ISCII,
1591
1592     NULL,
1593     NULL,
1594
1595     _ISCIIOpen,
1596     _ISCIIClose,
1597     _ISCIIReset,
1598
1599     UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
1600     UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
1601     UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
1602     UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
1603     NULL,
1604
1605     NULL,
1606     _ISCIIgetName,
1607     NULL,
1608     _ISCII_SafeClone,
1609     _ISCIIGetUnicodeSet,
1610     NULL,
1611     NULL
1612 };
1613
1614 static const UConverterStaticData _ISCIIStaticData={
1615     sizeof(UConverterStaticData),
1616         "ISCII",
1617          0,
1618          UCNV_IBM,
1619          UCNV_ISCII,
1620          1,
1621          4,
1622         { 0x1a, 0, 0, 0 },
1623         0x1,
1624         FALSE,
1625         FALSE,
1626         0x0,
1627         0x0,
1628         { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
1629
1630 };
1631
1632 const UConverterSharedData _ISCIIData=
1633         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl);
1634
1635 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */