2 **********************************************************************
3 * Copyright (C) 2000-2004, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: ucnvisci.c
8 * tab size: 8 (not used)
11 * created on: 2001JUN26
12 * created by: Ram Viswanadha
14 * Date Name Description
15 * 24/7/2001 Ram Added support for EXT character handling
18 #include "unicode/utypes.h"
20 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
24 #include "unicode/ucnv.h"
26 #include "unicode/ucnv_cb.h"
27 #include "unicode/uset.h"
/*
 * Code points, ISCII control bytes and range limits used by the ISCII
 * converter.
 */
#define UCNV_OPTIONS_VERSION_MASK 0xf /* applied to the open options to extract the version */
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero Width Joiner */
#define INVALID_CHAR        0xffff
#define ATR                 0xEF   /* Attribute code */
#define EXT                 0xF0   /* Extension code */
#define DOUBLE_DANDA        0x0965
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
#define VOCALLIC_RR         0x0931
#define ASCII_END           0xA0
#define NO_CHAR_MARKER      0xFFFE
/* Parenthesized so the product stays a single operand wherever it expands. */
#define TELUGU_DELTA        (DELTA * TELUGU)
#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE
73 * Enumeration for switching code pages if <ATR>+<one of below values>
110 UChar contextCharToUnicode
; /* previous ISCII byte seen by toUnicode, kept for contextual analysis */
111 UChar contextCharFromUnicode
; /* previous Unicode codepoint for contextual analysis */
112 uint16_t defDeltaToUnicode
; /* delta for switching to default state when DEF is encountered */
113 uint16_t currentDeltaFromUnicode
;/* current delta in Indic block */
114 uint16_t currentDeltaToUnicode
; /* current delta in Indic block */
115 MaskEnum currentMaskFromUnicode
; /* mask for current state in fromUnicode */
116 MaskEnum currentMaskToUnicode
; /* mask for current state in toUnicode */
117 MaskEnum defMaskToUnicode
; /* mask for default state in toUnicode */
118 UBool isFirstBuffer
; /* boolean for fromUnicode to see if we need to announce the first script */
120 }UConverterDataISCII
;
/*
 * Initial state per requested ISCII version. Each row holds, in order:
 *   [0] the script's block index (multiplied by DELTA to get the offset
 *       applied within the Indic range),
 *   [1] the script's bit in validityTable,
 *   [2] the script code byte that is written after ATR.
 * Telugu shares KND_MASK with Kannada.
 */
122 static const uint16_t lookupInitialData
[][3]={
123 { DEVANAGARI
, DEV_MASK
, DEV
},
124 { BENGALI
, BNG_MASK
, BNG
},
125 { GURMUKHI
, PNJ_MASK
, PNJ
},
126 { GUJARATI
, GJR_MASK
, GJR
},
127 { ORIYA
, ORI_MASK
, ORI
},
128 { TAMIL
, TML_MASK
, TML
},
129 { TELUGU
, KND_MASK
, TLG
},
130 { KANNADA
, KND_MASK
, KND
},
131 { MALAYALAM
, MLM_MASK
, MLM
}
135 _ISCIIOpen(UConverter
*cnv
, const char *name
,const char *locale
,uint32_t options
, UErrorCode
*errorCode
){
136 cnv
->extraInfo
= uprv_malloc (sizeof (UConverterDataISCII
));
138 if(cnv
->extraInfo
!= NULL
) {
140 UConverterDataISCII
*converterData
=(UConverterDataISCII
*) cnv
->extraInfo
;
141 converterData
->contextCharToUnicode
=NO_CHAR_MARKER
;
142 cnv
->toUnicodeStatus
= missingCharMarker
;
143 converterData
->contextCharFromUnicode
=0x0000;
144 /* check if the version requested is supported */
145 if((options
& UCNV_OPTIONS_VERSION_MASK
) < 9){
146 /* initialize state variables */
147 converterData
->currentDeltaFromUnicode
=converterData
->currentDeltaToUnicode
=
148 converterData
->defDeltaToUnicode
=
149 (uint16_t)(lookupInitialData
[options
& UCNV_OPTIONS_VERSION_MASK
][0] * DELTA
);
151 converterData
->currentMaskFromUnicode
= converterData
->currentMaskToUnicode
=
152 converterData
->defMaskToUnicode
=lookupInitialData
[options
& UCNV_OPTIONS_VERSION_MASK
][1];
154 converterData
->isFirstBuffer
=TRUE
;
155 uprv_strcpy(converterData
->name
,"ISCII,version=");
156 len
= (int32_t)uprv_strlen(converterData
->name
);
157 converterData
->name
[len
]= (char)((options
& UCNV_OPTIONS_VERSION_MASK
) + '0');
158 converterData
->name
[len
+1]=0;
160 uprv_free(cnv
->extraInfo
);
161 cnv
->extraInfo
= NULL
;
162 *errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
166 *errorCode
=U_MEMORY_ALLOCATION_ERROR
;
170 _ISCIIClose(UConverter
*cnv
){
171 if(cnv
->extraInfo
!=NULL
) {
172 if(!cnv
->isExtraLocal
) {
173 uprv_free(cnv
->extraInfo
);
180 _ISCIIgetName(const UConverter
* cnv
){
182 UConverterDataISCII
* myData
= (UConverterDataISCII
*)cnv
->extraInfo
;
189 _ISCIIReset(UConverter
*cnv
, UConverterResetChoice choice
){
190 UConverterDataISCII
* data
=(UConverterDataISCII
*) (cnv
->extraInfo
);
191 if(choice
<=UCNV_RESET_TO_UNICODE
) {
192 cnv
->toUnicodeStatus
= missingCharMarker
;
194 data
->currentDeltaToUnicode
=data
->defDeltaToUnicode
;
195 data
->currentMaskToUnicode
= data
->defMaskToUnicode
;
196 data
->contextCharToUnicode
=NO_CHAR_MARKER
;
198 if(choice
!=UCNV_RESET_TO_UNICODE
) {
199 cnv
->fromUChar32
=0x0000;
200 data
->contextCharFromUnicode
=0x00;
201 data
->currentMaskFromUnicode
=data
->defDeltaToUnicode
;
202 data
->currentDeltaFromUnicode
=data
->defDeltaToUnicode
;
203 data
->isFirstBuffer
=TRUE
;
208 * The values in validity table are indexed by the lower bits of Unicode
209 * range 0x0900 - 0x097f. The values have a structure like:
210 * ---------------------------------------------------------------
211 * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML |
212 * | | | | | ASM | KND | | |
213 * ---------------------------------------------------------------
214 * If a code point is valid in a particular script
215 * then that bit is turned on
217 * Unicode does not distinguish between Bengali and Assamese so we use 1 bit
218 * to represent these languages
220 * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
221 * and combine and use 1 bit to represent these languages.
223 * TODO: It is probably easier to understand and maintain to change this
224 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
227 static const uint8_t validityTable
[128] = {
228 /* This state table is tool generated please donot edit unless you know exactly what you are doing */
229 /*ISCII:Valid:Unicode */
230 /*0xa0 : 0x00: 0x900 */ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
231 /*0xa1 : 0xb8: 0x901 */ DEV_MASK
+ ZERO
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ ZERO
+ ZERO
+ ZERO
,
232 /*0xa2 : 0xfe: 0x902 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
233 /*0xa3 : 0xbf: 0x903 */ DEV_MASK
+ ZERO
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
234 /*0x00 : 0x00: 0x904 */ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
235 /*0xa4 : 0xff: 0x905 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
236 /*0xa5 : 0xff: 0x906 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
237 /*0xa6 : 0xff: 0x907 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
238 /*0xa7 : 0xff: 0x908 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
239 /*0xa8 : 0xff: 0x909 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
240 /*0xa9 : 0xff: 0x90a */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
241 /*0xaa : 0xfe: 0x90b */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
242 /*0x00 : 0x00: 0x90c */ DEV_MASK
+ ZERO
+ ZERO
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
243 /*0xae : 0x80: 0x90d */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
244 /*0xab : 0x87: 0x90e */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
245 /*0xac : 0xff: 0x90f */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
246 /*0xad : 0xff: 0x910 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
247 /*0xb2 : 0x80: 0x911 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
248 /*0xaf : 0x87: 0x912 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
249 /*0xb0 : 0xff: 0x913 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
250 /*0xb1 : 0xff: 0x914 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
251 /*0xb3 : 0xff: 0x915 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
252 /*0xb4 : 0xfe: 0x916 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
253 /*0xb5 : 0xfe: 0x917 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
254 /*0xb6 : 0xfe: 0x918 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
255 /*0xb7 : 0xff: 0x919 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
256 /*0xb8 : 0xff: 0x91a */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
257 /*0xb9 : 0xfe: 0x91b */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
258 /*0xba : 0xff: 0x91c */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
259 /*0xbb : 0xfe: 0x91d */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
260 /*0xbc : 0xff: 0x91e */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
261 /*0xbd : 0xff: 0x91f */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
262 /*0xbe : 0xfe: 0x920 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
263 /*0xbf : 0xfe: 0x921 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
264 /*0xc0 : 0xfe: 0x922 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
265 /*0xc1 : 0xff: 0x923 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
266 /*0xc2 : 0xff: 0x924 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
267 /*0xc3 : 0xfe: 0x925 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
268 /*0xc4 : 0xfe: 0x926 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
269 /*0xc5 : 0xfe: 0x927 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
270 /*0xc6 : 0xff: 0x928 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
271 /*0xc7 : 0x81: 0x929 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ TML_MASK
,
272 /*0xc8 : 0xff: 0x92a */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
273 /*0xc9 : 0xfe: 0x92b */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
274 /*0xca : 0xfe: 0x92c */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
275 /*0xcb : 0xfe: 0x92d */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
276 /*0xcc : 0xfe: 0x92e */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
277 /*0xcd : 0xff: 0x92f */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
278 /*0xcf : 0xff: 0x930 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
279 /*0xd0 : 0x87: 0x931 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ MLM_MASK
+ TML_MASK
,
280 /*0xd1 : 0xff: 0x932 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
281 /*0xd2 : 0xb7: 0x933 */ DEV_MASK
+ ZERO
+ GJR_MASK
+ ORI_MASK
+ ZERO
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
282 /*0xd3 : 0x83: 0x934 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ MLM_MASK
+ TML_MASK
,
283 /*0xd4 : 0xff: 0x935 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
284 /*0xd5 : 0xfe: 0x936 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
285 /*0xd6 : 0xbf: 0x937 */ DEV_MASK
+ ZERO
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
286 /*0xd7 : 0xff: 0x938 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
287 /*0xd8 : 0xff: 0x939 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
288 /*0x00 : 0x00: 0x93A */ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
289 /*0x00 : 0x00: 0x93B */ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
290 /*0xe9 : 0xda: 0x93c */ DEV_MASK
+ PNJ_MASK
+ ZERO
+ ORI_MASK
+ BNG_MASK
+ ZERO
+ MLM_MASK
+ ZERO
,
291 /*0x00 : 0x00: 0x93d */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
292 /*0xda : 0xff: 0x93e */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
293 /*0xdb : 0xff: 0x93f */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
294 /*0xdc : 0xff: 0x940 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
295 /*0xdd : 0xff: 0x941 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
296 /*0xde : 0xff: 0x942 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
297 /*0xdf : 0xbe: 0x943 */ DEV_MASK
+ ZERO
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
298 /*0x00 : 0x00: 0x944 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
299 /*0xe3 : 0x80: 0x945 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
300 /*0xe0 : 0x87: 0x946 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
301 /*0xe1 : 0xff: 0x947 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
302 /*0xe2 : 0xff: 0x948 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
303 /*0xe7 : 0x80: 0x949 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
304 /*0xe4 : 0x87: 0x94a */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
305 /*0xe5 : 0xff: 0x94b */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
306 /*0xe6 : 0xff: 0x94c */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
307 /*0xe8 : 0xff: 0x94d */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
308 /*0xec : 0x00: 0x94e */ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
309 /*0xed : 0x00: 0x94f */ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
310 /*0x00 : 0x00: 0x950 */ DEV_MASK
+ ZERO
+ GJR_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
311 /*0x00 : 0x00: 0x951 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
312 /*0x00 : 0x00: 0x952 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
313 /*0x00 : 0x00: 0x953 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
314 /*0x00 : 0x00: 0x954 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
315 /*0x00 : 0x00: 0x955 */ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ KND_MASK
+ ZERO
+ ZERO
,
316 /*0x00 : 0x00: 0x956 */ ZERO
+ ZERO
+ ZERO
+ ORI_MASK
+ ZERO
+ KND_MASK
+ ZERO
+ ZERO
,
317 /*0x00 : 0x00: 0x957 */ ZERO
+ ZERO
+ ZERO
+ ORI_MASK
+ ZERO
+ ZERO
+ MLM_MASK
+ ZERO
,
318 /*0x00 : 0x00: 0x958 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
319 /*0x00 : 0x00: 0x959 */ DEV_MASK
+ PNJ_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
320 /*0x00 : 0x00: 0x95a */ DEV_MASK
+ PNJ_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
321 /*0x00 : 0x00: 0x95b */ DEV_MASK
+ PNJ_MASK
+ ZERO
+ ORI_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
322 /*0x00 : 0x00: 0x95c */ DEV_MASK
+ PNJ_MASK
+ ZERO
+ ZERO
+ BNG_MASK
+ ZERO
+ ZERO
+ ZERO
,
323 /*0x00 : 0x00: 0x95d */ DEV_MASK
+ ZERO
+ ZERO
+ ORI_MASK
+ BNG_MASK
+ ZERO
+ ZERO
+ ZERO
,
324 /*0x00 : 0x00: 0x95e */ DEV_MASK
+ PNJ_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
325 /*0xce : 0x98: 0x95f */ DEV_MASK
+ ZERO
+ ZERO
+ ORI_MASK
+ BNG_MASK
+ ZERO
+ ZERO
+ ZERO
,
326 /*0x00 : 0x00: 0x960 */ DEV_MASK
+ ZERO
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
327 /*0x00 : 0x00: 0x961 */ DEV_MASK
+ ZERO
+ ZERO
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ ZERO
,
328 /*0x00 : 0x00: 0x962 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ BNG_MASK
+ ZERO
+ ZERO
+ ZERO
,
329 /*0x00 : 0x00: 0x963 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ BNG_MASK
+ ZERO
+ ZERO
+ ZERO
,
330 /*0xea : 0xf8: 0x964 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ ZERO
+ ZERO
+ ZERO
,
331 /*0xeaea : 0x00: 0x965*/ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ ZERO
+ ZERO
+ ZERO
,
332 /*0xf1 : 0xff: 0x966 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
333 /*0xf2 : 0xff: 0x967 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
334 /*0xf3 : 0xff: 0x968 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
335 /*0xf4 : 0xff: 0x969 */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
336 /*0xf5 : 0xff: 0x96a */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
337 /*0xf6 : 0xff: 0x96b */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
338 /*0xf7 : 0xff: 0x96c */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
339 /*0xf8 : 0xff: 0x96d */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
340 /*0xf9 : 0xff: 0x96e */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
341 /*0xfa : 0xff: 0x96f */ DEV_MASK
+ PNJ_MASK
+ GJR_MASK
+ ORI_MASK
+ BNG_MASK
+ KND_MASK
+ MLM_MASK
+ TML_MASK
,
342 /*0x00 : 0x80: 0x970 */ DEV_MASK
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
,
345 * The length of the array is 128 to provide values for 0x900..0x97f.
346 * The last 15 entries for 0x971..0x97f of the validity table are all zero
347 * because no Indic script uses such Unicode code points.
349 /*0x00 : 0x00: 0x9yz */ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
+ ZERO
352 static const uint16_t fromUnicodeTable
[128]={
433 0xA1E9 ,/* 0x0950 */ /* OM Symbol */
482 static const uint16_t toUnicodeTable
[256]={
741 static const uint16_t nuktaSpecialCases
[][2]={
742 { 16 /*length of array*/ , 0 },
760 #define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){ \
761 /* write the targetUniChar to target */ \
762 if(target <targetLimit){ \
763 if(targetByteUnit <= 0xFF){ \
764 *(target)++ = (uint8_t)(targetByteUnit); \
766 *(offsets++) = (int32_t)(source - args->source-1); \
769 *(target)++ = (uint8_t)(targetByteUnit>>8); \
771 *(offsets++) = (int32_t)(source - args->source-1); \
773 if(target < targetLimit){ \
774 *(target)++ = (uint8_t) targetByteUnit; \
776 *(offsets++) = (int32_t)(source - args->source-1); \
779 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
780 (uint8_t) (targetByteUnit); \
781 *err = U_BUFFER_OVERFLOW_ERROR; \
785 if(targetByteUnit & 0xFF00){ \
786 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
787 (uint8_t) (targetByteUnit >>8); \
789 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
790 (uint8_t) (targetByteUnit); \
791 *err = U_BUFFER_OVERFLOW_ERROR; \
803 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC (UConverterFromUnicodeArgs
* args
,
805 const UChar
*source
= args
->source
;
806 const UChar
*sourceLimit
= args
->sourceLimit
;
807 unsigned char *target
= (unsigned char *) args
->target
;
808 unsigned char *targetLimit
= (unsigned char *) args
->targetLimit
;
809 int32_t* offsets
= args
->offsets
;
810 uint32_t targetByteUnit
= 0x0000;
811 UChar32 sourceChar
= 0x0000;
813 UConverterDataISCII
*converterData
;
816 UBool deltaChanged
= FALSE
;
818 if ((args
->converter
== NULL
) || (args
->targetLimit
< args
->target
) || (args
->sourceLimit
< args
->source
)){
819 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
822 /* initialize data */
823 converterData
=(UConverterDataISCII
*)args
->converter
->extraInfo
;
824 useFallback
= args
->converter
->useFallback
;
825 newDelta
=converterData
->currentDeltaFromUnicode
;
826 range
= (uint16_t)(newDelta
/DELTA
);
828 if((sourceChar
= args
->converter
->fromUChar32
)!=0) {
832 /*writing the char to the output stream */
833 while(source
< sourceLimit
){
835 targetByteUnit
= missingCharMarker
;
837 sourceChar
= *source
++;
839 /*check if input is in ASCII and C0 control codes range*/
840 if (sourceChar
<= ASCII_END
) {
841 WRITE_TO_TARGET_FROM_U(args
,offsets
,source
,target
,targetLimit
,sourceChar
,err
);
845 if(sourceChar
== LF
){
846 targetByteUnit
= ATR
<<8;
847 targetByteUnit
+= (uint8_t) lookupInitialData
[range
][2];
848 args
->converter
->fromUnicodeStatus
=sourceChar
;
849 /* now append ATR and language code */
850 WRITE_TO_TARGET_FROM_U(args
,offsets
,source
,target
,targetLimit
,targetByteUnit
,err
);
859 /* contextChar has HALANT */
860 if(converterData
->contextCharFromUnicode
){
861 converterData
->contextCharFromUnicode
= 0x00;
862 targetByteUnit
= ISCII_HALANT
;
864 /* consume ZWNJ and continue */
865 converterData
->contextCharFromUnicode
= 0x00;
870 /* contextChar has HALANT */
871 if(converterData
->contextCharFromUnicode
){
872 targetByteUnit
= ISCII_NUKTA
;
874 targetByteUnit
=ISCII_INV
;
876 converterData
->contextCharFromUnicode
= 0x00;
879 /* is the sourceChar in the INDIC_RANGE? */
880 if((uint16_t)(INDIC_BLOCK_END
-sourceChar
) <= INDIC_RANGE
){
881 /* Danda and Double Danda are valid in Northern scripts.. since Unicode
882 * does not include these codepoints in all Northern scrips we need to
885 if(sourceChar
!= DANDA
&& sourceChar
!= DOUBLE_DANDA
){
886 /* find out to which block the souceChar belongs*/
887 range
=(uint16_t)((sourceChar
-INDIC_BLOCK_BEGIN
)/DELTA
);
888 newDelta
=(uint16_t)(range
*DELTA
);
890 /* Now are we in the same block as the previous? */
891 if(newDelta
!= converterData
->currentDeltaFromUnicode
|| converterData
->isFirstBuffer
){
892 converterData
->currentDeltaFromUnicode
= newDelta
;
893 converterData
->currentMaskFromUnicode
= lookupInitialData
[range
][1];
895 converterData
->isFirstBuffer
=FALSE
;
897 /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
898 /* now subtract the new delta from sourceChar*/
899 sourceChar
-= converterData
->currentDeltaFromUnicode
;
902 /* get the target byte unit */
903 targetByteUnit
=fromUnicodeTable
[(uint8_t)sourceChar
];
905 /* is the code point valid in current script? */
906 if((validityTable
[(uint8_t)sourceChar
] & converterData
->currentMaskFromUnicode
)==0){
907 /* Vocallic RR is assigne in ISCII Telugu and Unicode */
908 if(converterData
->currentDeltaFromUnicode
!=(TELUGU_DELTA
) && sourceChar
!=VOCALLIC_RR
){
909 targetByteUnit
=missingCharMarker
;
914 /* we are in a script block which is different than
915 * previous sourceChar's script block write ATR and language codes
918 temp
=(uint16_t)(ATR
<<8);
919 temp
+= (uint16_t)((uint8_t) lookupInitialData
[range
][2]);
922 /* now append ATR and language code */
923 WRITE_TO_TARGET_FROM_U(args
,offsets
,source
,target
,targetLimit
,temp
,err
);
929 /* reset context char */
930 converterData
->contextCharFromUnicode
= 0x00;
935 if(targetByteUnit
!= missingCharMarker
){
936 if(targetByteUnit
==ISCII_HALANT
){
937 converterData
->contextCharFromUnicode
= (UChar
)targetByteUnit
;
939 /* write targetByteUnit to target*/
940 WRITE_TO_TARGET_FROM_U(args
,offsets
,source
,target
,targetLimit
,targetByteUnit
,err
);
946 /* oops.. the code point is unassigned */
947 /*check if the char is a First surrogate*/
948 if(UTF_IS_SURROGATE(sourceChar
)) {
949 if(UTF_IS_SURROGATE_FIRST(sourceChar
)) {
951 /*look ahead to find the trail surrogate*/
952 if(source
< sourceLimit
) {
953 /* test the following code unit */
954 UChar trail
= (*source
);
955 if(UTF_IS_SECOND_SURROGATE(trail
)) {
957 sourceChar
=UTF16_GET_PAIR_VALUE(sourceChar
, trail
);
958 *err
=U_INVALID_CHAR_FOUND
;
959 /* convert this surrogate code point */
960 /* exit this condition tree */
962 /* this is an unmatched lead code unit (1st surrogate) */
963 /* callback(illegal) */
964 *err
=U_ILLEGAL_CHAR_FOUND
;
971 /* this is an unmatched trail code unit (2nd surrogate) */
972 /* callback(illegal) */
973 *err
=U_ILLEGAL_CHAR_FOUND
;
976 /* callback(unassigned) for a BMP code point */
977 *err
= U_INVALID_CHAR_FOUND
;
980 args
->converter
->fromUChar32
=sourceChar
;
983 }/* end while(mySourceIndex<mySourceLength) */
985 /*save the state and return */
986 args
->source
= source
;
987 args
->target
= (char*)target
;
990 static const int32_t lookupTable
[][2]={
991 { ZERO
, ZERO
}, /*DEFALT*/
992 { ZERO
, ZERO
}, /*ROMAN*/
993 { DEVANAGARI
, DEV_MASK
},
994 { BENGALI
, BNG_MASK
},
996 { TELUGU
, KND_MASK
},
997 { BENGALI
, BNG_MASK
},
999 { KANNADA
, KND_MASK
},
1000 { GUJARATI
, GJR_MASK
},
1001 { GURMUKHI
, PNJ_MASK
},
1004 #define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\
1005 /* add offset to current Indic Block */ \
1006 if(targetUniChar>ASCII_END && \
1007 targetUniChar != ZWJ && \
1008 targetUniChar != ZWNJ && \
1009 targetUniChar != DANDA && \
1010 targetUniChar != DOUBLE_DANDA){ \
1012 targetUniChar+=(uint16_t)(delta); \
1014 /* now write the targetUniChar */ \
1015 if(target<args->targetLimit){ \
1016 *(target)++ = (UChar)targetUniChar; \
1018 *(offsets)++ = (int32_t)(offset); \
1021 args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \
1022 (UChar)targetUniChar; \
1023 *err = U_BUFFER_OVERFLOW_ERROR; \
/*
 * Looks up the Unicode mapping of an ISCII byte and rejects it when the
 * result is not valid in the script currently selected for toUnicode.
 *
 *   sourceChar    - ISCII byte, used as the index into toUnicodeTable
 *   targetUniChar - lvalue that receives the mapping, or missingCharMarker
 *   data          - pointer to the converter state (currentMaskToUnicode and
 *                   currentDeltaToUnicode are read)
 *
 * validityTable has 128 entries, so the index is masked to 7 bits; a
 * table value such as 0xFFFF would otherwise index past the end.
 *
 * The only mapping allowed through despite a failed validity check is
 * Vocallic RR while the current script is Telugu, hence "not Telugu OR not
 * Vocallic RR" as the rejection test.
 * NOTE(review): UConverter_fromUnicode_ISCII_OFFSETS_LOGIC carries the same
 * exception written with `&&`; it likely needs the same correction.
 */
#define GET_MAPPING(sourceChar,targetUniChar,data){                                   \
    (targetUniChar) = toUnicodeTable[(sourceChar)];                                   \
    /* is the code point valid in current script? */                                  \
    if((sourceChar) > ASCII_END &&                                                    \
       (validityTable[(targetUniChar) & 0x7F] & (data)->currentMaskToUnicode) == 0){  \
        /* Vocallic RR is assigned in ISCII Telugu and Unicode */                     \
        if((data)->currentDeltaToUnicode != (TELUGU_DELTA) ||                         \
           (targetUniChar) != VOCALLIC_RR){                                           \
            (targetUniChar) = missingCharMarker;                                      \
        }                                                                             \
    }                                                                                 \
}
1041 * Rules for ISCII to Unicode converter
1042 * ISCII is stateful encoding. To convert ISCII bytes to Unicode,
1043 * which has both precomposed and decomposed forms characters
1044 * pre-context and post-context need to be considered.
1047 * i) ATR : Attribute code is used to declare the font and script switching.
1048 * Currently we only switch scripts and font codes consumed without generating an error
1049 * ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
1050 * obsolete characters
1052 * i) Halant: if preceded by a halant then it is an explicit halant
1054 * a) if preceded by a halant then it is a soft halant
1055 * b) if preceded by specific consonants and the ligatures have pre-composed
1056 * characters in Unicode then convert to pre-composed characters
1057 * iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
1062 UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs
*args
,
1064 const char *source
= ( char *) args
->source
;
1065 UChar
*target
= args
->target
;
1066 const char *sourceLimit
= args
->sourceLimit
;
1067 const UChar
* targetLimit
= args
->targetLimit
;
1068 uint32_t targetUniChar
= 0x0000;
1069 uint8_t sourceChar
= 0x0000;
1070 UConverterDataISCII
* data
;
1071 UChar32
* toUnicodeStatus
=NULL
;
1072 UChar
* contextCharToUnicode
= NULL
;
1074 if ((args
->converter
== NULL
) || (target
< args
->target
) || (source
< args
->source
)){
1075 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
1079 data
= (UConverterDataISCII
*)(args
->converter
->extraInfo
);
1080 contextCharToUnicode
= &data
->contextCharToUnicode
; /* contains previous ISCII codepoint visited */
1081 toUnicodeStatus
= (UChar32
*)&args
->converter
->toUnicodeStatus
;/* contains the mapping to Unicode of the above codepoint*/
1083 while(source
<sourceLimit
){
1085 targetUniChar
= missingCharMarker
;
1087 if(target
< targetLimit
){
1088 sourceChar
= (unsigned char)*(source
)++;
1090 /* look at the post-context preform special processing */
1091 if(*contextCharToUnicode
==ATR
){
1093 /* If we have ATR in *contextCharToUnicode then we need to change our
1094 * state to the Indic Script specified by sourceChar
1097 /* check if the sourceChar is supported script range*/
1098 if((uint8_t)(PNJ
-sourceChar
)<=PNJ
-DEV
){
1099 data
->currentDeltaToUnicode
=
1100 (uint16_t)(lookupTable
[sourceChar
& 0x0F][0] * DELTA
);
1101 data
->currentMaskToUnicode
=
1102 lookupTable
[sourceChar
& 0x0F][1] ;
1104 else if(sourceChar
==DEF
){
1105 /* switch back to default */
1106 data
->currentDeltaToUnicode
= data
->defDeltaToUnicode
;
1107 data
->currentMaskToUnicode
= data
->defMaskToUnicode
;
1109 if((sourceChar
>= 0x21 && sourceChar
<= 0x3F)){
1110 /* these are display codes consume and continue */
1112 *err
=U_ILLEGAL_CHAR_FOUND
;
1114 *contextCharToUnicode
=NO_CHAR_MARKER
;
1120 *contextCharToUnicode
=NO_CHAR_MARKER
;
1124 }else if(*contextCharToUnicode
==EXT
){
1125 /* check if sourceChar is in 0xA1-0xEE range */
1126 if((uint8_t) (EXT_RANGE_END
- sourceChar
) <= (EXT_RANGE_END
- EXT_RANGE_BEGIN
)){
1127 /* We currently support only Anudatta and Devanagari abbreviation sign */
1128 if(sourceChar
==0xBF || sourceChar
== 0xB8){
1129 targetUniChar
= (sourceChar
==0xBF) ? DEV_ABBR_SIGN
: DEV_ANUDATTA
;
1131 /* find out if the mapping is valid in this state */
1132 if(validityTable
[(uint8_t)targetUniChar
] & data
->currentMaskToUnicode
){
1134 *contextCharToUnicode
= NO_CHAR_MARKER
;
1136 /* write to target */
1137 WRITE_TO_TARGET_TO_U(args
,source
,target
,args
->offsets
,(source
-args
->source
-2),
1138 targetUniChar
,data
->currentDeltaToUnicode
,err
);
1143 /* byte unit is unassigned */
1144 targetUniChar
= missingCharMarker
;
1145 *err
= U_INVALID_CHAR_FOUND
;
1147 /* only 0xA1 - 0xEE are legal after EXT char */
1148 *contextCharToUnicode
= NO_CHAR_MARKER
;
1149 *err
= U_ILLEGAL_CHAR_FOUND
;
1152 }else if(*contextCharToUnicode
==ISCII_INV
){
1153 if(sourceChar
==ISCII_HALANT
){
1154 targetUniChar
= 0x0020; /* replace with space accoding to Indic FAQ */
1156 targetUniChar
= ZWJ
;
1159 /* write to target */
1160 WRITE_TO_TARGET_TO_U(args
,source
,target
,args
->offsets
,(source
-args
->source
-2),
1161 targetUniChar
,data
->currentDeltaToUnicode
,err
);
1163 *contextCharToUnicode
=NO_CHAR_MARKER
;
1166 /* look at the pre-context and perform special processing */
1169 case EXT
: /*falls through*/
1171 *contextCharToUnicode
= (UChar
)sourceChar
;
1173 if(*toUnicodeStatus
!= missingCharMarker
){
1175 WRITE_TO_TARGET_TO_U(args
,source
,target
,args
->offsets
,(source
-args
->source
-2),
1176 *toUnicodeStatus
,data
->currentDeltaToUnicode
,err
);
1177 *toUnicodeStatus
= missingCharMarker
;
1181 /* handle double danda*/
1182 if(*contextCharToUnicode
== ISCII_DANDA
){
1183 targetUniChar
= DOUBLE_DANDA
;
1184 /* clear the context */
1185 *contextCharToUnicode
= NO_CHAR_MARKER
;
1186 *toUnicodeStatus
= missingCharMarker
;
1188 GET_MAPPING(sourceChar
,targetUniChar
,data
);
1189 *contextCharToUnicode
= sourceChar
;
1193 /* handle explicit halant */
1194 if(*contextCharToUnicode
== ISCII_HALANT
){
1195 targetUniChar
= ZWNJ
;
1196 /* clear the context */
1197 *contextCharToUnicode
= NO_CHAR_MARKER
;
1199 GET_MAPPING(sourceChar
,targetUniChar
,data
);
1200 *contextCharToUnicode
= sourceChar
;
1204 /* handle soft halant */
1205 if(*contextCharToUnicode
== ISCII_HALANT
){
1206 targetUniChar
= ZWJ
;
1207 /* clear the context */
1208 *contextCharToUnicode
= NO_CHAR_MARKER
;
1211 /* try to handle <CHAR> + ISCII_NUKTA special mappings */
1214 for( ;i
<nuktaSpecialCases
[0][0];i
++){
1215 if(nuktaSpecialCases
[i
][0]==(uint8_t)*contextCharToUnicode
){
1216 targetUniChar
=nuktaSpecialCases
[i
][1];
1222 /* find out if the mapping is valid in this state */
1223 if(validityTable
[(uint8_t)targetUniChar
] & data
->currentMaskToUnicode
){
1224 targetUniChar
+= data
->currentDeltaToUnicode
;
1225 *contextCharToUnicode
= NO_CHAR_MARKER
;
1226 *toUnicodeStatus
= missingCharMarker
;
1229 /* else fall through to default */
1231 /* else fall through to default */
1234 GET_MAPPING(sourceChar
,targetUniChar
,data
);
1235 *contextCharToUnicode
= sourceChar
;
1240 if(*toUnicodeStatus
!= missingCharMarker
){
1241 /* write the previously mapped codepoint */
1242 WRITE_TO_TARGET_TO_U(args
,source
,target
,args
->offsets
,(source
-args
->source
-2),
1243 *toUnicodeStatus
,data
->currentDeltaToUnicode
,err
);
1244 *toUnicodeStatus
= missingCharMarker
;
1248 if(targetUniChar
!= missingCharMarker
){
1249 /* now save the targetUniChar for delayed write */
1250 *toUnicodeStatus
= (UChar
) targetUniChar
;
1253 /* we reach here only if targetUniChar == missingCharMarker
1254 * so assign codes to reason and err
1256 *err
= U_INVALID_CHAR_FOUND
;
1258 args
->converter
->toUBytes
[0] = (uint8_t) sourceChar
;
1259 args
->converter
->toULength
= 1;
1265 *err
=U_BUFFER_OVERFLOW_ERROR
;
1270 if(U_SUCCESS(*err
) && args
->flush
&& source
== sourceLimit
) {
1271 /* end of the input stream */
1272 UConverter
*cnv
= args
->converter
;
1274 if(*contextCharToUnicode
==ATR
|| *contextCharToUnicode
==EXT
|| *contextCharToUnicode
==ISCII_INV
){
1275 /* set toUBytes[] */
1276 cnv
->toUBytes
[0] = (uint8_t)*contextCharToUnicode
;
1279 /* avoid looping on truncated sequences */
1280 *contextCharToUnicode
= NO_CHAR_MARKER
;
1285 if(*toUnicodeStatus
!= missingCharMarker
) {
1286 /* output a remaining target character */
1287 WRITE_TO_TARGET_TO_U(args
,source
,target
,args
->offsets
,(source
- args
->source
-1),
1288 *toUnicodeStatus
,data
->currentDeltaToUnicode
,err
);
1289 *toUnicodeStatus
= missingCharMarker
;
1293 args
->target
= target
;
1294 args
->source
= source
;
1297 /* structure for SafeClone calculations */
/*
 * Layout that _ISCII_SafeClone overlays on the buffer it receives as
 * stackBuffer.  The clone routine sizes its request with
 * sizeof(struct cloneISCIIStruct) and accesses two members: `cnv` (the copied
 * converter; its declaration is not visible in this view) and `mydata` below.
 */
1298 struct cloneISCIIStruct
/* clone-private copy of the ISCII state; the clone's extraInfo is pointed at this member */
1301 UConverterDataISCII mydata
;
/*
 * Clones an ISCII converter into the memory referred to by stackBuffer.
 *
 * cnv          converter to copy; its extraInfo is read as a UConverterDataISCII
 * pBufferSize  in/out size; a value of 0 is treated as a "preflighting"
 *              request and is overwritten with the number of bytes needed
 * status       checked with U_FAILURE() before any work is done
 *
 * On the normal path the function returns the address of the `cnv` member of
 * the struct cloneISCIIStruct placed in stackBuffer.  The declarations of
 * stackBuffer and status, and the values returned on the early-exit paths,
 * are not visible in this view.
 *
 * NOTE(review): no check that *pBufferSize >= bufferSizeNeeded is visible
 * here before stackBuffer is written; confirm the caller guarantees it.
 */
1306 _ISCII_SafeClone(const UConverter
*cnv
,
1308 int32_t *pBufferSize
,
1311 struct cloneISCIIStruct
* localClone
;
/* the clone needs room for the converter plus its ISCII state */
1312 int32_t bufferSizeNeeded
= sizeof(struct cloneISCIIStruct
);
/* do nothing if the caller already carries an error */
1314 if (U_FAILURE(*status
)){
1318 if (*pBufferSize
== 0){ /* 'preflighting' request - set needed size into *pBufferSize */
1319 *pBufferSize
= bufferSizeNeeded
;
/* treat the supplied buffer as a cloneISCIIStruct */
1323 localClone
= (struct cloneISCIIStruct
*)stackBuffer
;
/* bytewise copy of the converter object itself */
1324 uprv_memcpy(&localClone
->cnv
, cnv
, sizeof(UConverter
));
/* bytewise copy of the ISCII-specific state that cnv->extraInfo points to */
1326 uprv_memcpy(&localClone
->mydata
, cnv
->extraInfo
, sizeof(UConverterDataISCII
));
/* re-point the clone at its own state copy instead of the original's */
1327 localClone
->cnv
.extraInfo
= &localClone
->mydata
;
/* flag that extraInfo lives inside the clone's own buffer */
1328 localClone
->cnv
.isExtraLocal
= TRUE
;
1330 return &localClone
->cnv
;
/*
 * Reports the Unicode code points handled by the ISCII converter by calling
 * sa->add / sa->addRange on sa->set (the declaration of `sa` is not visible
 * in this view).
 *
 * Added code points:
 *   - the range 0 .. ASCII_END
 *   - for every script from DEVANAGARI through MALAYALAM, each offset idx in
 *     [0, DELTA) whose validityTable entry has a bit in common with that
 *     script's mask, mapped to idx + script * DELTA + INDIC_BLOCK_BEGIN
 *   - DANDA, DOUBLE_DANDA, ZWNJ and ZWJ
 *
 * cnv, which and pErrorCode are not referenced by any line visible here.
 */
1334 _ISCIIGetUnicodeSet(const UConverter
*cnv
,
1336 UConverterUnicodeSet which
,
1337 UErrorCode
*pErrorCode
)
1339 int32_t idx
, script
;
1342 /* Since all ISCII versions allow switching to other ISCII
1343 scripts, we add all roundtrippable characters to this set. */
1344 sa
->addRange(sa
->set
, 0, ASCII_END
);
/* visit every script, from DEVANAGARI through MALAYALAM */
1345 for (script
= DEVANAGARI
; script
<= MALAYALAM
; script
++) {
/* mask for this script: column 1 of its lookupInitialData row, narrowed to 8 bits */
1346 mask
= (uint8_t)(lookupInitialData
[script
][1]);
/* test each of the DELTA offsets against the script's mask */
1347 for (idx
= 0; idx
< DELTA
; idx
++) {
1348 if (validityTable
[idx
] & mask
) {
/* code point = offset + script * DELTA + INDIC_BLOCK_BEGIN */
1349 sa
->add(sa
->set
, idx
+ (script
* DELTA
) + INDIC_BLOCK_BEGIN
);
/* DANDA, DOUBLE_DANDA, ZWNJ and ZWJ are added explicitly */
1353 sa
->add(sa
->set
, DANDA
);
1354 sa
->add(sa
->set
, DOUBLE_DANDA
);
1355 sa
->add(sa
->set
, ZWNJ
);
1356 sa
->add(sa
->set
, ZWJ
);
/*
 * Function table for the ISCII converter.  Of the entries visible here, the
 * single to-Unicode routine fills two consecutive slots and the single
 * from-Unicode routine fills the next two.
 * NOTE(review): presumably these are the plain and with-offsets variants
 * sharing one implementation; confirm against the UConverterImpl declaration.
 */
1359 static const UConverterImpl _ISCIIImpl
={
1370 UConverter_toUnicode_ISCII_OFFSETS_LOGIC
,
1371 UConverter_toUnicode_ISCII_OFFSETS_LOGIC
,
1372 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC
,
1373 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC
,
/*
 * File-local constant UConverterStaticData record for the ISCII converter.
 * The first initializer is the structure's own size; the visible tail is an
 * all-zero reserved array.  The remaining fields are not visible in this view.
 */
1383 static const UConverterStaticData _ISCIIStaticData
={
1384 sizeof(UConverterStaticData
),
1397 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
/*
 * UConverterSharedData record for the ISCII converter.  Unlike the two tables
 * above it is not declared static.  The first initializer is the structure's
 * own size; the remaining fields are not visible in this view.
 */
1401 const UConverterSharedData _ISCIIData
={
1402 sizeof(UConverterSharedData
),
1412 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */