]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ubidi_props.c
ICU-400.37.tar.gz
[apple/icu.git] / icuSources / common / ubidi_props.c
CommitLineData
/*
*******************************************************************************
*
*   Copyright (C) 2004-2008, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ubidi_props.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2004dec30
*   created by: Markus W. Scherer
*
*   Low-level Unicode bidi/shaping properties access.
*/
18
19#include "unicode/utypes.h"
20#include "unicode/uset.h"
21#include "unicode/udata.h" /* UDataInfo */
22#include "ucmndata.h" /* DataHeader */
23#include "udatamem.h"
24#include "umutex.h"
25#include "uassert.h"
26#include "cmemory.h"
27#include "utrie.h"
28#include "ubidi_props.h"
29#include "ucln_cmn.h"
30
31struct UBiDiProps {
32 UDataMemory *mem;
33 const int32_t *indexes;
34 const uint32_t *mirrors;
35 const uint8_t *jgArray;
36
37 UTrie trie;
38 uint8_t formatVersion[4];
39};
40
41/* data loading etc. -------------------------------------------------------- */
42
73c04bcf
A
43#if UBIDI_HARDCODE_DATA
44
45/* ubidi_props_data.c is machine-generated by genbidi --csource */
46#include "ubidi_props_data.c"
47
48#else
49
50static UBool U_CALLCONV
51isAcceptable(void *context,
52 const char *type, const char *name,
53 const UDataInfo *pInfo) {
54 if(
55 pInfo->size>=20 &&
56 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
57 pInfo->charsetFamily==U_CHARSET_FAMILY &&
58 pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */
59 pInfo->dataFormat[1]==UBIDI_FMT_1 &&
60 pInfo->dataFormat[2]==UBIDI_FMT_2 &&
61 pInfo->dataFormat[3]==UBIDI_FMT_3 &&
62 pInfo->formatVersion[0]==1 &&
63 pInfo->formatVersion[2]==UTRIE_SHIFT &&
64 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
65 ) {
66 UBiDiProps *bdp=(UBiDiProps *)context;
67 uprv_memcpy(bdp->formatVersion, pInfo->formatVersion, 4);
68 return TRUE;
69 } else {
70 return FALSE;
71 }
72}
73
74static UBiDiProps *
75ubidi_openData(UBiDiProps *bdpProto,
76 const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
77 UBiDiProps *bdp;
78 int32_t size;
79
80 bdpProto->indexes=(const int32_t *)bin;
81 if( (length>=0 && length<16*4) ||
82 bdpProto->indexes[UBIDI_IX_INDEX_TOP]<16
83 ) {
84 /* length or indexes[] too short for minimum indexes[] length of 16 */
85 *pErrorCode=U_INVALID_FORMAT_ERROR;
86 return NULL;
87 }
88 size=bdpProto->indexes[UBIDI_IX_INDEX_TOP]*4;
89 if(length>=0) {
90 if(length>=size && length>=bdpProto->indexes[UBIDI_IX_LENGTH]) {
91 length-=size;
92 } else {
93 /* length too short for indexes[] or for the whole data length */
94 *pErrorCode=U_INVALID_FORMAT_ERROR;
95 return NULL;
96 }
97 }
98 bin+=size;
99 /* from here on, assume that the sizes of the items fit into the total length */
100
101 /* unserialize the trie, after indexes[] */
102 size=bdpProto->indexes[UBIDI_IX_TRIE_SIZE];
103 utrie_unserialize(&bdpProto->trie, bin, size, pErrorCode);
104 if(U_FAILURE(*pErrorCode)) {
105 return NULL;
106 }
107 bin+=size;
108
109 /* get mirrors[] */
110 size=4*bdpProto->indexes[UBIDI_IX_MIRROR_LENGTH];
111 bdpProto->mirrors=(const uint32_t *)bin;
112 bin+=size;
113
114 /* get jgArray[] */
115 size=bdpProto->indexes[UBIDI_IX_JG_LIMIT]-bdpProto->indexes[UBIDI_IX_JG_START];
116 bdpProto->jgArray=bin;
117 bin+=size;
118
119 /* allocate, copy, and return the new UBiDiProps */
120 bdp=(UBiDiProps *)uprv_malloc(sizeof(UBiDiProps));
121 if(bdp==NULL) {
122 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
123 return NULL;
124 } else {
125 uprv_memcpy(bdp, bdpProto, sizeof(UBiDiProps));
126 return bdp;
127 }
128}
129
46f4442e 130U_CFUNC UBiDiProps *
73c04bcf
A
131ubidi_openProps(UErrorCode *pErrorCode) {
132 UBiDiProps bdpProto={ NULL }, *bdp;
133
134 bdpProto.mem=udata_openChoice(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, isAcceptable, &bdpProto, pErrorCode);
135 if(U_FAILURE(*pErrorCode)) {
136 return NULL;
137 }
138
139 bdp=ubidi_openData(
140 &bdpProto,
141 udata_getMemory(bdpProto.mem),
142 udata_getLength(bdpProto.mem),
143 pErrorCode);
144 if(U_FAILURE(*pErrorCode)) {
145 udata_close(bdpProto.mem);
146 return NULL;
147 } else {
148 return bdp;
149 }
150}
151
46f4442e 152U_CFUNC UBiDiProps *
73c04bcf
A
153ubidi_openBinary(const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
154 UBiDiProps bdpProto={ NULL };
155 const DataHeader *hdr;
156
157 if(U_FAILURE(*pErrorCode)) {
158 return NULL;
159 }
160 if(bin==NULL) {
161 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
162 return NULL;
163 }
164
165 /* check the header */
166 if(length>=0 && length<20) {
167 *pErrorCode=U_INVALID_FORMAT_ERROR;
168 return NULL;
169 }
170 hdr=(const DataHeader *)bin;
171 if(
172 !(hdr->dataHeader.magic1==0xda && hdr->dataHeader.magic2==0x27 &&
173 hdr->info.isBigEndian==U_IS_BIG_ENDIAN &&
174 isAcceptable(&bdpProto, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &hdr->info))
175 ) {
176 *pErrorCode=U_INVALID_FORMAT_ERROR;
177 return NULL;
178 }
179
180 bin+=hdr->dataHeader.headerSize;
181 if(length>=0) {
182 length-=hdr->dataHeader.headerSize;
183 }
184 return ubidi_openData(&bdpProto, bin, length, pErrorCode);
185}
186
187#endif
188
46f4442e 189U_CFUNC void
73c04bcf
A
190ubidi_closeProps(UBiDiProps *bdp) {
191 if(bdp!=NULL) {
192#if !UBIDI_HARDCODE_DATA
193 udata_close(bdp->mem);
194#endif
195 uprv_free(bdp);
196 }
197}
198
199/* UBiDiProps singleton ----------------------------------------------------- */
200
73c04bcf 201#if !UBIDI_HARDCODE_DATA
46f4442e
A
202static UBiDiProps *gBdpDummy=NULL;
203static UBiDiProps *gBdp=NULL;
73c04bcf
A
204static UErrorCode gErrorCode=U_ZERO_ERROR;
205static int8_t gHaveData=0;
73c04bcf
A
206
207static UBool U_CALLCONV
208ubidi_cleanup(void) {
73c04bcf
A
209 ubidi_closeProps(gBdpDummy);
210 gBdpDummy=NULL;
46f4442e
A
211 ubidi_closeProps(gBdp);
212 gBdp=NULL;
73c04bcf
A
213 gErrorCode=U_ZERO_ERROR;
214 gHaveData=0;
73c04bcf
A
215 return TRUE;
216}
46f4442e 217#endif
73c04bcf 218
46f4442e 219U_CFUNC const UBiDiProps *
73c04bcf
A
220ubidi_getSingleton(UErrorCode *pErrorCode) {
221#if UBIDI_HARDCODE_DATA
222 if(U_FAILURE(*pErrorCode)) {
223 return NULL;
224 }
225 return &ubidi_props_singleton;
226#else
227 int8_t haveData;
228
229 if(U_FAILURE(*pErrorCode)) {
230 return NULL;
231 }
232
233 UMTX_CHECK(NULL, gHaveData, haveData);
234
235 if(haveData>0) {
236 /* data was loaded */
237 return gBdp;
238 } else if(haveData<0) {
239 /* data loading failed */
240 *pErrorCode=gErrorCode;
241 return NULL;
242 } else /* haveData==0 */ {
243 /* load the data */
244 UBiDiProps *bdp=ubidi_openProps(pErrorCode);
245 if(U_FAILURE(*pErrorCode)) {
246 gHaveData=-1;
247 gErrorCode=*pErrorCode;
248 return NULL;
249 }
250
251 /* set the static variables */
252 umtx_lock(NULL);
253 if(gBdp==NULL) {
254 gBdp=bdp;
255 bdp=NULL;
256 gHaveData=1;
257 ucln_common_registerCleanup(UCLN_COMMON_UBIDI, ubidi_cleanup);
258 }
259 umtx_unlock(NULL);
260
261 ubidi_closeProps(bdp);
262 return gBdp;
263 }
264#endif
265}
266
46f4442e
A
267#if !UBIDI_HARDCODE_DATA
268U_CAPI const UBiDiProps *
73c04bcf
A
269ubidi_getDummy(UErrorCode *pErrorCode) {
270 UBiDiProps *bdp;
271
272 if(U_FAILURE(*pErrorCode)) {
273 return NULL;
274 }
275
276 UMTX_CHECK(NULL, gBdpDummy, bdp);
277
278 if(bdp!=NULL) {
279 /* the dummy object was already created */
280 return bdp;
281 } else /* bdp==NULL */ {
282 /* create the dummy object */
283 int32_t *indexes;
284
285 bdp=(UBiDiProps *)uprv_malloc(sizeof(UBiDiProps)+UBIDI_IX_TOP*4+UTRIE_DUMMY_SIZE);
286 if(bdp==NULL) {
287 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
288 return NULL;
289 }
290 uprv_memset(bdp, 0, sizeof(UBiDiProps)+UBIDI_IX_TOP*4);
291
292 bdp->indexes=indexes=(int32_t *)(bdp+1);
293 indexes[UBIDI_IX_INDEX_TOP]=UBIDI_IX_TOP;
294
295 indexes[UBIDI_IX_TRIE_SIZE]=
296 utrie_unserializeDummy(&bdp->trie, indexes+UBIDI_IX_TOP, UTRIE_DUMMY_SIZE, 0, 0, TRUE, pErrorCode);
297 if(U_FAILURE(*pErrorCode)) {
298 uprv_free(bdp);
299 return NULL;
300 }
301
302 bdp->formatVersion[0]=1;
303 bdp->formatVersion[2]=UTRIE_SHIFT;
304 bdp->formatVersion[3]=UTRIE_INDEX_SHIFT;
305
306 /* set the static variables */
307 umtx_lock(NULL);
308 if(gBdpDummy==NULL) {
309 gBdpDummy=bdp;
310 bdp=NULL;
311 ucln_common_registerCleanup(UCLN_COMMON_UBIDI, ubidi_cleanup);
312 }
313 umtx_unlock(NULL);
314
315 uprv_free(bdp);
316 return gBdpDummy;
317 }
318}
46f4442e 319#endif
73c04bcf
A
320
321/* set of property starts for UnicodeSet ------------------------------------ */
322
323static UBool U_CALLCONV
324_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
325 /* add the start code point to the USet */
326 const USetAdder *sa=(const USetAdder *)context;
327 sa->add(sa->set, start);
328 return TRUE;
329}
330
46f4442e 331U_CFUNC void
73c04bcf
A
332ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) {
333 int32_t i, length;
334 UChar32 c, start, limit;
335
336 const uint8_t *jgArray;
337 uint8_t prev, jg;
338
339 if(U_FAILURE(*pErrorCode)) {
340 return;
341 }
342
343 /* add the start code point of each same-value range of the trie */
344 utrie_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa);
345
346 /* add the code points from the bidi mirroring table */
347 length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
348 for(i=0; i<length; ++i) {
349 c=UBIDI_GET_MIRROR_CODE_POINT(bdp->mirrors[i]);
350 sa->addRange(sa->set, c, c+1);
351 }
352
353 /* add the code points from the Joining_Group array where the value changes */
354 start=bdp->indexes[UBIDI_IX_JG_START];
355 limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
356 jgArray=bdp->jgArray;
357 prev=0;
358 while(start<limit) {
359 jg=*jgArray++;
360 if(jg!=prev) {
361 sa->add(sa->set, start);
362 prev=jg;
363 }
364 ++start;
365 }
366 if(prev!=0) {
367 /* add the limit code point if the last value was not 0 (it is now start==limit) */
368 sa->add(sa->set, limit);
369 }
370
371 /* add code points with hardcoded properties, plus the ones following them */
372
373 /* (none right now) */
374}
375
376/* data access primitives --------------------------------------------------- */
377
/*
 * Fetch the properties word for code point c from bdp's trie into result.
 * UTRIE_GET16() itself validates c.
 * The definition deliberately has no trailing semicolon; the caller's own
 * ';' terminates the statement.
 */
#define GET_PROPS(bdp, c, result) \
    UTRIE_GET16(&(bdp)->trie, c, result)
381
382/* property access functions ------------------------------------------------ */
383
384U_CFUNC int32_t
385ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) {
386 int32_t max;
387
388 if(bdp==NULL) {
389 return -1;
390 }
391
392 max=bdp->indexes[UBIDI_MAX_VALUES_INDEX];
393 switch(which) {
394 case UCHAR_BIDI_CLASS:
395 return (max&UBIDI_CLASS_MASK);
396 case UCHAR_JOINING_GROUP:
397 return (max&UBIDI_MAX_JG_MASK)>>UBIDI_MAX_JG_SHIFT;
398 case UCHAR_JOINING_TYPE:
399 return (max&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT;
400 default:
401 return -1; /* undefined */
402 }
403}
404
46f4442e 405U_CAPI UCharDirection
73c04bcf
A
406ubidi_getClass(const UBiDiProps *bdp, UChar32 c) {
407 uint32_t props;
408 GET_PROPS(bdp, c, props);
409 return (UCharDirection)UBIDI_GET_CLASS(props);
410}
411
46f4442e 412U_CFUNC UBool
73c04bcf
A
413ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) {
414 uint32_t props;
415 GET_PROPS(bdp, c, props);
416 return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT);
417}
418
46f4442e 419U_CFUNC UChar32
73c04bcf
A
420ubidi_getMirror(const UBiDiProps *bdp, UChar32 c) {
421 uint32_t props;
422 int32_t delta;
423
424 GET_PROPS(bdp, c, props);
425 delta=((int16_t)props)>>UBIDI_MIRROR_DELTA_SHIFT;
426 if(delta!=UBIDI_ESC_MIRROR_DELTA) {
427 return c+delta;
428 } else {
429 /* look for mirror code point in the mirrors[] table */
430 const uint32_t *mirrors;
431 uint32_t m;
432 int32_t i, length;
433 UChar32 c2;
434
435 mirrors=bdp->mirrors;
436 length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
437
438 /* linear search */
439 for(i=0; i<length; ++i) {
440 m=mirrors[i];
441 c2=UBIDI_GET_MIRROR_CODE_POINT(m);
442 if(c==c2) {
443 /* found c, return its mirror code point using the index in m */
444 return UBIDI_GET_MIRROR_CODE_POINT(mirrors[UBIDI_GET_MIRROR_INDEX(m)]);
445 } else if(c<c2) {
446 break;
447 }
448 }
449
450 /* c not found, return it itself */
451 return c;
452 }
453}
454
46f4442e 455U_CFUNC UBool
73c04bcf
A
456ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) {
457 uint32_t props;
458 GET_PROPS(bdp, c, props);
459 return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT);
460}
461
46f4442e 462U_CFUNC UBool
73c04bcf
A
463ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) {
464 uint32_t props;
465 GET_PROPS(bdp, c, props);
466 return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT);
467}
468
46f4442e 469U_CFUNC UJoiningType
73c04bcf
A
470ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) {
471 uint32_t props;
472 GET_PROPS(bdp, c, props);
473 return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT);
474}
475
46f4442e 476U_CFUNC UJoiningGroup
73c04bcf
A
477ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) {
478 UChar32 start, limit;
479
480 start=bdp->indexes[UBIDI_IX_JG_START];
481 limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
482 if(start<=c && c<limit) {
483 return (UJoiningGroup)bdp->jgArray[c-start];
484 } else {
485 return U_JG_NO_JOINING_GROUP;
486 }
487}
488
489/* public API (see uchar.h) ------------------------------------------------- */
490
46f4442e 491U_CFUNC UCharDirection
73c04bcf
A
492u_charDirection(UChar32 c) {
493 UErrorCode errorCode=U_ZERO_ERROR;
494 const UBiDiProps *bdp=ubidi_getSingleton(&errorCode);
495 if(bdp!=NULL) {
496 return ubidi_getClass(bdp, c);
497 } else {
498 return U_LEFT_TO_RIGHT;
499 }
500}
501
46f4442e 502U_CFUNC UBool
73c04bcf
A
503u_isMirrored(UChar32 c) {
504 UErrorCode errorCode=U_ZERO_ERROR;
505 const UBiDiProps *bdp=ubidi_getSingleton(&errorCode);
506 return (UBool)(bdp!=NULL && ubidi_isMirrored(bdp, c));
507}
508
46f4442e 509U_CFUNC UChar32
73c04bcf
A
510u_charMirror(UChar32 c) {
511 UErrorCode errorCode=U_ZERO_ERROR;
512 const UBiDiProps *bdp=ubidi_getSingleton(&errorCode);
513 if(bdp!=NULL) {
514 return ubidi_getMirror(bdp, c);
515 } else {
516 return c;
517 }
518}