]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucmndata.c
ICU-461.17.tar.gz
[apple/icu.git] / icuSources / common / ucmndata.c
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
3*
729e4ab9 4* Copyright (C) 1999-2010, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************/
8
9
374ca955 10/*------------------------------------------------------------------------------
b75a7d8f
A
11 *
12 * UCommonData An abstract interface for dealing with ICU Common Data Files.
13 * ICU Common Data Files are a grouping of a number of individual
14 * data items (resources, converters, tables, anything) into a
15 * single file or dll. The combined format includes a table of
16 * contents for locating the individual items by name.
17 *
18 * Two formats for the table of contents are supported, which is
19 * why there is an abstract interface involved.
20 *
374ca955
A
21 */
22
b75a7d8f
A
23#include "unicode/utypes.h"
24#include "unicode/udata.h"
25#include "cstring.h"
26#include "ucmndata.h"
27#include "udatamem.h"
28
374ca955
A
29#if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP)
30# include <stdio.h>
31#endif
32
33U_CFUNC uint16_t
34udata_getHeaderSize(const DataHeader *udh) {
35 if(udh==NULL) {
36 return 0;
37 } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) {
38 /* same endianness */
39 return udh->dataHeader.headerSize;
40 } else {
41 /* opposite endianness */
42 uint16_t x=udh->dataHeader.headerSize;
43 return (uint16_t)((x<<8)|(x>>8));
44 }
45}
46
47U_CFUNC uint16_t
48udata_getInfoSize(const UDataInfo *info) {
49 if(info==NULL) {
50 return 0;
51 } else if(info->isBigEndian==U_IS_BIG_ENDIAN) {
52 /* same endianness */
53 return info->size;
54 } else {
55 /* opposite endianness */
56 uint16_t x=info->size;
57 return (uint16_t)((x<<8)|(x>>8));
58 }
59}
b75a7d8f 60
374ca955
A
61/*-----------------------------------------------------------------------------*
62 * *
63 * Pointer TOCs. TODO: This form of table-of-contents should be removed *
64 * because DLLs must be relocated on loading to correct the *
65 * pointer values and this operation makes shared memory *
66 * mapping of the data much less likely to work. *
67 * *
68 *-----------------------------------------------------------------------------*/
b75a7d8f
A
69typedef struct {
70 const char *entryName;
71 const DataHeader *pHeader;
72} PointerTOCEntry;
73
74
75typedef struct {
76 uint32_t count;
77 uint32_t reserved;
78 PointerTOCEntry entry[2]; /* Actual size is from count. */
79} PointerTOC;
80
81
374ca955 82/* definition of OffsetTOC struct types moved to ucmndata.h */
b75a7d8f 83
374ca955
A
84/*-----------------------------------------------------------------------------*
85 * *
86 * entry point lookup implementations *
87 * *
88 *-----------------------------------------------------------------------------*/
b75a7d8f
A
89static uint32_t offsetTOCEntryCount(const UDataMemory *pData) {
90 int32_t retVal=0;
374ca955 91 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc;
b75a7d8f
A
92 if (toc != NULL) {
93 retVal = toc->count;
374ca955 94 }
b75a7d8f
A
95 return retVal;
96}
97
98
99static const DataHeader *
100offsetTOCLookupFn(const UDataMemory *pData,
101 const char *tocEntryName,
374ca955 102 int32_t *pLength,
b75a7d8f 103 UErrorCode *pErrorCode) {
374ca955 104 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc;
b75a7d8f 105 if(toc!=NULL) {
73c04bcf 106 const char *base=(const char *)toc;
374ca955
A
107 uint32_t start, limit, number, lastNumber;
108 int32_t strResult;
109 const UDataOffsetTOCEntry *entry;
b75a7d8f
A
110
111 /* perform a binary search for the data in the common data's table of contents */
374ca955
A
112#if defined (UDATA_DEBUG_DUMP)
113 /* list the contents of the TOC each time .. not recommended */
114 for(start=0;start<toc->count;start++) {
115 fprintf(stderr, "\tx%d: %s\n", start, &base[toc->entry[start].nameOffset]);
116 }
117#endif
118
b75a7d8f
A
119 start=0;
120 limit=toc->count; /* number of names in this table of contents */
374ca955
A
121 lastNumber=limit;
122 entry=toc->entry;
374ca955
A
123 for (;;) {
124 number = (start+limit)/2;
125 if (lastNumber == number) { /* Have we moved? */
73c04bcf
A
126 break; /* We haven't moved, and it wasn't found; */
127 /* or the empty stub common data library was used during build. */
374ca955
A
128 }
129 lastNumber = number;
130 strResult = uprv_strcmp(tocEntryName, base+entry[number].nameOffset);
131 if(strResult<0) {
b75a7d8f 132 limit=number;
374ca955 133 } else if (strResult>0) {
b75a7d8f
A
134 start=number;
135 }
374ca955
A
136 else {
137 /* found it */
b75a7d8f 138#ifdef UDATA_DEBUG
374ca955 139 fprintf(stderr, "%s: Found.\n", tocEntryName);
b75a7d8f 140#endif
374ca955
A
141 entry += number; /* Alias the entry to the current entry. */
142 if((number+1) < toc->count) {
143 *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset);
144 } else {
145 *pLength = -1;
146 }
147 return (const DataHeader *)(base+entry->dataOffset);
148 }
149 }
b75a7d8f 150#ifdef UDATA_DEBUG
374ca955 151 fprintf(stderr, "%s: Not found.\n", tocEntryName);
b75a7d8f 152#endif
374ca955 153 return NULL;
b75a7d8f
A
154 } else {
155#ifdef UDATA_DEBUG
156 fprintf(stderr, "returning header\n");
157#endif
158
159 return pData->pHeader;
160 }
161}
162
163
164static uint32_t pointerTOCEntryCount(const UDataMemory *pData) {
165 const PointerTOC *toc = (PointerTOC *)pData->toc;
73c04bcf 166 return (uint32_t)((toc != NULL) ? (toc->count) : 0);
b75a7d8f
A
167}
168
169
170static const DataHeader *pointerTOCLookupFn(const UDataMemory *pData,
171 const char *name,
374ca955 172 int32_t *pLength,
b75a7d8f
A
173 UErrorCode *pErrorCode) {
174 if(pData->toc!=NULL) {
175 const PointerTOC *toc = (PointerTOC *)pData->toc;
374ca955
A
176 uint32_t start, limit, number, lastNumber;
177 int32_t strResult;
178
179#if defined (UDATA_DEBUG_DUMP)
180 /* list the contents of the TOC each time .. not recommended */
181 for(start=0;start<toc->count;start++) {
182 fprintf(stderr, "\tx%d: %s\n", start, toc->entry[start].entryName);
183 }
184#endif
b75a7d8f
A
185
186 /* perform a binary search for the data in the common data's table of contents */
187 start=0;
374ca955
A
188 limit=toc->count;
189 lastNumber=limit;
b75a7d8f 190
374ca955
A
191 for (;;) {
192 number = (start+limit)/2;
193 if (lastNumber == number) { /* Have we moved? */
73c04bcf
A
194 break; /* We haven't moved, and it wasn't found, */
195 /* or the empty stub common data library was used during build. */
374ca955
A
196 }
197 lastNumber = number;
198 strResult = uprv_strcmp(name, toc->entry[number].entryName);
199 if(strResult<0) {
b75a7d8f 200 limit=number;
374ca955 201 } else if (strResult>0) {
b75a7d8f
A
202 start=number;
203 }
374ca955
A
204 else {
205 /* found it */
206#ifdef UDATA_DEBUG
729e4ab9 207 fprintf(stderr, "%s: Found.\n", toc->entry[number].entryName);
374ca955
A
208#endif
209 *pLength=-1;
210 return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader);
211 }
b75a7d8f 212 }
374ca955
A
213#ifdef UDATA_DEBUG
214 fprintf(stderr, "%s: Not found.\n", name);
215#endif
216 return NULL;
b75a7d8f
A
217 } else {
218 return pData->pHeader;
219 }
220}
221
222static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount};
223static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount};
224
225
226
227/*----------------------------------------------------------------------*
228 * *
229 * checkCommonData Validate the format of a common data file. *
374ca955 230 * Fill in the virtual function ptr based on TOC type *
b75a7d8f
A
231 * If the data is invalid, close the UDataMemory *
232 * and set the appropriate error code. *
233 * *
234 *----------------------------------------------------------------------*/
729e4ab9 235U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) {
b75a7d8f
A
236 if (U_FAILURE(*err)) {
237 return;
238 }
239
240 if(!(udm->pHeader->dataHeader.magic1==0xda &&
241 udm->pHeader->dataHeader.magic2==0x27 &&
242 udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN &&
243 udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY)
244 ) {
245 /* header not valid */
246 *err=U_INVALID_FORMAT_ERROR;
247 }
248 else if (udm->pHeader->info.dataFormat[0]==0x43 &&
249 udm->pHeader->info.dataFormat[1]==0x6d &&
250 udm->pHeader->info.dataFormat[2]==0x6e &&
251 udm->pHeader->info.dataFormat[3]==0x44 &&
252 udm->pHeader->info.formatVersion[0]==1
253 ) {
254 /* dataFormat="CmnD" */
255 udm->vFuncs = &CmnDFuncs;
374ca955 256 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
b75a7d8f
A
257 }
258 else if(udm->pHeader->info.dataFormat[0]==0x54 &&
259 udm->pHeader->info.dataFormat[1]==0x6f &&
260 udm->pHeader->info.dataFormat[2]==0x43 &&
261 udm->pHeader->info.dataFormat[3]==0x50 &&
262 udm->pHeader->info.formatVersion[0]==1
263 ) {
264 /* dataFormat="ToCP" */
265 udm->vFuncs = &ToCPFuncs;
374ca955 266 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
b75a7d8f
A
267 }
268 else {
269 /* dataFormat not recognized */
270 *err=U_INVALID_FORMAT_ERROR;
271 }
272
273 if (U_FAILURE(*err)) {
274 /* If the data is no good and we memory-mapped it ourselves,
275 * close the memory mapping so it doesn't leak. Note that this has
276 * no effect on non-memory mapped data, other than clearing fields in udm.
277 */
278 udata_close(udm);
279 }
280}
281
374ca955
A
282/*
283 * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package
284 * header but not its sub-items.
285 * This function will be needed for automatic runtime swapping.
286 * Sub-items should not be swapped to limit the swapping to the parts of the
287 * package that are actually used.
288 *
289 * Since lengths of items are implicit in the order and offsets of their
290 * ToC entries, and since offsets are relative to the start of the ToC,
291 * a swapped version may need to generate a different data structure
292 * with pointers to the original data items and with their lengths
293 * (-1 for the last one if it is not known), and maybe even pointers to the
294 * swapped versions of the items.
295 * These pointers to swapped versions would establish a cache;
296 * instead, each open data item could simply own the storage for its swapped
297 * data. This fits better with the current design.
298 *
299 * markus 2003sep18 Jitterbug 2235
300 */