]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucmndata.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / common / ucmndata.c
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
3*
374ca955 4* Copyright (C) 1999-2004, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************/
8
9
374ca955 10/*------------------------------------------------------------------------------
b75a7d8f
A
11 *
12 * UCommonData An abstract interface for dealing with ICU Common Data Files.
13 * ICU Common Data Files are a grouping of a number of individual
14 * data items (resources, converters, tables, anything) into a
15 * single file or dll. The combined format includes a table of
16 * contents for locating the individual items by name.
17 *
18 * Two formats for the table of contents are supported, which is
19 * why there is an abstract interface involved.
20 *
374ca955
A
21 */
22
b75a7d8f
A
23#include "unicode/utypes.h"
24#include "unicode/udata.h"
25#include "cstring.h"
26#include "ucmndata.h"
27#include "udatamem.h"
28
374ca955
A
29#if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP)
30# include <stdio.h>
31#endif
32
33U_CFUNC uint16_t
34udata_getHeaderSize(const DataHeader *udh) {
35 if(udh==NULL) {
36 return 0;
37 } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) {
38 /* same endianness */
39 return udh->dataHeader.headerSize;
40 } else {
41 /* opposite endianness */
42 uint16_t x=udh->dataHeader.headerSize;
43 return (uint16_t)((x<<8)|(x>>8));
44 }
45}
46
47U_CFUNC uint16_t
48udata_getInfoSize(const UDataInfo *info) {
49 if(info==NULL) {
50 return 0;
51 } else if(info->isBigEndian==U_IS_BIG_ENDIAN) {
52 /* same endianness */
53 return info->size;
54 } else {
55 /* opposite endianness */
56 uint16_t x=info->size;
57 return (uint16_t)((x<<8)|(x>>8));
58 }
59}
b75a7d8f 60
374ca955
A
61/*-----------------------------------------------------------------------------*
62 * *
63 * Pointer TOCs. TODO: This form of table-of-contents should be removed *
64 * because DLLs must be relocated on loading to correct the *
65 * pointer values and this operation makes shared memory *
66 * mapping of the data much less likely to work. *
67 * *
68 *-----------------------------------------------------------------------------*/
b75a7d8f
A
69typedef struct {
70 const char *entryName;
71 const DataHeader *pHeader;
72} PointerTOCEntry;
73
74
75typedef struct {
76 uint32_t count;
77 uint32_t reserved;
78 PointerTOCEntry entry[2]; /* Actual size is from count. */
79} PointerTOC;
80
81
374ca955 82/* definition of OffsetTOC struct types moved to ucmndata.h */
b75a7d8f 83
374ca955
A
84/*-----------------------------------------------------------------------------*
85 * *
86 * entry point lookup implementations *
87 * *
88 *-----------------------------------------------------------------------------*/
b75a7d8f
A
89static uint32_t offsetTOCEntryCount(const UDataMemory *pData) {
90 int32_t retVal=0;
374ca955 91 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc;
b75a7d8f
A
92 if (toc != NULL) {
93 retVal = toc->count;
374ca955 94 }
b75a7d8f
A
95 return retVal;
96}
97
98
99static const DataHeader *
100offsetTOCLookupFn(const UDataMemory *pData,
101 const char *tocEntryName,
374ca955 102 int32_t *pLength,
b75a7d8f 103 UErrorCode *pErrorCode) {
374ca955 104 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc;
b75a7d8f
A
105 if(toc!=NULL) {
106 const char *base=(const char *)pData->toc;
374ca955
A
107 uint32_t start, limit, number, lastNumber;
108 int32_t strResult;
109 const UDataOffsetTOCEntry *entry;
b75a7d8f
A
110
111 /* perform a binary search for the data in the common data's table of contents */
374ca955
A
112#if defined (UDATA_DEBUG_DUMP)
113 /* list the contents of the TOC each time .. not recommended */
114 for(start=0;start<toc->count;start++) {
115 fprintf(stderr, "\tx%d: %s\n", start, &base[toc->entry[start].nameOffset]);
116 }
117#endif
118
b75a7d8f
A
119 start=0;
120 limit=toc->count; /* number of names in this table of contents */
374ca955
A
121 lastNumber=limit;
122 entry=toc->entry;
b75a7d8f
A
123 if (limit == 0) { /* Stub common data library used during build is empty. */
124 return NULL;
125 }
374ca955
A
126 for (;;) {
127 number = (start+limit)/2;
128 if (lastNumber == number) { /* Have we moved? */
129 break; /* We haven't moved, and it wasn't found. */
130 }
131 lastNumber = number;
132 strResult = uprv_strcmp(tocEntryName, base+entry[number].nameOffset);
133 if(strResult<0) {
b75a7d8f 134 limit=number;
374ca955 135 } else if (strResult>0) {
b75a7d8f
A
136 start=number;
137 }
374ca955
A
138 else {
139 /* found it */
b75a7d8f 140#ifdef UDATA_DEBUG
374ca955 141 fprintf(stderr, "%s: Found.\n", tocEntryName);
b75a7d8f 142#endif
374ca955
A
143 entry += number; /* Alias the entry to the current entry. */
144 if((number+1) < toc->count) {
145 *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset);
146 } else {
147 *pLength = -1;
148 }
149 return (const DataHeader *)(base+entry->dataOffset);
150 }
151 }
b75a7d8f 152#ifdef UDATA_DEBUG
374ca955 153 fprintf(stderr, "%s: Not found.\n", tocEntryName);
b75a7d8f 154#endif
374ca955 155 return NULL;
b75a7d8f
A
156 } else {
157#ifdef UDATA_DEBUG
158 fprintf(stderr, "returning header\n");
159#endif
160
161 return pData->pHeader;
162 }
163}
164
165
166static uint32_t pointerTOCEntryCount(const UDataMemory *pData) {
167 const PointerTOC *toc = (PointerTOC *)pData->toc;
168 if (toc != NULL) {
169 return toc->count;
170 } else {
171 return 0;
172 }
173}
174
175
176static const DataHeader *pointerTOCLookupFn(const UDataMemory *pData,
177 const char *name,
374ca955 178 int32_t *pLength,
b75a7d8f
A
179 UErrorCode *pErrorCode) {
180 if(pData->toc!=NULL) {
181 const PointerTOC *toc = (PointerTOC *)pData->toc;
374ca955
A
182 uint32_t start, limit, number, lastNumber;
183 int32_t strResult;
184
185#if defined (UDATA_DEBUG_DUMP)
186 /* list the contents of the TOC each time .. not recommended */
187 for(start=0;start<toc->count;start++) {
188 fprintf(stderr, "\tx%d: %s\n", start, toc->entry[start].entryName);
189 }
190#endif
b75a7d8f
A
191
192 /* perform a binary search for the data in the common data's table of contents */
193 start=0;
374ca955
A
194 limit=toc->count;
195 lastNumber=limit;
b75a7d8f
A
196
197 if (limit == 0) { /* Stub common data library used during build is empty. */
198 return NULL;
199 }
200
374ca955
A
201 for (;;) {
202 number = (start+limit)/2;
203 if (lastNumber == number) { /* Have we moved? */
204 break; /* We haven't moved, and it wasn't found. */
205 }
206 lastNumber = number;
207 strResult = uprv_strcmp(name, toc->entry[number].entryName);
208 if(strResult<0) {
b75a7d8f 209 limit=number;
374ca955 210 } else if (strResult>0) {
b75a7d8f
A
211 start=number;
212 }
374ca955
A
213 else {
214 /* found it */
215#ifdef UDATA_DEBUG
216 fprintf(STDErr, "%s: Found.\n", toc->entry[number].entryName);
217#endif
218 *pLength=-1;
219 return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader);
220 }
b75a7d8f 221 }
374ca955
A
222#ifdef UDATA_DEBUG
223 fprintf(stderr, "%s: Not found.\n", name);
224#endif
225 return NULL;
b75a7d8f
A
226 } else {
227 return pData->pHeader;
228 }
229}
230
231static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount};
232static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount};
233
234
235
236/*----------------------------------------------------------------------*
237 * *
238 * checkCommonData Validate the format of a common data file. *
374ca955 239 * Fill in the virtual function ptr based on TOC type *
b75a7d8f
A
240 * If the data is invalid, close the UDataMemory *
241 * and set the appropriate error code. *
242 * *
243 *----------------------------------------------------------------------*/
244void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) {
245 if (U_FAILURE(*err)) {
246 return;
247 }
248
249 if(!(udm->pHeader->dataHeader.magic1==0xda &&
250 udm->pHeader->dataHeader.magic2==0x27 &&
251 udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN &&
252 udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY)
253 ) {
254 /* header not valid */
255 *err=U_INVALID_FORMAT_ERROR;
256 }
257 else if (udm->pHeader->info.dataFormat[0]==0x43 &&
258 udm->pHeader->info.dataFormat[1]==0x6d &&
259 udm->pHeader->info.dataFormat[2]==0x6e &&
260 udm->pHeader->info.dataFormat[3]==0x44 &&
261 udm->pHeader->info.formatVersion[0]==1
262 ) {
263 /* dataFormat="CmnD" */
264 udm->vFuncs = &CmnDFuncs;
374ca955 265 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
b75a7d8f
A
266 }
267 else if(udm->pHeader->info.dataFormat[0]==0x54 &&
268 udm->pHeader->info.dataFormat[1]==0x6f &&
269 udm->pHeader->info.dataFormat[2]==0x43 &&
270 udm->pHeader->info.dataFormat[3]==0x50 &&
271 udm->pHeader->info.formatVersion[0]==1
272 ) {
273 /* dataFormat="ToCP" */
274 udm->vFuncs = &ToCPFuncs;
374ca955 275 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
b75a7d8f
A
276 }
277 else {
278 /* dataFormat not recognized */
279 *err=U_INVALID_FORMAT_ERROR;
280 }
281
282 if (U_FAILURE(*err)) {
283 /* If the data is no good and we memory-mapped it ourselves,
284 * close the memory mapping so it doesn't leak. Note that this has
285 * no effect on non-memory mapped data, other than clearing fields in udm.
286 */
287 udata_close(udm);
288 }
289}
290
374ca955
A
291/*
292 * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package
293 * header but not its sub-items.
294 * This function will be needed for automatic runtime swapping.
295 * Sub-items should not be swapped to limit the swapping to the parts of the
296 * package that are actually used.
297 *
298 * Since lengths of items are implicit in the order and offsets of their
299 * ToC entries, and since offsets are relative to the start of the ToC,
300 * a swapped version may need to generate a different data structure
301 * with pointers to the original data items and with their lengths
302 * (-1 for the last one if it is not known), and maybe even pointers to the
303 * swapped versions of the items.
304 * These pointers to swapped versions would establish a cache;
305 * instead, each open data item could simply own the storage for its swapped
306 * data. This fits better with the current design.
307 *
308 * markus 2003sep18 Jitterbug 2235
309 */