]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/common/ucmndata.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / common / ucmndata.c
... / ...
CommitLineData
1/*
2******************************************************************************
3*
4* Copyright (C) 1999-2004, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************/
8
9
10/*------------------------------------------------------------------------------
11 *
12 * UCommonData An abstract interface for dealing with ICU Common Data Files.
13 * ICU Common Data Files are a grouping of a number of individual
14 * data items (resources, converters, tables, anything) into a
15 * single file or dll. The combined format includes a table of
16 * contents for locating the individual items by name.
17 *
 *   Two formats for the table of contents are supported, which is
 *   why there is an abstract interface involved.
20 *
21 */
22
23#include "unicode/utypes.h"
24#include "unicode/udata.h"
25#include "cstring.h"
26#include "ucmndata.h"
27#include "udatamem.h"
28
29#if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP)
30# include <stdio.h>
31#endif
32
33U_CFUNC uint16_t
34udata_getHeaderSize(const DataHeader *udh) {
35 if(udh==NULL) {
36 return 0;
37 } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) {
38 /* same endianness */
39 return udh->dataHeader.headerSize;
40 } else {
41 /* opposite endianness */
42 uint16_t x=udh->dataHeader.headerSize;
43 return (uint16_t)((x<<8)|(x>>8));
44 }
45}
46
47U_CFUNC uint16_t
48udata_getInfoSize(const UDataInfo *info) {
49 if(info==NULL) {
50 return 0;
51 } else if(info->isBigEndian==U_IS_BIG_ENDIAN) {
52 /* same endianness */
53 return info->size;
54 } else {
55 /* opposite endianness */
56 uint16_t x=info->size;
57 return (uint16_t)((x<<8)|(x>>8));
58 }
59}
60
61/*-----------------------------------------------------------------------------*
62 * *
63 * Pointer TOCs. TODO: This form of table-of-contents should be removed *
64 * because DLLs must be relocated on loading to correct the *
65 * pointer values and this operation makes shared memory *
66 * mapping of the data much less likely to work. *
67 * *
68 *-----------------------------------------------------------------------------*/
/* One entry of a pointer-style table of contents: an item name and its data. */
typedef struct {
    const char *entryName;      /* name that pointerTOCLookupFn compares against */
    const DataHeader *pHeader;  /* header of the data item carrying that name */
} PointerTOCEntry;
73
74
/* Pointer-style table of contents: an entry count followed by the entries. */
typedef struct {
    uint32_t count;             /* number of elements in entry[] */
    uint32_t reserved;          /* not read anywhere in this file */
    PointerTOCEntry entry[2];   /* Actual size is from count. */
} PointerTOC;
80
81
82/* definition of OffsetTOC struct types moved to ucmndata.h */
83
84/*-----------------------------------------------------------------------------*
85 * *
86 * entry point lookup implementations *
87 * *
88 *-----------------------------------------------------------------------------*/
89static uint32_t offsetTOCEntryCount(const UDataMemory *pData) {
90 int32_t retVal=0;
91 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc;
92 if (toc != NULL) {
93 retVal = toc->count;
94 }
95 return retVal;
96}
97
98
99static const DataHeader *
100offsetTOCLookupFn(const UDataMemory *pData,
101 const char *tocEntryName,
102 int32_t *pLength,
103 UErrorCode *pErrorCode) {
104 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc;
105 if(toc!=NULL) {
106 const char *base=(const char *)pData->toc;
107 uint32_t start, limit, number, lastNumber;
108 int32_t strResult;
109 const UDataOffsetTOCEntry *entry;
110
111 /* perform a binary search for the data in the common data's table of contents */
112#if defined (UDATA_DEBUG_DUMP)
113 /* list the contents of the TOC each time .. not recommended */
114 for(start=0;start<toc->count;start++) {
115 fprintf(stderr, "\tx%d: %s\n", start, &base[toc->entry[start].nameOffset]);
116 }
117#endif
118
119 start=0;
120 limit=toc->count; /* number of names in this table of contents */
121 lastNumber=limit;
122 entry=toc->entry;
123 if (limit == 0) { /* Stub common data library used during build is empty. */
124 return NULL;
125 }
126 for (;;) {
127 number = (start+limit)/2;
128 if (lastNumber == number) { /* Have we moved? */
129 break; /* We haven't moved, and it wasn't found. */
130 }
131 lastNumber = number;
132 strResult = uprv_strcmp(tocEntryName, base+entry[number].nameOffset);
133 if(strResult<0) {
134 limit=number;
135 } else if (strResult>0) {
136 start=number;
137 }
138 else {
139 /* found it */
140#ifdef UDATA_DEBUG
141 fprintf(stderr, "%s: Found.\n", tocEntryName);
142#endif
143 entry += number; /* Alias the entry to the current entry. */
144 if((number+1) < toc->count) {
145 *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset);
146 } else {
147 *pLength = -1;
148 }
149 return (const DataHeader *)(base+entry->dataOffset);
150 }
151 }
152#ifdef UDATA_DEBUG
153 fprintf(stderr, "%s: Not found.\n", tocEntryName);
154#endif
155 return NULL;
156 } else {
157#ifdef UDATA_DEBUG
158 fprintf(stderr, "returning header\n");
159#endif
160
161 return pData->pHeader;
162 }
163}
164
165
166static uint32_t pointerTOCEntryCount(const UDataMemory *pData) {
167 const PointerTOC *toc = (PointerTOC *)pData->toc;
168 if (toc != NULL) {
169 return toc->count;
170 } else {
171 return 0;
172 }
173}
174
175
176static const DataHeader *pointerTOCLookupFn(const UDataMemory *pData,
177 const char *name,
178 int32_t *pLength,
179 UErrorCode *pErrorCode) {
180 if(pData->toc!=NULL) {
181 const PointerTOC *toc = (PointerTOC *)pData->toc;
182 uint32_t start, limit, number, lastNumber;
183 int32_t strResult;
184
185#if defined (UDATA_DEBUG_DUMP)
186 /* list the contents of the TOC each time .. not recommended */
187 for(start=0;start<toc->count;start++) {
188 fprintf(stderr, "\tx%d: %s\n", start, toc->entry[start].entryName);
189 }
190#endif
191
192 /* perform a binary search for the data in the common data's table of contents */
193 start=0;
194 limit=toc->count;
195 lastNumber=limit;
196
197 if (limit == 0) { /* Stub common data library used during build is empty. */
198 return NULL;
199 }
200
201 for (;;) {
202 number = (start+limit)/2;
203 if (lastNumber == number) { /* Have we moved? */
204 break; /* We haven't moved, and it wasn't found. */
205 }
206 lastNumber = number;
207 strResult = uprv_strcmp(name, toc->entry[number].entryName);
208 if(strResult<0) {
209 limit=number;
210 } else if (strResult>0) {
211 start=number;
212 }
213 else {
214 /* found it */
215#ifdef UDATA_DEBUG
216 fprintf(STDErr, "%s: Found.\n", toc->entry[number].entryName);
217#endif
218 *pLength=-1;
219 return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader);
220 }
221 }
222#ifdef UDATA_DEBUG
223 fprintf(stderr, "%s: Not found.\n", name);
224#endif
225 return NULL;
226 } else {
227 return pData->pHeader;
228 }
229}
230
/*
 * Function tables, one per table-of-contents format. udata_checkCommonData()
 * stores the address of the matching table in UDataMemory.vFuncs:
 * CmnDFuncs for "CmnD" data, ToCPFuncs for "ToCP" data.
 * Each initializer lists the lookup function first, then the entry counter.
 */
static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount};
static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount};
233
234
235
236/*----------------------------------------------------------------------*
237 * *
238 * checkCommonData Validate the format of a common data file. *
239 * Fill in the virtual function ptr based on TOC type *
240 * If the data is invalid, close the UDataMemory *
241 * and set the appropriate error code. *
242 * *
243 *----------------------------------------------------------------------*/
244void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) {
245 if (U_FAILURE(*err)) {
246 return;
247 }
248
249 if(!(udm->pHeader->dataHeader.magic1==0xda &&
250 udm->pHeader->dataHeader.magic2==0x27 &&
251 udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN &&
252 udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY)
253 ) {
254 /* header not valid */
255 *err=U_INVALID_FORMAT_ERROR;
256 }
257 else if (udm->pHeader->info.dataFormat[0]==0x43 &&
258 udm->pHeader->info.dataFormat[1]==0x6d &&
259 udm->pHeader->info.dataFormat[2]==0x6e &&
260 udm->pHeader->info.dataFormat[3]==0x44 &&
261 udm->pHeader->info.formatVersion[0]==1
262 ) {
263 /* dataFormat="CmnD" */
264 udm->vFuncs = &CmnDFuncs;
265 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
266 }
267 else if(udm->pHeader->info.dataFormat[0]==0x54 &&
268 udm->pHeader->info.dataFormat[1]==0x6f &&
269 udm->pHeader->info.dataFormat[2]==0x43 &&
270 udm->pHeader->info.dataFormat[3]==0x50 &&
271 udm->pHeader->info.formatVersion[0]==1
272 ) {
273 /* dataFormat="ToCP" */
274 udm->vFuncs = &ToCPFuncs;
275 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
276 }
277 else {
278 /* dataFormat not recognized */
279 *err=U_INVALID_FORMAT_ERROR;
280 }
281
282 if (U_FAILURE(*err)) {
283 /* If the data is no good and we memory-mapped it ourselves,
284 * close the memory mapping so it doesn't leak. Note that this has
285 * no effect on non-memory mapped data, other than clearing fields in udm.
286 */
287 udata_close(udm);
288 }
289}
290
291/*
292 * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package
293 * header but not its sub-items.
294 * This function will be needed for automatic runtime swapping.
295 * Sub-items should not be swapped to limit the swapping to the parts of the
296 * package that are actually used.
297 *
298 * Since lengths of items are implicit in the order and offsets of their
299 * ToC entries, and since offsets are relative to the start of the ToC,
300 * a swapped version may need to generate a different data structure
301 * with pointers to the original data items and with their lengths
302 * (-1 for the last one if it is not known), and maybe even pointers to the
303 * swapped versions of the items.
304 * These pointers to swapped versions would establish a cache;
305 * instead, each open data item could simply own the storage for its swapped
306 * data. This fits better with the current design.
307 *
308 * markus 2003sep18 Jitterbug 2235
309 */