]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ucmndata.c
ICU-400.37.tar.gz
[apple/icu.git] / icuSources / common / ucmndata.c
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1999-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************/
8
9
10 /*------------------------------------------------------------------------------
11 *
12 * UCommonData An abstract interface for dealing with ICU Common Data Files.
13 * ICU Common Data Files are a grouping of a number of individual
14 * data items (resources, converters, tables, anything) into a
15 * single file or dll. The combined format includes a table of
16 * contents for locating the individual items by name.
17 *
18 * Two formats for the table of contents are supported, which is
19 * why there is an abstract interface involved.
20 *
21 */
22
23 #include "unicode/utypes.h"
24 #include "unicode/udata.h"
25 #include "cstring.h"
26 #include "ucmndata.h"
27 #include "udatamem.h"
28
29 #if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP)
30 # include <stdio.h>
31 #endif
32
33 U_CFUNC uint16_t
34 udata_getHeaderSize(const DataHeader *udh) {
35 if(udh==NULL) {
36 return 0;
37 } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) {
38 /* same endianness */
39 return udh->dataHeader.headerSize;
40 } else {
41 /* opposite endianness */
42 uint16_t x=udh->dataHeader.headerSize;
43 return (uint16_t)((x<<8)|(x>>8));
44 }
45 }
46
47 U_CFUNC uint16_t
48 udata_getInfoSize(const UDataInfo *info) {
49 if(info==NULL) {
50 return 0;
51 } else if(info->isBigEndian==U_IS_BIG_ENDIAN) {
52 /* same endianness */
53 return info->size;
54 } else {
55 /* opposite endianness */
56 uint16_t x=info->size;
57 return (uint16_t)((x<<8)|(x>>8));
58 }
59 }
60
61 /*-----------------------------------------------------------------------------*
62 * *
63 * Pointer TOCs. TODO: This form of table-of-contents should be removed *
64 * because DLLs must be relocated on loading to correct the *
65 * pointer values and this operation makes shared memory *
66 * mapping of the data much less likely to work. *
67 * *
68 *-----------------------------------------------------------------------------*/
69 typedef struct {
70 const char *entryName;
71 const DataHeader *pHeader;
72 } PointerTOCEntry;
73
74
75 typedef struct {
76 uint32_t count;
77 uint32_t reserved;
78 PointerTOCEntry entry[2]; /* Actual size is from count. */
79 } PointerTOC;
80
81
82 /* definition of OffsetTOC struct types moved to ucmndata.h */
83
84 /*-----------------------------------------------------------------------------*
85 * *
86 * entry point lookup implementations *
87 * *
88 *-----------------------------------------------------------------------------*/
89 static uint32_t offsetTOCEntryCount(const UDataMemory *pData) {
90 int32_t retVal=0;
91 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc;
92 if (toc != NULL) {
93 retVal = toc->count;
94 }
95 return retVal;
96 }
97
98
99 static const DataHeader *
100 offsetTOCLookupFn(const UDataMemory *pData,
101 const char *tocEntryName,
102 int32_t *pLength,
103 UErrorCode *pErrorCode) {
104 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc;
105 if(toc!=NULL) {
106 const char *base=(const char *)toc;
107 uint32_t start, limit, number, lastNumber;
108 int32_t strResult;
109 const UDataOffsetTOCEntry *entry;
110
111 /* perform a binary search for the data in the common data's table of contents */
112 #if defined (UDATA_DEBUG_DUMP)
113 /* list the contents of the TOC each time .. not recommended */
114 for(start=0;start<toc->count;start++) {
115 fprintf(stderr, "\tx%d: %s\n", start, &base[toc->entry[start].nameOffset]);
116 }
117 #endif
118
119 start=0;
120 limit=toc->count; /* number of names in this table of contents */
121 lastNumber=limit;
122 entry=toc->entry;
123 for (;;) {
124 number = (start+limit)/2;
125 if (lastNumber == number) { /* Have we moved? */
126 break; /* We haven't moved, and it wasn't found; */
127 /* or the empty stub common data library was used during build. */
128 }
129 lastNumber = number;
130 strResult = uprv_strcmp(tocEntryName, base+entry[number].nameOffset);
131 if(strResult<0) {
132 limit=number;
133 } else if (strResult>0) {
134 start=number;
135 }
136 else {
137 /* found it */
138 #ifdef UDATA_DEBUG
139 fprintf(stderr, "%s: Found.\n", tocEntryName);
140 #endif
141 entry += number; /* Alias the entry to the current entry. */
142 if((number+1) < toc->count) {
143 *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset);
144 } else {
145 *pLength = -1;
146 }
147 return (const DataHeader *)(base+entry->dataOffset);
148 }
149 }
150 #ifdef UDATA_DEBUG
151 fprintf(stderr, "%s: Not found.\n", tocEntryName);
152 #endif
153 return NULL;
154 } else {
155 #ifdef UDATA_DEBUG
156 fprintf(stderr, "returning header\n");
157 #endif
158
159 return pData->pHeader;
160 }
161 }
162
163
164 static uint32_t pointerTOCEntryCount(const UDataMemory *pData) {
165 const PointerTOC *toc = (PointerTOC *)pData->toc;
166 return (uint32_t)((toc != NULL) ? (toc->count) : 0);
167 }
168
169
170 static const DataHeader *pointerTOCLookupFn(const UDataMemory *pData,
171 const char *name,
172 int32_t *pLength,
173 UErrorCode *pErrorCode) {
174 if(pData->toc!=NULL) {
175 const PointerTOC *toc = (PointerTOC *)pData->toc;
176 uint32_t start, limit, number, lastNumber;
177 int32_t strResult;
178
179 #if defined (UDATA_DEBUG_DUMP)
180 /* list the contents of the TOC each time .. not recommended */
181 for(start=0;start<toc->count;start++) {
182 fprintf(stderr, "\tx%d: %s\n", start, toc->entry[start].entryName);
183 }
184 #endif
185
186 /* perform a binary search for the data in the common data's table of contents */
187 start=0;
188 limit=toc->count;
189 lastNumber=limit;
190
191 for (;;) {
192 number = (start+limit)/2;
193 if (lastNumber == number) { /* Have we moved? */
194 break; /* We haven't moved, and it wasn't found, */
195 /* or the empty stub common data library was used during build. */
196 }
197 lastNumber = number;
198 strResult = uprv_strcmp(name, toc->entry[number].entryName);
199 if(strResult<0) {
200 limit=number;
201 } else if (strResult>0) {
202 start=number;
203 }
204 else {
205 /* found it */
206 #ifdef UDATA_DEBUG
207 fprintf(STDErr, "%s: Found.\n", toc->entry[number].entryName);
208 #endif
209 *pLength=-1;
210 return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader);
211 }
212 }
213 #ifdef UDATA_DEBUG
214 fprintf(stderr, "%s: Not found.\n", name);
215 #endif
216 return NULL;
217 } else {
218 return pData->pHeader;
219 }
220 }
221
222 static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount};
223 static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount};
224
225
226
227 /*----------------------------------------------------------------------*
228 * *
229 * checkCommonData Validate the format of a common data file. *
230 * Fill in the virtual function ptr based on TOC type *
231 * If the data is invalid, close the UDataMemory *
232 * and set the appropriate error code. *
233 * *
234 *----------------------------------------------------------------------*/
235 void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) {
236 if (U_FAILURE(*err)) {
237 return;
238 }
239
240 if(!(udm->pHeader->dataHeader.magic1==0xda &&
241 udm->pHeader->dataHeader.magic2==0x27 &&
242 udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN &&
243 udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY)
244 ) {
245 /* header not valid */
246 *err=U_INVALID_FORMAT_ERROR;
247 }
248 else if (udm->pHeader->info.dataFormat[0]==0x43 &&
249 udm->pHeader->info.dataFormat[1]==0x6d &&
250 udm->pHeader->info.dataFormat[2]==0x6e &&
251 udm->pHeader->info.dataFormat[3]==0x44 &&
252 udm->pHeader->info.formatVersion[0]==1
253 ) {
254 /* dataFormat="CmnD" */
255 udm->vFuncs = &CmnDFuncs;
256 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
257 }
258 else if(udm->pHeader->info.dataFormat[0]==0x54 &&
259 udm->pHeader->info.dataFormat[1]==0x6f &&
260 udm->pHeader->info.dataFormat[2]==0x43 &&
261 udm->pHeader->info.dataFormat[3]==0x50 &&
262 udm->pHeader->info.formatVersion[0]==1
263 ) {
264 /* dataFormat="ToCP" */
265 udm->vFuncs = &ToCPFuncs;
266 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
267 }
268 else {
269 /* dataFormat not recognized */
270 *err=U_INVALID_FORMAT_ERROR;
271 }
272
273 if (U_FAILURE(*err)) {
274 /* If the data is no good and we memory-mapped it ourselves,
275 * close the memory mapping so it doesn't leak. Note that this has
276 * no effect on non-memory mapped data, other than clearing fields in udm.
277 */
278 udata_close(udm);
279 }
280 }
281
282 /*
283 * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package
284 * header but not its sub-items.
285 * This function will be needed for automatic runtime swapping.
286 * Sub-items should not be swapped to limit the swapping to the parts of the
287 * package that are actually used.
288 *
289 * Since lengths of items are implicit in the order and offsets of their
290 * ToC entries, and since offsets are relative to the start of the ToC,
291 * a swapped version may need to generate a different data structure
292 * with pointers to the original data items and with their lengths
293 * (-1 for the last one if it is not known), and maybe even pointers to the
294 * swapped versions of the items.
295 * These pointers to swapped versions would establish a cache;
296 * instead, each open data item could simply own the storage for its swapped
297 * data. This fits better with the current design.
298 *
299 * markus 2003sep18 Jitterbug 2235
300 */