]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ****************************************************************************** | |
3 | * | |
4 | * Copyright (C) 1999-2011, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************/ | |
8 | ||
9 | ||
10 | /*------------------------------------------------------------------------------ | |
11 | * | |
12 | * UCommonData An abstract interface for dealing with ICU Common Data Files. | |
13 | * ICU Common Data Files are a grouping of a number of individual | |
14 | * data items (resources, converters, tables, anything) into a | |
15 | * single file or dll. The combined format includes a table of | |
16 | * contents for locating the individual items by name. | |
17 | * | |
18 | * Two formats for the table of contents are supported, which is | |
19 | * why there is an abstract interface involved. | |
20 | * | |
21 | */ | |
22 | ||
23 | #include "unicode/utypes.h" | |
24 | #include "unicode/udata.h" | |
25 | #include "cstring.h" | |
26 | #include "ucmndata.h" | |
27 | #include "udatamem.h" | |
28 | ||
29 | #if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP) | |
30 | # include <stdio.h> | |
31 | #endif | |
32 | ||
33 | U_CFUNC uint16_t | |
34 | udata_getHeaderSize(const DataHeader *udh) { | |
35 | if(udh==NULL) { | |
36 | return 0; | |
37 | } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) { | |
38 | /* same endianness */ | |
39 | return udh->dataHeader.headerSize; | |
40 | } else { | |
41 | /* opposite endianness */ | |
42 | uint16_t x=udh->dataHeader.headerSize; | |
43 | return (uint16_t)((x<<8)|(x>>8)); | |
44 | } | |
45 | } | |
46 | ||
47 | U_CFUNC uint16_t | |
48 | udata_getInfoSize(const UDataInfo *info) { | |
49 | if(info==NULL) { | |
50 | return 0; | |
51 | } else if(info->isBigEndian==U_IS_BIG_ENDIAN) { | |
52 | /* same endianness */ | |
53 | return info->size; | |
54 | } else { | |
55 | /* opposite endianness */ | |
56 | uint16_t x=info->size; | |
57 | return (uint16_t)((x<<8)|(x>>8)); | |
58 | } | |
59 | } | |
60 | ||
61 | /*-----------------------------------------------------------------------------* | |
62 | * * | |
63 | * Pointer TOCs. TODO: This form of table-of-contents should be removed * | |
64 | * because DLLs must be relocated on loading to correct the * | |
65 | * pointer values and this operation makes shared memory * | |
66 | * mapping of the data much less likely to work. * | |
67 | * * | |
68 | *-----------------------------------------------------------------------------*/ | |
69 | typedef struct { | |
70 | const char *entryName; | |
71 | const DataHeader *pHeader; | |
72 | } PointerTOCEntry; | |
73 | ||
74 | ||
75 | typedef struct { | |
76 | uint32_t count; | |
77 | uint32_t reserved; | |
78 | PointerTOCEntry entry[2]; /* Actual size is from count. */ | |
79 | } PointerTOC; | |
80 | ||
81 | ||
82 | /* definition of OffsetTOC struct types moved to ucmndata.h */ | |
83 | ||
84 | /*-----------------------------------------------------------------------------* | |
85 | * * | |
86 | * entry point lookup implementations * | |
87 | * * | |
88 | *-----------------------------------------------------------------------------*/ | |
89 | ||
/*
 * Smaller of two values. Defined only if the platform has not already
 * provided MIN. Either argument may be evaluated twice, so pass only
 * expressions without side effects.
 */
#if !defined(MIN)
#   define MIN(a,b) ((a)<(b) ? (a) : (b))
#endif
93 | ||
/**
 * Compare two NUL-terminated strings that are already known to agree on
 * their first *pPrefixLength bytes, skipping that shared prefix.
 *
 * Bytes are compared as unsigned values. On return, *pPrefixLength has been
 * advanced by the number of additional leading bytes found to be equal
 * (a terminating NUL that ends the comparison is not counted).
 *
 * @param s1             First string.
 * @param s2             Second string.
 * @param pPrefixLength  In: length of the known shared prefix.
 *                       Out: length of the shared prefix actually found.
 * @return 0 if the strings are equal, otherwise the difference between the
 *         first pair of differing bytes (negative if s1 sorts before s2).
 */
static int32_t
strcmpAfterPrefix(const char *s1, const char *s2, int32_t *pPrefixLength) {
    int32_t shared=*pPrefixLength;
    const uint8_t *p1=(const uint8_t *)s1+shared;
    const uint8_t *p2=(const uint8_t *)s2+shared;
    int32_t diff;

    /* walk forward while the bytes match and the end has not been reached */
    while((diff=(int32_t)*p1-(int32_t)*p2)==0 && *p1!=0) {
        ++p1;
        ++p2;
        ++shared;
    }

    *pPrefixLength=shared;
    return diff;
}
116 | ||
117 | static int32_t | |
118 | offsetTOCPrefixBinarySearch(const char *s, const char *names, | |
119 | const UDataOffsetTOCEntry *toc, int32_t count) { | |
120 | int32_t start=0; | |
121 | int32_t limit=count; | |
122 | /* | |
123 | * Remember the shared prefix between s, start and limit, | |
124 | * and don't compare that shared prefix again. | |
125 | * The shared prefix should get longer as we narrow the [start, limit[ range. | |
126 | */ | |
127 | int32_t startPrefixLength=0; | |
128 | int32_t limitPrefixLength=0; | |
129 | if(count==0) { | |
130 | return -1; | |
131 | } | |
132 | /* | |
133 | * Prime the prefix lengths so that we don't keep prefixLength at 0 until | |
134 | * both the start and limit indexes have moved. | |
135 | * At the same time, we find if s is one of the start and (limit-1) names, | |
136 | * and if not, exclude them from the actual binary search. | |
137 | */ | |
138 | if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, &startPrefixLength)) { | |
139 | return 0; | |
140 | } | |
141 | ++start; | |
142 | --limit; | |
143 | if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, &limitPrefixLength)) { | |
144 | return limit; | |
145 | } | |
146 | while(start<limit) { | |
147 | int32_t i=(start+limit)/2; | |
148 | int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength); | |
149 | int32_t cmp=strcmpAfterPrefix(s, names+toc[i].nameOffset, &prefixLength); | |
150 | if(cmp<0) { | |
151 | limit=i; | |
152 | limitPrefixLength=prefixLength; | |
153 | } else if(cmp==0) { | |
154 | return i; | |
155 | } else { | |
156 | start=i+1; | |
157 | startPrefixLength=prefixLength; | |
158 | } | |
159 | } | |
160 | return -1; | |
161 | } | |
162 | ||
163 | static int32_t | |
164 | pointerTOCPrefixBinarySearch(const char *s, const PointerTOCEntry *toc, int32_t count) { | |
165 | int32_t start=0; | |
166 | int32_t limit=count; | |
167 | /* | |
168 | * Remember the shared prefix between s, start and limit, | |
169 | * and don't compare that shared prefix again. | |
170 | * The shared prefix should get longer as we narrow the [start, limit[ range. | |
171 | */ | |
172 | int32_t startPrefixLength=0; | |
173 | int32_t limitPrefixLength=0; | |
174 | if(count==0) { | |
175 | return -1; | |
176 | } | |
177 | /* | |
178 | * Prime the prefix lengths so that we don't keep prefixLength at 0 until | |
179 | * both the start and limit indexes have moved. | |
180 | * At the same time, we find if s is one of the start and (limit-1) names, | |
181 | * and if not, exclude them from the actual binary search. | |
182 | */ | |
183 | if(0==strcmpAfterPrefix(s, toc[0].entryName, &startPrefixLength)) { | |
184 | return 0; | |
185 | } | |
186 | ++start; | |
187 | --limit; | |
188 | if(0==strcmpAfterPrefix(s, toc[limit].entryName, &limitPrefixLength)) { | |
189 | return limit; | |
190 | } | |
191 | while(start<limit) { | |
192 | int32_t i=(start+limit)/2; | |
193 | int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength); | |
194 | int32_t cmp=strcmpAfterPrefix(s, toc[i].entryName, &prefixLength); | |
195 | if(cmp<0) { | |
196 | limit=i; | |
197 | limitPrefixLength=prefixLength; | |
198 | } else if(cmp==0) { | |
199 | return i; | |
200 | } else { | |
201 | start=i+1; | |
202 | startPrefixLength=prefixLength; | |
203 | } | |
204 | } | |
205 | return -1; | |
206 | } | |
207 | ||
208 | static uint32_t offsetTOCEntryCount(const UDataMemory *pData) { | |
209 | int32_t retVal=0; | |
210 | const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; | |
211 | if (toc != NULL) { | |
212 | retVal = toc->count; | |
213 | } | |
214 | return retVal; | |
215 | } | |
216 | ||
217 | static const DataHeader * | |
218 | offsetTOCLookupFn(const UDataMemory *pData, | |
219 | const char *tocEntryName, | |
220 | int32_t *pLength, | |
221 | UErrorCode *pErrorCode) { | |
222 | const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; | |
223 | if(toc!=NULL) { | |
224 | const char *base=(const char *)toc; | |
225 | int32_t number, count=(int32_t)toc->count; | |
226 | ||
227 | /* perform a binary search for the data in the common data's table of contents */ | |
228 | #if defined (UDATA_DEBUG_DUMP) | |
229 | /* list the contents of the TOC each time .. not recommended */ | |
230 | for(number=0; number<count; ++number) { | |
231 | fprintf(stderr, "\tx%d: %s\n", number, &base[toc->entry[number].nameOffset]); | |
232 | } | |
233 | #endif | |
234 | number=offsetTOCPrefixBinarySearch(tocEntryName, base, toc->entry, count); | |
235 | if(number>=0) { | |
236 | /* found it */ | |
237 | const UDataOffsetTOCEntry *entry=toc->entry+number; | |
238 | #ifdef UDATA_DEBUG | |
239 | fprintf(stderr, "%s: Found.\n", tocEntryName); | |
240 | #endif | |
241 | if((number+1) < count) { | |
242 | *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset); | |
243 | } else { | |
244 | *pLength = -1; | |
245 | } | |
246 | return (const DataHeader *)(base+entry->dataOffset); | |
247 | } else { | |
248 | #ifdef UDATA_DEBUG | |
249 | fprintf(stderr, "%s: Not found.\n", tocEntryName); | |
250 | #endif | |
251 | return NULL; | |
252 | } | |
253 | } else { | |
254 | #ifdef UDATA_DEBUG | |
255 | fprintf(stderr, "returning header\n"); | |
256 | #endif | |
257 | ||
258 | return pData->pHeader; | |
259 | } | |
260 | } | |
261 | ||
262 | ||
263 | static uint32_t pointerTOCEntryCount(const UDataMemory *pData) { | |
264 | const PointerTOC *toc = (PointerTOC *)pData->toc; | |
265 | return (uint32_t)((toc != NULL) ? (toc->count) : 0); | |
266 | } | |
267 | ||
268 | ||
269 | static const DataHeader *pointerTOCLookupFn(const UDataMemory *pData, | |
270 | const char *name, | |
271 | int32_t *pLength, | |
272 | UErrorCode *pErrorCode) { | |
273 | if(pData->toc!=NULL) { | |
274 | const PointerTOC *toc = (PointerTOC *)pData->toc; | |
275 | int32_t number, count=(int32_t)toc->count; | |
276 | ||
277 | #if defined (UDATA_DEBUG_DUMP) | |
278 | /* list the contents of the TOC each time .. not recommended */ | |
279 | for(number=0; number<count; ++number) { | |
280 | fprintf(stderr, "\tx%d: %s\n", number, toc->entry[number].entryName); | |
281 | } | |
282 | #endif | |
283 | number=pointerTOCPrefixBinarySearch(name, toc->entry, count); | |
284 | if(number>=0) { | |
285 | /* found it */ | |
286 | #ifdef UDATA_DEBUG | |
287 | fprintf(stderr, "%s: Found.\n", toc->entry[number].entryName); | |
288 | #endif | |
289 | *pLength=-1; | |
290 | return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader); | |
291 | } else { | |
292 | #ifdef UDATA_DEBUG | |
293 | fprintf(stderr, "%s: Not found.\n", name); | |
294 | #endif | |
295 | return NULL; | |
296 | } | |
297 | } else { | |
298 | return pData->pHeader; | |
299 | } | |
300 | } | |
301 | ||
302 | static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount}; | |
303 | static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount}; | |
304 | ||
305 | ||
306 | ||
307 | /*----------------------------------------------------------------------* | |
308 | * * | |
309 | * checkCommonData Validate the format of a common data file. * | |
310 | * Fill in the virtual function ptr based on TOC type * | |
311 | * If the data is invalid, close the UDataMemory * | |
312 | * and set the appropriate error code. * | |
313 | * * | |
314 | *----------------------------------------------------------------------*/ | |
315 | U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) { | |
316 | if (U_FAILURE(*err)) { | |
317 | return; | |
318 | } | |
319 | ||
320 | if(udm==NULL || udm->pHeader==NULL) { | |
321 | *err=U_INVALID_FORMAT_ERROR; | |
322 | } else if(!(udm->pHeader->dataHeader.magic1==0xda && | |
323 | udm->pHeader->dataHeader.magic2==0x27 && | |
324 | udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN && | |
325 | udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY) | |
326 | ) { | |
327 | /* header not valid */ | |
328 | *err=U_INVALID_FORMAT_ERROR; | |
329 | } | |
330 | else if (udm->pHeader->info.dataFormat[0]==0x43 && | |
331 | udm->pHeader->info.dataFormat[1]==0x6d && | |
332 | udm->pHeader->info.dataFormat[2]==0x6e && | |
333 | udm->pHeader->info.dataFormat[3]==0x44 && | |
334 | udm->pHeader->info.formatVersion[0]==1 | |
335 | ) { | |
336 | /* dataFormat="CmnD" */ | |
337 | udm->vFuncs = &CmnDFuncs; | |
338 | udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); | |
339 | } | |
340 | else if(udm->pHeader->info.dataFormat[0]==0x54 && | |
341 | udm->pHeader->info.dataFormat[1]==0x6f && | |
342 | udm->pHeader->info.dataFormat[2]==0x43 && | |
343 | udm->pHeader->info.dataFormat[3]==0x50 && | |
344 | udm->pHeader->info.formatVersion[0]==1 | |
345 | ) { | |
346 | /* dataFormat="ToCP" */ | |
347 | udm->vFuncs = &ToCPFuncs; | |
348 | udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); | |
349 | } | |
350 | else { | |
351 | /* dataFormat not recognized */ | |
352 | *err=U_INVALID_FORMAT_ERROR; | |
353 | } | |
354 | ||
355 | if (U_FAILURE(*err)) { | |
356 | /* If the data is no good and we memory-mapped it ourselves, | |
357 | * close the memory mapping so it doesn't leak. Note that this has | |
358 | * no effect on non-memory mapped data, other than clearing fields in udm. | |
359 | */ | |
360 | udata_close(udm); | |
361 | } | |
362 | } | |
363 | ||
364 | /* | |
365 | * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package | |
366 | * header but not its sub-items. | |
367 | * This function will be needed for automatic runtime swapping. | |
368 | * Sub-items should not be swapped to limit the swapping to the parts of the | |
369 | * package that are actually used. | |
370 | * | |
371 | * Since lengths of items are implicit in the order and offsets of their | |
372 | * ToC entries, and since offsets are relative to the start of the ToC, | |
373 | * a swapped version may need to generate a different data structure | |
374 | * with pointers to the original data items and with their lengths | |
375 | * (-1 for the last one if it is not known), and maybe even pointers to the | |
376 | * swapped versions of the items. | |
377 | * These pointers to swapped versions would establish a cache; | |
378 | * instead, each open data item could simply own the storage for its swapped | |
379 | * data. This fits better with the current design. | |
380 | * | |
381 | * markus 2003sep18 Jitterbug 2235 | |
382 | */ |