2 ******************************************************************************
4 * Copyright (C) 1999-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 1999oct25
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/putil.h"
22 #include "unicode/udata.h"
23 #include "unicode/uversion.h"
32 /***********************************************************************
34 * Notes on the organization of the ICU data implementation
36 * All of the public API is defined in udata.h
38 * The implementation is split into several files...
40 * - udata.c (this file) contains higher level code that knows about
41 * the search paths for locating data, caching opened data, etc.
43 * - umapfile.c contains the low level platform-specific code for actually loading
44 * (memory mapping, file reading, whatever) data into memory.
46 * - ucmndata.c deals with the tables of contents of ICU data items within
47 * an ICU common format data file. The implementation includes
48 * an abstract interface and support for multiple TOC formats.
49 * All knowledge of any specific TOC format is encapsulated here.
51 * - udatamem.c has code for managing UDataMemory structs. These are little
52 * descriptor objects for blocks of memory holding ICU data of
56 /* configuration ---------------------------------------------------------- */
58 /* If you are excruciatingly bored turn this on .. */
59 /* #define UDATA_DEBUG 1 */
61 #if defined(UDATA_DEBUG)
66 /***********************************************************************
68 * static (Global) data
70 ************************************************************************/
/* File-scope state shared by the functions in this file; every pointer starts out NULL. */
71 static UDataMemory
*gCommonICUData
= NULL
; /* Pointer to the common ICU data. */
72 /* May be updated once, if we started with */
73 /* a stub or subset library. */
75 static UDataMemory
*gStubICUData
= NULL
; /* If gCommonICUData does get updated, remember */
76 /* the original one so that it can be cleaned */
77 /* up when ICU is shut down. */
79 static UHashtable
*gCommonDataCache
= NULL
; /* Global hash table of opened ICU data files. */
/* gCommonDataCache is created on first use by udata_getHashTable(). */
/*
 * Cleanup routine for this file's global state; presumably the function that
 * is registered as udata_cleanup through ucln_common_registerCleanup.
 * The visible code closes the cache hash table (which deletes its contents),
 * closes the common ICU data and the stub ICU data when they are set, resets
 * gCommonDataCache and gCommonICUData to NULL, and returns TRUE.
 * NOTE(review): the function-name line and several body lines are not visible
 * in this listing - in particular confirm that gStubICUData is also reset to
 * NULL after it has been closed.
 */
82 static UBool U_CALLCONV
85 if (gCommonDataCache
) { /* Delete the cache of user data mappings. */
86 uhash_close(gCommonDataCache
); /* Table owns the contents, and will delete them. */
87 gCommonDataCache
= NULL
; /* Cleanup is not thread safe. */
90 if (gCommonICUData
!= NULL
) {
91 udata_close(gCommonICUData
); /* Clean up common ICU Data */
92 gCommonICUData
= NULL
;
95 if (gStubICUData
!= NULL
) {
96 udata_close(gStubICUData
); /* Clean up the stub ICU Data */
101 return TRUE
; /* Everything was cleaned up */
108 * setCommonICUData. Set a UDataMemory to be the global ICU Data
/*
 * setCommonICUData: install a copy of *pData as the global common ICU data.
 * A new UDataMemory is created and assigned from pData. When gCommonICUData
 * still equals oldData, the previous pointer is saved in gStubICUData, the new
 * instance becomes gCommonICUData, and udata_cleanup is registered.
 * The other visible statements set *pErr to U_USING_DEFAULT_WARNING and free
 * the new instance.
 * NOTE(review): the pErr parameter line, the use of `warn`, the else/return
 * structure and any locking around the compare-and-store are not visible in
 * this listing - confirm them against the full file.
 */
111 setCommonICUData(UDataMemory
*pData
, /* The new common data. Belongs to caller, we copy it. */
112 UDataMemory
*oldData
, /* Old ICUData ptr. Overwrite of this value is ok, */
113 /* of any others is not. */
114 UBool warn
, /* If true, set USING_DEFAULT warning if ICUData was */
115 /* changed by another thread before we got to it. */
118 UDataMemory
*newCommonData
= UDataMemory_createNewInstance(pErr
);
119 if (U_FAILURE(*pErr
)) {
123 /* For the assignment, other threads must cleanly see either the old */
124 /* or the new, not some partially initialized new. The old can not be */
125 /* deleted - someone may still have a pointer to it lying around in */
127 UDatamemory_assign(newCommonData
, pData
);
129 if (gCommonICUData
==oldData
) {
130 gStubICUData
= gCommonICUData
; /* remember the old Common Data, so it can be cleaned up. */
131 gCommonICUData
= newCommonData
;
132 ucln_common_registerCleanup(UCLN_COMMON_UDATA
, udata_cleanup
);
136 *pErr
= U_USING_DEFAULT_WARNING
;
138 uprv_free(newCommonData
);
/*
 * findBasename: locate the base-name portion of a path.
 * The visible code searches for the last U_FILE_SEP_CHAR in path.
 * NOTE(review): the lines that turn that pointer into the return value are
 * not visible in this listing. Callers in this file test the result both for
 * NULL and for an empty string.
 */
145 findBasename(const char *path
) {
146 const char *basename
=uprv_strrchr(path
, U_FILE_SEP_CHAR
);
/*
 * packageNameFromPath: derive a package name from a path.
 * Returns U_ICUDATA_NAME when path is NULL or empty, and again when the base
 * name found by findBasename() is NULL or empty.
 * NOTE(review): the return for the ordinary case is not visible in this
 * listing; presumably it is the base name held in `path` - confirm.
 */
156 packageNameFromPath(const char *path
)
158 if((path
== NULL
) || (*path
== 0)) {
159 return U_ICUDATA_NAME
;
162 path
= findBasename(path
);
164 if((path
== NULL
) || (*path
== 0)) {
165 return U_ICUDATA_NAME
;
172 /*----------------------------------------------------------------------*
174 * Cache for common data *
175 * Functions for looking up or adding entries to a cache of *
176 * data that has been previously opened. Avoids a potentially *
177 * expensive operation of re-opening the data for subsequent *
180 * Data remains cached for the duration of the process. *
182 *----------------------------------------------------------------------*/
/*
 * DataCacheElement: one entry of the common-data cache hash table.
 * Code in this file reads and writes its `name` (the hash key string) and
 * `item` (a UDataMemory pointer) members.
 * NOTE(review): the member declarations are not visible in this listing.
 */
184 typedef struct DataCacheElement
{
192 * Deleter function for DataCacheElements.
193 * udata cleanup function closes the hash table; hash table in turn calls back to
194 * here for each entry.
196 static void U_EXPORT2 U_CALLCONV
DataCacheElement_deleter(void *pDCEl
) {
197 DataCacheElement
*p
= (DataCacheElement
*)pDCEl
;
198 udata_close(p
->item
); /* unmaps storage */
199 uprv_free(p
->name
); /* delete the hash key string. */
200 uprv_free(pDCEl
); /* delete 'this' */
203 /* udata_getCacheHashTable()
204 * Get the hash table used to store the data cache entries.
205 * Lazy create it if it doesn't yet exist.
/*
 * udata_getHashTable: return the global cache hash table, creating it on
 * first use.
 * If gCommonDataCache is already set it is returned directly. Otherwise a
 * table using uhash_hashChars / uhash_compareChars is opened with
 * DataCacheElement_deleter as its value deleter; if gCommonDataCache is still
 * NULL the new table is stored there and udata_cleanup is registered.
 * Returns NULL when uhash_open reported a failure.
 * NOTE(review): uhash_setValueDeleter is called before `err` is examined, and
 * tHT may already have been stored in gCommonDataCache when NULL is returned;
 * confirm both are safe. Locking and the disposal of an unused tHT are not
 * visible in this listing.
 */
207 static UHashtable
*udata_getHashTable() {
208 UErrorCode err
= U_ZERO_ERROR
;
209 UBool cacheIsInitialized
;
210 UHashtable
*tHT
= NULL
;
213 cacheIsInitialized
= (gCommonDataCache
!= NULL
);
216 if (cacheIsInitialized
) {
217 return gCommonDataCache
;
220 tHT
= uhash_open(uhash_hashChars
, uhash_compareChars
, &err
);
221 uhash_setValueDeleter(tHT
, DataCacheElement_deleter
);
224 if (gCommonDataCache
== NULL
) {
225 gCommonDataCache
= tHT
;
227 ucln_common_registerCleanup(UCLN_COMMON_UDATA
, udata_cleanup
);
234 if (U_FAILURE(err
)) {
235 return NULL
; /* TODO: handle this error better. */
237 return gCommonDataCache
;
/*
 * udata_findCachedData: look up previously cached common data.
 * Only the base name of path (findBasename) is used as the hash key; the
 * element found in the table is a DataCacheElement. retVal starts as NULL.
 * The fprintf is diagnostic output to stderr.
 * NOTE(review): the declaration of htable, the lines that copy the found
 * element's item into retVal, and the return statement are not visible in
 * this listing. The result of udata_getHashTable() (which can be NULL) is
 * passed to uhash_get without a visible check.
 */
242 static UDataMemory
*udata_findCachedData(const char *path
)
245 UDataMemory
*retVal
= NULL
;
246 DataCacheElement
*el
;
247 const char *baseName
;
249 baseName
= findBasename(path
); /* Cache remembers only the base name, not the full path. */
250 htable
= udata_getHashTable();
252 el
= (DataCacheElement
*)uhash_get(htable
, baseName
);
258 fprintf(stderr
, "Cache: [%s] -> %p\n", baseName
, retVal
);
/*
 * udata_cacheDataItem: add a copy of *item to the common-data cache, keyed by
 * the base name of path.
 * The visible code allocates a DataCacheElement, a new UDataMemory assigned
 * from item, and a copy of findBasename(path) for the key. It then looks for
 * an existing entry and passes newElement->name (key) and newElement (value)
 * to the hash table. If an entry already existed (subErr becomes
 * U_USING_DEFAULT_WARNING) or the table operation failed, *pErr receives that
 * status and the new element, its name and its item are freed. On the
 * remaining path the return value is newElement->item.
 *
 * NOTE(review): the existing-entry lookup passes `path` to uhash_get, while
 * the key that gets stored is the base name - for a path that contains
 * directories the two differ, so an existing entry would be missed.
 * NOTE(review): oldValue is declared UDataMemory* but receives the uhash_get
 * result, which udata_findCachedData treats as a DataCacheElement*.
 * NOTE(review): the statements executed when UDataMemory_createNewInstance or
 * the name allocation fails are not visible in this listing; check that the
 * earlier allocations are released there. Declarations of htable and nameLen,
 * the put call itself and any locking are also not visible.
 */
264 static UDataMemory
*udata_cacheDataItem(const char *path
, UDataMemory
*item
, UErrorCode
*pErr
) {
265 DataCacheElement
*newElement
;
266 const char *baseName
;
269 UDataMemory
*oldValue
= NULL
;
270 UErrorCode subErr
= U_ZERO_ERROR
;
272 if (U_FAILURE(*pErr
)) {
276 /* Create a new DataCacheElement - the thingy we store in the hash table -
277 * and copy the supplied path and UDataMemoryItems into it.
279 newElement
= uprv_malloc(sizeof(DataCacheElement
));
280 if (newElement
== NULL
) {
281 *pErr
= U_MEMORY_ALLOCATION_ERROR
;
284 newElement
->item
= UDataMemory_createNewInstance(pErr
);
285 if (U_FAILURE(*pErr
)) {
288 UDatamemory_assign(newElement
->item
, item
);
290 baseName
= findBasename(path
);
291 nameLen
= (int32_t)uprv_strlen(baseName
);
292 newElement
->name
= uprv_malloc(nameLen
+1);
293 if (newElement
->name
== NULL
) {
294 *pErr
= U_MEMORY_ALLOCATION_ERROR
;
297 uprv_strcpy(newElement
->name
, baseName
);
299 /* Stick the new DataCacheElement into the hash table.
301 htable
= udata_getHashTable();
303 oldValue
= uhash_get(htable
, path
);
304 if (oldValue
!= NULL
) {
305 subErr
= U_USING_DEFAULT_WARNING
;
310 newElement
->name
, /* Key */
311 newElement
, /* Value */
317 fprintf(stderr
, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement
->name
,
318 newElement
->item
, u_errorName(subErr
), newElement
->item
->vFuncs
);
321 if (subErr
== U_USING_DEFAULT_WARNING
|| U_FAILURE(subErr
)) {
322 *pErr
= subErr
; /* copy sub err unto fillin ONLY if something happens. */
323 uprv_free(newElement
->name
);
324 uprv_free(newElement
->item
);
325 uprv_free(newElement
);
329 return newElement
->item
;
334 /*-------------------------------------------------------------------------------
336 * TinyString - a small set of really simple string functions, for
337 * the purpose of consolidating buffer overflow code in one place
339 * Use wherever you would otherwise declare a fixed sized char[xx] buffer.
340 * Do non-growing ops by accessing fields of struct directly
341 * Grow using the append function to automatically extend buffer
344 *-------------------------------------------------------------------------------*/
/*
 * TinyString: small growable C string. fStaticBuf is the built-in 100-byte
 * buffer. The TinyString_* functions in this file also use the members
 * s (current buffer), length and fCapacity.
 * NOTE(review): the declarations of those other members are not visible in
 * this listing.
 */
345 typedef struct TinyString
{
348 char fStaticBuf
[100];
/*
 * TinyString_init: point s at the embedded fStaticBuf and set fCapacity to
 * the size of that buffer minus one.
 * NOTE(review): the lines between those two assignments are not visible in
 * this listing; presumably they reset the string contents and length -
 * confirm.
 */
352 static void TinyString_init(TinyString
*This
) {
353 This
->s
= This
->fStaticBuf
;
356 This
->fCapacity
= sizeof(This
->fStaticBuf
)-1;
/*
 * TinyString_append: append the NUL-terminated string `what`.
 * When the resulting length would reach fCapacity, a buffer of twice the new
 * length (plus one byte) is allocated and the current contents are copied
 * into it. The text is appended and length updated only if the new length is
 * below fCapacity, so when the allocation fails the string is left unchanged
 * and the append is silently dropped.
 * NOTE(review): the declaration of newLen, the release of a previous heap
 * buffer and the assignment of newBuf to This->s are not visible in this
 * listing.
 */
359 static void TinyString_append(TinyString
*This
, const char *what
) {
361 newLen
= This
->length
+ (int32_t)uprv_strlen(what
);
362 if (newLen
>= This
->fCapacity
) {
363 int32_t newCapacity
= newLen
* 2;
364 char *newBuf
= (char *)uprv_malloc(newCapacity
+1);
365 if (newBuf
!= NULL
) {
366 uprv_strcpy(newBuf
, This
->s
);
367 if (This
->s
!= This
->fStaticBuf
) {
371 This
->fCapacity
= newCapacity
;
374 if (newLen
< This
->fCapacity
) {
375 uprv_strcat(This
->s
+This
->length
, what
);
376 This
->length
= newLen
;
/*
 * TinyString_appendn: append at most n characters of `what`.
 * Growth works as in TinyString_append: when length + n would reach
 * fCapacity, a buffer of twice the new length (plus one byte) is allocated
 * and the current contents copied over. The append happens only if the new
 * length is below fCapacity, so it is silently dropped on allocation failure.
 * NOTE(review): length is advanced by n regardless of how many characters
 * uprv_strncat actually copies - assumes `what` holds at least n characters;
 * confirm against callers.
 * NOTE(review): the declaration of newLen, the release of a previous heap
 * buffer and the assignment of newBuf to This->s are not visible in this
 * listing.
 */
380 static void TinyString_appendn(TinyString
*This
, const char *what
, int32_t n
) {
382 newLen
= This
->length
+ n
;
383 if (newLen
>= This
->fCapacity
) {
384 int32_t newCapacity
= newLen
* 2;
385 char *newBuf
= (char *)uprv_malloc(newCapacity
+1);
386 if (newBuf
!= NULL
) {
387 uprv_strcpy(newBuf
, This
->s
);
388 if (This
->s
!= This
->fStaticBuf
) {
392 This
->fCapacity
= newCapacity
;
395 if (newLen
< This
->fCapacity
) {
396 uprv_strncat(This
->s
+This
->length
, what
, n
);
397 This
->length
= newLen
;
/*
 * TinyString_dt: destructor. Handles the case where s no longer points at the
 * embedded fStaticBuf, then re-initializes the string with TinyString_init.
 * NOTE(review): the body of the `if` is not visible in this listing;
 * presumably it frees the heap buffer - confirm.
 */
401 static void TinyString_dt(TinyString
*This
) {
402 if (This
->s
!= This
->fStaticBuf
) {
405 TinyString_init(This
);
411 /*----------------------------------------------------------------------*==============
413 * Path management. Could be shared with other tools/etc if need be *
416 *----------------------------------------------------------------------*/
418 #define U_DATA_PATHITER_BUFSIZ 128 /* Size of local buffer for paths */
419 /* Overflow causes malloc of larger buf */
/*
 * Members of the path iterator state used by udata_pathiter_init / _next / _dt.
 * itemPath, pathBuffer and packageStub are each paired with a fixed array of
 * U_DATA_PATHITER_BUFSIZ bytes (itemPathBuf, pathBufferA, packageStubBuf);
 * udata_pathiter_init points them either at that array or at a block from
 * uprv_malloc.
 * NOTE(review): the opening and closing lines of the struct definition are
 * not visible in this listing.
 */
423 const char *path
; /* working path (u_icudata_Dir) */
424 const char *nextPath
; /* path following this one */
425 const char *basename
; /* item's basename (icudt22e_mt.res)*/
426 const char *suffix
; /* item suffix (can be null) */
428 uint32_t basenameLen
; /* length of basename */
430 char *itemPath
; /* path passed in with item name */
431 char itemPathBuf
[U_DATA_PATHITER_BUFSIZ
];
433 char *pathBuffer
; /* output path for this it'ion */
434 char pathBufferA
[U_DATA_PATHITER_BUFSIZ
];
436 char *packageStub
; /* example: "/icudt28b". Will ignore that leaf in set paths. */
437 char packageStubBuf
[U_DATA_PATHITER_BUFSIZ
];
438 uint32_t packageStubLen
;
440 UBool checkLastFour
; /* if TRUE then allow paths such as '/foo/myapp.dat'
441 * to match, checks last 4 chars of suffix with
442 * last 4 of path, then previous chars. */
447 * Initialize (or re-initialize) a user-supplied UDataPathIterator
448 * Note: UDataPathIterator does not allocate storage, so it doesn't need to be closed.
450 * @param iter The iterator to be initialized. Its current state does not matter.
451 * @param path The full pathname to be iterated over. If NULL, defaults to U_ICUDATA_NAME
452 * @param pkg Package which is being searched for, ex "icudt28l". Will ignore leaf directories such as /icudt28l
453 * @param item Item to be searched for. Can include full path, such as /a/b/foo.dat
454 * @param suffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
455 * Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
456 * '/blarg/stuff.dat' would also be found.
/*
 * udata_pathiter_init: set up iter for a search over candidate data paths.
 * Visible steps:
 *  - choose the search path (u_getDataDirectory() is the assignment seen here);
 *  - build packageStub as U_FILE_SEP_CHAR followed by pkg, using uprv_malloc
 *    when strlen(pkg)+2 exceeds U_DATA_PATHITER_BUFSIZ;
 *  - split item into basename and the leading itemPath (heap-allocated when
 *    it does not fit in itemPathBuf), and choose the first nextPath;
 *  - record suffix and checkLastFour;
 *  - size pathBuffer from the lengths of path, item, suffix and packageStub
 *    plus 3, switching to uprv_malloc when that reaches U_DATA_PATHITER_BUFSIZ.
 * The fprintf calls are diagnostic output to stderr.
 *
 * NOTE(review): basenameLen is computed with uprv_strlen(iter->basename)
 * before the basename == NULL test that follows it.
 * NOTE(review): the uprv_malloc result for packageStub is written through with
 * no visible NULL check, and when the pathBuffer allocation fails the code
 * falls back to pathBufferA although maxPathLen did not fit in it.
 * NOTE(review): a heap-allocated packageStub has to be released by whoever
 * tears the iterator down.
 * NOTE(review): several conditions, else branches and returns are on lines
 * not visible in this listing.
 */
458 static void udata_pathiter_init(UDataPathIterator
*iter
, const char *path
, const char *pkg
,
459 const char *item
, const char *suffix
, UBool doCheckLastFour
)
462 fprintf(stderr
, "SUFFIX1=%s PATH=%s\n", suffix
, path
);
466 iter
->path
= u_getDataDirectory();
473 iter
->packageStubLen
= 0;
474 iter
->packageStub
=iter
->packageStubBuf
;
475 iter
->packageStub
[0] = 0;
477 if(uprv_strlen(pkg
) + 2 > U_DATA_PATHITER_BUFSIZ
) {
478 iter
->packageStub
= uprv_malloc(uprv_strlen(pkg
)+2);
480 iter
->packageStub
= iter
->packageStubBuf
;
482 iter
->packageStub
[0] = U_FILE_SEP_CHAR
;
483 uprv_strcpy(iter
->packageStub
+1, pkg
);
484 iter
->packageStubLen
= (int32_t)uprv_strlen(iter
->packageStub
);
487 fprintf(stderr
, "STUB=%s [%d]\n", iter
->packageStub
, iter
->packageStubLen
);
492 iter
->basename
= findBasename(item
);
493 iter
->basenameLen
= (int32_t)uprv_strlen(iter
->basename
);
495 if(iter
->basename
== NULL
) {
496 iter
->nextPath
= NULL
;
501 iter
->itemPath
= iter
->itemPathBuf
;
502 if(iter
->basename
== item
) {
503 iter
->itemPath
[0] = 0;
504 iter
->nextPath
= iter
->path
;
506 int32_t itemPathLen
= (int32_t)(iter
->basename
-item
);
507 if (itemPathLen
>= U_DATA_PATHITER_BUFSIZ
) {
508 char *t
= (char *)uprv_malloc(itemPathLen
+1);
512 /* Malloc failed. Ignore the itemPath. */
516 uprv_strncpy(iter
->itemPath
, item
, itemPathLen
);
517 iter
->itemPath
[itemPathLen
]=0;
518 iter
->nextPath
= iter
->itemPath
;
521 fprintf(stderr
, "SUFFIX=%s [%p]\n", suffix
, suffix
);
526 iter
->suffix
= suffix
;
531 iter
->checkLastFour
= doCheckLastFour
;
533 /* pathBuffer will hold the output path strings returned by this iterator
534 * Get an upper bound of possible string size, and make sure that the buffer
535 * is big enough (sum of length of each piece, 2 extra delimiters, + trailing NULL) */
537 int32_t maxPathLen
= (int32_t)uprv_strlen(iter
->path
) + uprv_strlen(item
) + uprv_strlen(iter
->suffix
) + iter
->packageStubLen
+ 3;
538 iter
->pathBuffer
= iter
->pathBufferA
;
539 if (maxPathLen
>= U_DATA_PATHITER_BUFSIZ
) {
540 iter
->pathBuffer
= (char *)uprv_malloc(maxPathLen
);
541 if (iter
->pathBuffer
== NULL
) {
542 iter
->pathBuffer
= iter
->pathBufferA
;
549 fprintf(stderr
, "%p: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n",
557 iter
->checkLastFour
?"TRUE":"false");
563 * Get the next path on the list.
565 * @param iter The Iter to be used
566 * @param outPathLen If set, pointer to the length of the returned path, for convenience.
567 * @return Pointer to the next path segment, or NULL if there are no more.
/*
 * udata_pathiter_next: produce the next candidate path in iter->pathBuffer.
 * The current segment starts at iter->nextPath. If that is itemPath, the whole
 * item path is the segment and iteration continues with iter->path next time;
 * otherwise the segment runs up to the next U_PATH_SEP_CHAR, or to the end of
 * the string when there is none. The segment is copied into pathBuffer.
 * With checkLastFour set, a segment whose last four characters match suffix
 * and whose base name is basename followed by four more characters is
 * recognized as the wanted file itself. Otherwise the segment is handled as a
 * directory: a trailing packageStub is cut off, a U_FILE_SEP_CHAR is written,
 * packageStubLen-1 characters are copied in, and the suffix is appended when
 * it is not empty. The resulting length goes to *outPathLen and pathBuffer is
 * returned.
 * The fprintf calls are diagnostic output to stderr.
 *
 * NOTE(review): pathLen is unsigned and the code addresses
 * pathBuffer+(pathLen-4) and pathBuffer[pathLen-1]; the guards for short or
 * empty segments are on lines not visible in this listing - confirm them.
 * NOTE(review): the loop construct, the NULL returns, the source argument of
 * the uprv_strncpy / uprv_strcpy calls near the end and the NULL test around
 * the final *outPathLen store are likewise not visible.
 */
569 static const char *udata_pathiter_next(UDataPathIterator
*iter
, int32_t *outPathLen
)
571 const char *path
= NULL
;
572 uint32_t pathLen
= 0;
573 const char *pathBasename
;
575 if(outPathLen
!= NULL
) {
581 if( iter
->nextPath
== NULL
) {
585 path
= iter
->nextPath
;
587 if(iter
->nextPath
== iter
->itemPath
) { /* we were processing item's path. */
588 iter
->nextPath
= iter
->path
; /* start with regular path next tm. */
589 pathLen
= (int32_t)uprv_strlen(path
);
591 /* fix up next for next time */
592 iter
->nextPath
= uprv_strchr(path
, U_PATH_SEP_CHAR
);
593 if(iter
->nextPath
== NULL
) {
594 /* segment: entire path */
595 pathLen
= (int32_t)uprv_strlen(path
);
597 /* segment: until next segment */
598 pathLen
= (int32_t)(iter
->nextPath
- path
);
599 if(*iter
->nextPath
) { /* skip divider */
610 fprintf(stderr
, "rest of path (IDD) = %s\n", path
);
611 fprintf(stderr
, " ");
614 for(qqq
=0;qqq
<pathLen
;qqq
++)
616 fprintf(stderr
, " ");
619 fprintf(stderr
, "^\n");
622 uprv_strncpy(iter
->pathBuffer
, path
, pathLen
);
623 iter
->pathBuffer
[pathLen
] = 0;
625 /* check for .dat files */
626 pathBasename
= findBasename(iter
->pathBuffer
);
628 if(iter
->checkLastFour
== TRUE
&&
630 uprv_strncmp(iter
->pathBuffer
+(pathLen
-4),iter
->suffix
,4)==0 && /* suffix matches */
631 uprv_strncmp(findBasename(iter
->pathBuffer
),iter
->basename
,iter
->basenameLen
)==0 && /* base matches */
632 uprv_strlen(pathBasename
)==(iter
->basenameLen
+4)) { /* base+suffix = full len */
635 fprintf(stderr
, "Have %s file on the path: %s\n", iter
->suffix
, iter
->pathBuffer
);
640 { /* regular dir path */
641 if(iter
->pathBuffer
[pathLen
-1] != U_FILE_SEP_CHAR
) {
643 uprv_strncmp(iter
->pathBuffer
+(pathLen
-4), ".dat", 4) == 0)
646 fprintf(stderr
, "skipping non-directory .dat file %s\n", iter
->pathBuffer
);
651 /* Check if it is a directory with the same name as our package */
652 if(iter
->packageStubLen
&&
653 (pathLen
> iter
->packageStubLen
) &&
654 !uprv_strcmp(iter
->pathBuffer
+ pathLen
- iter
->packageStubLen
, iter
->packageStub
)) {
656 fprintf(stderr
, "Found stub %s ( will add package %s of len %d)\n", iter
->packageStub
, iter
->basename
, iter
->basenameLen
);
658 pathLen
-= iter
->packageStubLen
;
661 iter
->pathBuffer
[pathLen
++] = U_FILE_SEP_CHAR
;
664 uprv_strncpy(iter
->pathBuffer
+ pathLen
, /* + basename */
666 iter
->packageStubLen
-1);
668 pathLen
+= iter
->packageStubLen
-1;
670 if(*iter
->suffix
) /* tack on suffix */
672 uprv_strcpy(iter
->pathBuffer
+ pathLen
,
674 pathLen
+= (int32_t)uprv_strlen(iter
->suffix
);
679 /* return value of path size */
681 *outPathLen
= pathLen
;
685 fprintf(stderr
, " --> %s\n", iter
->pathBuffer
);
688 return iter
->pathBuffer
;
692 /* fell way off the end */
698 * Path Iterator Destructor. Clean up any allocated storage
700 static void udata_pathiter_dt(UDataPathIterator
*iter
) {
701 if (iter
->itemPath
!= iter
->itemPathBuf
) {
702 uprv_free(iter
->itemPath
);
703 iter
->itemPath
= NULL
;
705 if (iter
->pathBuffer
!= iter
->pathBufferA
) {
706 uprv_free(iter
->pathBuffer
);
707 iter
->pathBuffer
= NULL
;
711 /* ==================================================================================*/
714 /*----------------------------------------------------------------------*
716 * Add a static reference to the common data library *
717 * Unless overridden by an explicit udata_setCommonData, this will be *
720 *----------------------------------------------------------------------*/
721 extern const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT
;
724 /*----------------------------------------------------------------------*
726 * openCommonData Attempt to open a common format (.dat) file *
727 * Map it into memory (if it's not there already) *
728 * and return a UDataMemory object for it. *
730 * If the requested data is already open and cached *
731 * just return the cached UDataMem object. *
733 *----------------------------------------------------------------------*/
/*
 * openCommonData: return the UDataMemory for a common-format (.dat) file.
 * For the ICU data itself, gCommonICUData is returned when it is already set;
 * otherwise the linked-in U_ICUDATA_ENTRY_POINT is checked and installed
 * through setCommonICUData, and gCommonICUData is returned.
 * For other requests the cache is consulted by base name, then each location
 * produced by the path iterator (data directory, ".dat" suffix) is tried with
 * uprv_mapFile until one is loaded. A missing base name or a file that cannot
 * be mapped sets *pErrorCode to U_FILE_ACCESS_ERROR. A mapped file has its
 * header checked and is then cached with udata_cacheDataItem, whose result is
 * returned.
 * The fprintf calls are diagnostic output to stderr.
 * NOTE(review): the return-type line, the declaration of tData, the branch
 * conditions (such as the isICUData test) and several return statements are
 * not visible in this listing.
 */
735 openCommonData(const char *path
, /* Path from OpenChoice? */
736 UBool isICUData
, /* ICU Data true if path == NULL */
737 UErrorCode
*pErrorCode
)
740 UDataPathIterator iter
;
741 const char *pathBuffer
;
742 const char *inBasename
;
744 if (U_FAILURE(*pErrorCode
)) {
748 UDataMemory_init(&tData
);
750 /* ??????? TODO revisit this */
752 /* "mini-cache" for common ICU data */
753 if(gCommonICUData
!= NULL
) {
754 return gCommonICUData
;
757 tData
.pHeader
= &U_ICUDATA_ENTRY_POINT
;
758 udata_checkCommonData(&tData
, pErrorCode
);
759 setCommonICUData(&tData
, NULL
, FALSE
, pErrorCode
);
760 return gCommonICUData
;
764 /* request is NOT for ICU Data. */
766 /* Find the base name portion of the supplied path. */
767 /* inBasename will be left pointing somewhere within the original path string. */
768 inBasename
= findBasename(path
);
770 fprintf(stderr
, "inBasename = %s\n", inBasename
);
774 /* no basename. This will happen if the original path was a directory name, */
775 /* like "a/b/c/". (Fallback to separate files will still work.) */
777 fprintf(stderr
, "ocd: no basename in %s, bailing.\n", path
);
779 *pErrorCode
=U_FILE_ACCESS_ERROR
;
783 /* Is the requested common data file already open and cached? */
784 /* Note that the cache is keyed by the base name only. The rest of the path, */
785 /* if any, is not considered. */
787 UDataMemory
*dataToReturn
= udata_findCachedData(inBasename
);
788 if (dataToReturn
!= NULL
) {
793 /* Requested item is not in the cache.
794 * Hunt it down, trying all the path locations
797 udata_pathiter_init(&iter
, u_getDataDirectory(), inBasename
, path
, ".dat", TRUE
);
799 while((UDataMemory_isLoaded(&tData
)==FALSE
) &&
800 (pathBuffer
= udata_pathiter_next(&iter
, NULL
)) != NULL
)
803 fprintf(stderr
, "ocd: trying path %s - ", pathBuffer
);
805 uprv_mapFile(&tData
, pathBuffer
);
807 fprintf(stderr
, "%s\n", UDataMemory_isLoaded(&tData
)?"LOADED":"not loaded");
810 udata_pathiter_dt(&iter
); /* Note: this call may invalidate "pathBuffer" */
812 #if defined(OS390_STUBDATA) && defined(OS390BATCH)
813 if (!UDataMemory_isLoaded(&tData
)) {
814 char ourPathBuffer
[1024];
815 /* One more chance, for extendCommonData() */
816 uprv_strncpy(ourPathBuffer
, path
, 1019);
817 ourPathBuffer
[1019]=0;
818 uprv_strcat(ourPathBuffer
, ".dat");
819 uprv_mapFile(&tData
, ourPathBuffer
);
823 if (!UDataMemory_isLoaded(&tData
)) {
825 *pErrorCode
=U_FILE_ACCESS_ERROR
;
829 /* we have mapped a file, check its header */
830 udata_checkCommonData(&tData
, pErrorCode
);
833 /* Cache the UDataMemory struct for this .dat file,
834 * so we won't need to hunt it down and map it again next time
835 * something is needed from it. */
836 return udata_cacheDataItem(inBasename
, &tData
, pErrorCode
);
841 # define MAX_STUB_ENTRIES 8
843 # define MAX_STUB_ENTRIES 0
847 /*----------------------------------------------------------------------*
849 * extendICUData If the full set of ICU data was not loaded at *
850 * program startup, load it now. This function will *
851 * be called when the lookup of an ICU data item in *
852 * the common ICU data fails. *
854 * The parameter is the UDataMemory in which the *
855 * search for a requested item failed. *
857 * return true if new data is loaded, false otherwise.*
859 *----------------------------------------------------------------------*/
860 static UBool
extendICUData(UDataMemory
*failedData
, UErrorCode
*pErr
)
862 /* If the data library that we are running with turns out to be the
863 * stub library (or, on the 390, the subset library), we will try to
864 * load a .dat file instead. The stub library has no entries in its
865 * TOC, which is how we identify it here.
868 UDataMemory copyPData
;
870 if (failedData
->vFuncs
->NumEntries(failedData
) > MAX_STUB_ENTRIES
) {
871 /* Not the stub. We can't extend. */
875 /* See if we can explicitly open a .dat file for the ICUData. */
876 pData
= openCommonData(
877 U_ICUDATA_NAME
, /* "icudt20l" , for example. */
878 FALSE
, /* Pretend we're not opening ICUData */
881 /* How about if there is no pData, eh... */
883 UDataMemory_init(©PData
);
885 UDatamemory_assign(©PData
, pData
);
886 copyPData
.map
= 0; /* The mapping for this data is owned by the hash table */
887 copyPData
.mapAddr
= 0; /* which will unmap it when ICU is shut down. */
888 /* CommonICUData is also unmapped when ICU is shut down.*/
889 /* To avoid unmapping the data twice, zero out the map */
890 /* fields in the UDataMemory that we're assigning */
891 /* to CommonICUData. */
893 setCommonICUData(©PData
, /* The new common data. */
894 failedData
, /* Old ICUData ptr. Overwrite of this value is ok, */
895 FALSE
, /* No warnings if write didn't happen */
896 pErr
); /* setCommonICUData honors errors; NOP if error set */
900 return gCommonICUData
!= failedData
; /* Return true if ICUData pointer was updated. */
901 /* (Could potentialy have been done by another thread racing */
902 /* us through here, but that's fine, we still return true */
903 /* so that current thread will also examine extended data. */
909 /*----------------------------------------------------------------------*
911 * udata_setCommonData *
913 *----------------------------------------------------------------------*/
/*
 * udata_setCommonData: let the caller supply the common ICU data.
 * Does nothing further when pErrorCode is NULL or already indicates failure.
 * Reports U_USING_DEFAULT_WARNING when gCommonICUData is already set.
 * Otherwise the pointer is wrapped in a local UDataMemory, validated with
 * udata_checkCommonData, and - if that succeeded - installed through
 * setCommonICUData with warnings enabled.
 * NOTE(review): the condition that leads to U_ILLEGAL_ARGUMENT_ERROR
 * (presumably data == NULL) and the early returns are on lines not visible in
 * this listing.
 */
914 U_CAPI
void U_EXPORT2
915 udata_setCommonData(const void *data
, UErrorCode
*pErrorCode
) {
916 UDataMemory dataMemory
;
918 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
923 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
927 /* do we already have common ICU data set? */
928 if(gCommonICUData
!= NULL
) {
929 *pErrorCode
=U_USING_DEFAULT_WARNING
;
933 /* set the data pointer and test for validity */
934 UDataMemory_init(&dataMemory
);
935 UDataMemory_setData(&dataMemory
, data
);
936 udata_checkCommonData(&dataMemory
, pErrorCode
);
937 if (U_FAILURE(*pErrorCode
)) {return;}
939 /* we have good data */
940 /* Set it up as the ICU Common Data. */
941 setCommonICUData(&dataMemory
, NULL
, TRUE
, pErrorCode
);
947 /*---------------------------------------------------------------------------
951 *---------------------------------------------------------------------------- */
/*
 * udata_setAppData: register caller-supplied common data for an application
 * package. A local UDataMemory is initialized, checked with
 * udata_checkCommonData and handed to udata_cacheDataItem under `path`.
 * The visible code also guards against err being NULL or already failed and
 * can report U_ILLEGAL_ARGUMENT_ERROR.
 * NOTE(review): the declaration of udm, the line that attaches `data` to it,
 * the condition for U_ILLEGAL_ARGUMENT_ERROR and the early returns are not
 * visible in this listing.
 */
952 U_CAPI
void U_EXPORT2
953 udata_setAppData(const char *path
, const void *data
, UErrorCode
*err
)
957 if(err
==NULL
|| U_FAILURE(*err
)) {
961 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
965 UDataMemory_init(&udm
);
967 udata_checkCommonData(&udm
, err
);
968 udata_cacheDataItem(path
, &udm
, err
);
971 /*----------------------------------------------------------------------------*
973 * checkDataItem Given a freshly located/loaded data item, either *
974 * an entry in a common file or a separately loaded file, *
975 * sanity check its header, and see if the data is *
976 * acceptable to the app. *
977 * If the data is good, create and return a UDataMemory *
978 * object that can be returned to the application. *
979 * Return NULL on any sort of failure. *
981 *----------------------------------------------------------------------------*/
/*
 * checkDataItem: sanity-check a located data item and wrap it.
 * The header must carry the magic bytes 0xda / 0x27 and, when isAcceptable is
 * not NULL, be accepted by that callback (called with context, type, name and
 * the header's info). On success a new UDataMemory is created (failures go to
 * *fatalErr) and its pHeader is set to the item. When the item is rejected,
 * *nonFatalErr is set to U_INVALID_FORMAT_ERROR so the caller can keep looking.
 * NOTE(review): the function-name line, the early returns and the final
 * return of rDataMem are on lines not visible in this listing.
 */
985 const DataHeader
*pHeader
, /* The data item to be checked. */
986 UDataMemoryIsAcceptable
*isAcceptable
, /* App's call-back function */
987 void *context
, /* pass-thru param for above. */
988 const char *type
, /* pass-thru param for above. */
989 const char *name
, /* pass-thru param for above. */
990 UErrorCode
*nonFatalErr
, /* Error code if this data was not acceptable */
991 /* but openChoice should continue with */
992 /* trying to get data from fallback path. */
993 UErrorCode
*fatalErr
/* Bad error, caller should return immediately */
996 UDataMemory
*rDataMem
= NULL
; /* the new UDataMemory, to be returned. */
998 if (U_FAILURE(*fatalErr
)) {
1002 if(pHeader
->dataHeader
.magic1
==0xda &&
1003 pHeader
->dataHeader
.magic2
==0x27 &&
1004 (isAcceptable
==NULL
|| isAcceptable(context
, type
, name
, &pHeader
->info
))
1006 rDataMem
=UDataMemory_createNewInstance(fatalErr
);
1007 if (U_FAILURE(*fatalErr
)) {
1010 rDataMem
->pHeader
= pHeader
;
1012 /* the data is not acceptable, look further */
1013 /* If we eventually find something good, this errorcode will be */
1015 *nonFatalErr
=U_INVALID_FORMAT_ERROR
;
1024 * A note on the ownership of Mapped Memory
1026 * For common format files, ownership resides with the UDataMemory object
1027 * that lives in the cache of opened common data. These UDataMemorys are private
1028 * to the udata implementation, and are never seen directly by users.
1030 * The UDataMemory objects returned to users will have the address of some desired
1031 * data within the mapped region, but they won't have the mapping info itself, and thus
1032 * won't cause anything to be removed from memory when they are closed.
1034 * For individual data files, the UDataMemory returned to the user holds the
1035 * information necessary to unmap the data on close. If the user independently
1036 * opens the same data file twice, two completely independent mappings will be made.
1037 * (There is no cache of opened data items from individual files, only a cache of
1038 * opened Common Data files, that is, files containing a collection of data items.)
1040 * For common data passed in from the user via udata_setAppData() or
1041 * udata_setCommonData(), ownership remains with the user.
1043 * UDataMemory objects themselves, as opposed to the memory they describe,
1044 * can be anywhere - heap, stack/local or global.
1045 * They have a flag to indicate when they're heap allocated and thus
1046 * must be deleted when closed.
1050 /*----------------------------------------------------------------------------*
1052 * main data loading functions *
1054 *----------------------------------------------------------------------------*/
1055 static UDataMemory
*
1056 doOpenChoice(const char *path
, const char *type
, const char *name
,
1057 UDataMemoryIsAcceptable
*isAcceptable
, void *context
,
1058 UErrorCode
*pErrorCode
)
1060 UDataMemory
*retVal
= NULL
;
1062 const char *pathBuffer
;
1064 TinyString tocEntryName
; /* entry name in tree format. ex: 'icudt28b/coll/ar.res' */
1065 TinyString tocEntryPath
; /* entry name in path format. ex: 'icudt28b\\coll\\ar.res' */
1066 TinyString oldIndFileName
; /* ex: icudt28b_ar.res */
1067 TinyString oldStylePath
;
1068 TinyString oldStylePathBasename
;
1071 TinyString treeName
;
1072 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1073 TinyString altSepPath
;
1076 const char *dataPath
;
1078 const char *tocEntrySuffix
;
1079 int32_t tocEntrySuffixIndex
;
1080 const char *tocEntryPathSuffix
;
1081 UDataMemory dataMemory
;
1082 UDataMemory
*pCommonData
;
1083 UDataMemory
*pEntryData
;
1084 const DataHeader
*pHeader
;
1085 const char *inBasename
;
1086 UErrorCode errorCode
=U_ZERO_ERROR
;
1087 const char *treeChar
;
1089 UBool isICUData
= FALSE
;
1092 !strcmp(path
, U_ICUDATA_ALIAS
) ||
1093 !uprv_strncmp(path
, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING
,
1094 uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING
)) ||
1095 !uprv_strncmp(path
, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING
,
1096 uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING
))) {
1100 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1101 /* remap from alternate path char to the main one */
1102 TinyString_init(&altSepPath
);
1105 if((p
=uprv_strchr(path
,U_FILE_ALT_SEP_CHAR
))) {
1106 TinyString_append(&altSepPath
, path
);
1107 while((p
=uprv_strchr(altSepPath
.s
,U_FILE_ALT_SEP_CHAR
))) {
1108 *p
= U_FILE_SEP_CHAR
;
1110 #if defined (UDATA_DEBUG)
1111 fprintf(stderr
, "Changed path from [%s] to [%s]\n", path
, altSepPath
.s
);
1113 path
= altSepPath
.s
;
1118 TinyString_init(&oldIndFileName
);
1119 TinyString_init(&tocEntryName
);
1120 TinyString_init(&tocEntryPath
);
1121 TinyString_init(&oldStylePath
);
1122 TinyString_init(&oldStylePathBasename
);
1124 TinyString_init(&pkgName
);
1125 TinyString_init(&treeName
);
1129 TinyString_append(&pkgName
, U_ICUDATA_NAME
);
1133 pkg
= uprv_strrchr(path
, U_FILE_SEP_CHAR
);
1134 first
= uprv_strchr(path
, U_FILE_SEP_CHAR
);
1135 if(uprv_pathIsAbsolute(path
) || (pkg
!= first
)) { /* more than one slash in the path- not a tree name */
1136 /* see if this is an /absolute/path/to/package path */
1138 TinyString_append(&pkgName
, pkg
+1);
1140 TinyString_append(&pkgName
, path
);
1143 treeChar
= uprv_strchr(path
, U_TREE_SEPARATOR
);
1145 TinyString_append(&treeName
, treeChar
+1); /* following '-' */
1147 TinyString_appendn(&pkgName
, path
, (int32_t)(treeChar
-path
));
1149 TinyString_append(&pkgName
, U_ICUDATA_NAME
);
1153 TinyString_append(&pkgName
, path
);
1155 TinyString_append(&pkgName
, U_ICUDATA_NAME
);
1162 fprintf(stderr
, " P=%s T=%s\n", pkgName
.s
, treeName
.s
);
1165 /* Make up a full name by appending the type to the supplied
1166 * name, assuming that a type was supplied.
1169 /* prepend the package */
1170 TinyString_append(&tocEntryName
, pkgName
.s
);
1171 TinyString_append(&tocEntryPath
, pkgName
.s
);
1172 TinyString_append(&oldIndFileName
, pkgName
.s
);
1173 tocEntrySuffixIndex
= tocEntryName
.length
;
1176 TinyString_append(&tocEntryName
, U_TREE_ENTRY_SEP_STRING
);
1177 TinyString_append(&tocEntryName
, treeName
.s
);
1179 TinyString_append(&tocEntryPath
, U_FILE_SEP_STRING
);
1180 TinyString_append(&tocEntryPath
, treeName
.s
);
1183 TinyString_append(&oldIndFileName
, "_");
1184 TinyString_append(&tocEntryName
, U_TREE_ENTRY_SEP_STRING
);
1185 TinyString_append(&tocEntryPath
, U_FILE_SEP_STRING
);
1186 TinyString_append(&oldIndFileName
, name
);
1187 TinyString_append(&tocEntryName
, name
);
1188 TinyString_append(&tocEntryPath
, name
);
1189 if(type
!=NULL
&& *type
!=0) {
1190 TinyString_append(&tocEntryName
, ".");
1191 TinyString_append(&tocEntryName
, type
);
1192 TinyString_append(&tocEntryPath
, ".");
1193 TinyString_append(&tocEntryPath
, type
);
1194 TinyString_append(&oldIndFileName
, ".");
1195 TinyString_append(&oldIndFileName
, type
);
1197 tocEntrySuffix
= tocEntryName
.s
+tocEntrySuffixIndex
; /* suffix starts here */
1198 tocEntryPathSuffix
= tocEntryPath
.s
+tocEntrySuffixIndex
; /* suffix starts here */
1201 fprintf(stderr
, " tocEntryName = %s\n", tocEntryName
.s
);
1202 fprintf(stderr
, " tocEntryPath = %s\n", tocEntryName
.s
);
1203 fprintf(stderr
, " oldIndFileName = %s\n", oldIndFileName
.s
);
1207 /* the data was not found in the common data, look further, */
1208 /* try to get an individual data file */
1210 path
= COMMON_DATA_NAME
;
1211 inBasename
= COMMON_DATA_NAME
;
1214 inBasename
=COMMON_DATA_NAME
;
1216 inBasename
=findBasename(path
);
1220 /************************ Begin loop looking for ind. files ***************/
1222 fprintf(stderr
, "IND: inBasename = %s, pkg=%s\n", inBasename
, packageNameFromPath(path
));
1225 /* Deal with a null basename */
1226 if( (*inBasename
==0) && (uprv_strlen(path
) > 3) ) {
1227 /* the purpose of this exercise is to turn /tmp/foo/bar/ into
1228 path= /tmp/foo/bar/bar and basename= bar
1229 (i.e. /tmp/foo/bar/bar.dat or /tmp/foo/bar/bar_en_US.res )
1233 TinyString_append(&oldStylePath
, path
);
1234 /* chop off trailing slash */
1235 oldStylePath
.length
--;
1236 oldStylePath
.s
[oldStylePath
.length
] = 0;
1238 rightSlash
= (char*)uprv_strrchr(oldStylePath
.s
, U_FILE_SEP_CHAR
);
1239 if(rightSlash
!= NULL
) {
1241 TinyString_append(&oldStylePathBasename
, rightSlash
);
1242 inBasename
= oldStylePathBasename
.s
;
1243 TinyString_append(&oldStylePath
, U_FILE_SEP_STRING
);
1244 TinyString_append(&oldStylePath
, inBasename
); /* one more time, for the base name */
1245 path
= oldStylePath
.s
;
1247 *pErrorCode
= U_FILE_ACCESS_ERROR
; /* hopelessly bad case */
1252 /* End of dealing with a null basename */
1254 dataPath
= u_getDataDirectory();
1256 /* Check to make sure that there is a dataPath to iterate over */
1257 if ((dataPath
&& *dataPath
) || !isICUData
) {
1258 UDataPathIterator iter
;
1259 /* #1a look in ind. files: package\nam.typ ========================= */
1260 /* init path iterator for individual files */
1261 udata_pathiter_init(&iter
, dataPath
, pkgName
.s
, path
, tocEntryPathSuffix
, FALSE
);
1263 while((pathBuffer
= udata_pathiter_next(&iter
, NULL
)))
1266 fprintf(stderr
, "UDATA: trying individual file %s\n", pathBuffer
);
1268 if( uprv_mapFile(&dataMemory
, pathBuffer
) ||
1269 (inBasename
!=pathBuffer
&& uprv_mapFile(&dataMemory
, inBasename
)))
1271 pEntryData
= checkDataItem(dataMemory
.pHeader
, isAcceptable
, context
, type
, name
, &errorCode
, pErrorCode
);
1272 if (pEntryData
!= NULL
) {
1274 * Hand off ownership of the backing memory to the user's UDataMemory.
1276 pEntryData
->mapAddr
= dataMemory
.mapAddr
;
1277 pEntryData
->map
= dataMemory
.map
;
1280 fprintf(stderr
, "** Mapped file: %s\n", pathBuffer
);
1282 udata_pathiter_dt(&iter
);
1283 retVal
= pEntryData
;
1287 /* the data is not acceptable, or some error occurred. Either way, unmap the memory */
1288 udata_close(&dataMemory
);
1290 /* If we had a nasty error, bail out completely. */
1291 if (U_FAILURE(*pErrorCode
)) {
1292 udata_pathiter_dt(&iter
);
1297 /* Otherwise remember that we found data but didn't like it for some reason */
1298 errorCode
=U_INVALID_FORMAT_ERROR
;
1301 fprintf(stderr
, "%s\n", UDataMemory_isLoaded(&dataMemory
)?"LOADED":"not loaded");
1304 udata_pathiter_dt(&iter
);
1306 /* #1b look in ind. files - with old naming (package_nam.typ not package\nam.typ) ==================== */
1307 /* init path iterator for individual files */
1308 udata_pathiter_init(&iter
, dataPath
, "", path
, oldIndFileName
.s
, FALSE
);
1310 while((pathBuffer
= udata_pathiter_next(&iter
, NULL
)))
1313 fprintf(stderr
, "UDATA: trying individual file %s\n", pathBuffer
);
1315 if( uprv_mapFile(&dataMemory
, pathBuffer
) ||
1316 (inBasename
!=pathBuffer
&& uprv_mapFile(&dataMemory
, inBasename
)))
1318 pEntryData
= checkDataItem(dataMemory
.pHeader
, isAcceptable
, context
, type
, name
, &errorCode
, pErrorCode
);
1319 if (pEntryData
!= NULL
) {
1321 * Hand off ownership of the backing memory to the user's UDataMemory.
1323 pEntryData
->mapAddr
= dataMemory
.mapAddr
;
1324 pEntryData
->map
= dataMemory
.map
;
1327 fprintf(stderr
, "** Mapped file: %s\n", pathBuffer
);
1329 udata_pathiter_dt(&iter
);
1330 retVal
= pEntryData
;
1334 /* the data is not acceptable, or some error occurred. Either way, unmap the memory */
1335 udata_close(&dataMemory
);
1337 /* If we had a nasty error, bail out completely. */
1338 if (U_FAILURE(*pErrorCode
)) {
1339 udata_pathiter_dt(&iter
);
1344 /* Otherwise remember that we found data but didn't like it for some reason */
1345 errorCode
=U_INVALID_FORMAT_ERROR
;
1348 fprintf(stderr
, "%s\n", UDataMemory_isLoaded(&dataMemory
)?"LOADED":"not loaded");
1351 udata_pathiter_dt(&iter
);
1356 /* try to get common data. The loop is for platforms such as the 390 that do
1357 * not initially load the full set of ICU data. If the lookup of an ICU data item
1358 * fails, the full (but slower to load) set is loaded, and the loop repeats,
1359 * trying the lookup again. Once the full set of ICU data is loaded, the loop won't
1360 * repeat because the full set will be checked the first time through.
1362 * The loop also handles the fallback to a .dat file if the application linked
1363 * to the stub data library rather than a real library.
1366 pCommonData
=openCommonData(path
, isICUData
, &errorCode
); /** search for pkg **/
1368 if(U_SUCCESS(errorCode
)) {
1371 /* look up the data piece in the common data */
1372 pHeader
=pCommonData
->vFuncs
->Lookup(pCommonData
, tocEntryName
.s
, &length
, &errorCode
);
1374 fprintf(stderr
, "%s: pHeader=%p - %s\n", tocEntryName
.s
, pHeader
, u_errorName(errorCode
));
1376 if((pHeader
== NULL
) && !U_FAILURE(errorCode
)) {
1377 pHeader
=pCommonData
->vFuncs
->Lookup(pCommonData
, oldIndFileName
.s
, /* oldIndFileName is preceded by a slash */
1378 &length
, &errorCode
);
1380 fprintf(stderr
, "[OLD name] %s: pHeader=%p - %s\n", oldIndFileName
.s
, pHeader
, u_errorName(errorCode
));
1385 pEntryData
= checkDataItem(pHeader
, isAcceptable
, context
, type
, name
, &errorCode
, pErrorCode
);
1387 fprintf(stderr
, "pEntryData=%p\n", pEntryData
);
1389 if (U_FAILURE(*pErrorCode
)) {
1393 if (pEntryData
!= NULL
) {
1394 pEntryData
->length
= length
;
1395 retVal
= pEntryData
;
1400 /* Data wasn't found. If we were looking for an ICUData item and there is
1401 * more data available, load it and try again,
1402 * otherwise break out of this loop. */
1403 if (!(isICUData
&& pCommonData
&& extendICUData(pCommonData
, &errorCode
))) {
1408 /* data not found */
1409 if(U_SUCCESS(*pErrorCode
)) {
1410 if(U_SUCCESS(errorCode
)) {
1411 /* file not found */
1412 *pErrorCode
=U_FILE_ACCESS_ERROR
;
1414 /* entry point not found or rejected */
1415 *pErrorCode
=errorCode
;
1420 TinyString_dt(&tocEntryName
);
1421 TinyString_dt(&tocEntryPath
);
1422 TinyString_dt(&oldIndFileName
);
1423 TinyString_dt(&oldStylePath
);
1424 TinyString_dt(&oldStylePathBasename
);
1425 TinyString_dt(&pkgName
);
1426 TinyString_dt(&treeName
);
1427 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1428 TinyString_dt(&altSepPath
);
1435 /* API ---------------------------------------------------------------------- */
1437 U_CAPI UDataMemory
* U_EXPORT2
1438 udata_open(const char *path
, const char *type
, const char *name
,
1439 UErrorCode
*pErrorCode
) {
1441 fprintf(stderr
, "udata_open(): Opening: %s : %s . %s\n", (path
?path
:"NULL"), name
, type
);
1445 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1447 } else if(name
==NULL
|| *name
==0) {
1448 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1451 return doOpenChoice(path
, type
, name
, NULL
, NULL
, pErrorCode
);
1457 U_CAPI UDataMemory
* U_EXPORT2
1458 udata_openChoice(const char *path
, const char *type
, const char *name
,
1459 UDataMemoryIsAcceptable
*isAcceptable
, void *context
,
1460 UErrorCode
*pErrorCode
) {
1462 fprintf(stderr
, "udata_openChoice(): Opening: %s : %s . %s\n", (path
?path
:"NULL"), name
, type
);
1465 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1467 } else if(name
==NULL
|| *name
==0 || isAcceptable
==NULL
) {
1468 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1471 return doOpenChoice(path
, type
, name
, isAcceptable
, context
, pErrorCode
);
1477 U_CAPI
void U_EXPORT2
1478 udata_getInfo(UDataMemory
*pData
, UDataInfo
*pInfo
) {
1480 if(pData
!=NULL
&& pData
->pHeader
!=NULL
) {
1481 const UDataInfo
*info
=&pData
->pHeader
->info
;
1482 uint16_t dataInfoSize
=udata_getInfoSize(info
);
1483 if(pInfo
->size
>dataInfoSize
) {
1484 pInfo
->size
=dataInfoSize
;
1486 uprv_memcpy((uint16_t *)pInfo
+1, (const uint16_t *)info
+1, pInfo
->size
-2);
1487 if(info
->isBigEndian
!=U_IS_BIG_ENDIAN
) {
1488 /* opposite endianness */
1489 uint16_t x
=info
->reservedWord
;
1490 pInfo
->reservedWord
=(uint16_t)((x
<<8)|(x
>>8));