2 ******************************************************************************
4 * Copyright (C) 1999-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 1999oct25
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/putil.h"
22 #include "unicode/udata.h"
23 #include "unicode/uversion.h"
32 /***********************************************************************
34 * Notes on the organization of the ICU data implementation
36 * All of the public API is defined in udata.h
38 * The implementation is split into several files...
40 * - udata.c (this file) contains higher level code that knows about
41 * the search paths for locating data, caching opened data, etc.
43 * - umapfile.c contains the low level platform-specific code for actually loading
44 * (memory mapping, file reading, whatever) data into memory.
46 * - ucmndata.c deals with the tables of contents of ICU data items within
47 * an ICU common format data file. The implementation includes
48 * an abstract interface and support for multiple TOC formats.
49 * All knowledge of any specific TOC format is encapsulated here.
51 * - udatamem.c has code for managing UDataMemory structs. These are little
52 * descriptor objects for blocks of memory holding ICU data of
56 /* configuration ---------------------------------------------------------- */
58 /* If you are excruciatingly bored turn this on .. */
59 /* #define UDATA_DEBUG 1 */
61 #if defined(UDATA_DEBUG)
66 /***********************************************************************
68 * static (Global) data
70 ************************************************************************/
71 static UDataMemory
*gCommonICUData
= NULL
; /* Pointer to the common ICU data. */
72 /* May be updated once, if we started with */
73 /* a stub or subset library. */
75 static UDataMemory
*gStubICUData
= NULL
; /* If gCommonICUData does get updated, remember */
76 /* the original one so that it can be cleaned */
77 /* up when ICU is shut down. */
79 static UHashtable
*gCommonDataCache
= NULL
; /* Global hash table of opened ICU data files. */
81 static UDataFileAccess gDataFileAccess
= UDATA_DEFAULT_ACCESS
;
83 static UBool U_CALLCONV
86 if (gCommonDataCache
) { /* Delete the cache of user data mappings. */
87 uhash_close(gCommonDataCache
); /* Table owns the contents, and will delete them. */
88 gCommonDataCache
= NULL
; /* Cleanup is not thread safe. */
91 if (gCommonICUData
!= NULL
) {
92 udata_close(gCommonICUData
); /* Clean up common ICU Data */
93 gCommonICUData
= NULL
;
96 if (gStubICUData
!= NULL
) {
97 udata_close(gStubICUData
); /* Clean up the stub ICU Data */
102 return TRUE
; /* Everything was cleaned up */
109 * setCommonICUData. Set a UDataMemory to be the global ICU Data
112 setCommonICUData(UDataMemory
*pData
, /* The new common data. Belongs to caller, we copy it. */
113 UDataMemory
*oldData
, /* Old ICUData ptr. Overwrite of this value is ok, */
114 /* of any others is not. */
115 UBool warn
, /* If true, set USING_DEFAULT warning if ICUData was */
116 /* changed by another thread before we got to it. */
119 UDataMemory
*newCommonData
= UDataMemory_createNewInstance(pErr
);
120 if (U_FAILURE(*pErr
)) {
124 /* For the assignment, other threads must cleanly see either the old */
125 /* or the new, not some partially initialized new. The old can not be */
126 /* deleted - someone may still have a pointer to it lying around in */
128 UDatamemory_assign(newCommonData
, pData
);
130 if (gCommonICUData
==oldData
) {
131 gStubICUData
= gCommonICUData
; /* remember the old Common Data, so it can be cleaned up. */
132 gCommonICUData
= newCommonData
;
133 ucln_common_registerCleanup(UCLN_COMMON_UDATA
, udata_cleanup
);
137 *pErr
= U_USING_DEFAULT_WARNING
;
139 uprv_free(newCommonData
);
145 findBasename(const char *path
) {
146 const char *basename
=uprv_strrchr(path
, U_FILE_SEP_CHAR
);
156 packageNameFromPath(const char *path
)
158 if((path
== NULL
) || (*path
== 0)) {
159 return U_ICUDATA_NAME
;
162 path
= findBasename(path
);
164 if((path
== NULL
) || (*path
== 0)) {
165 return U_ICUDATA_NAME
;
172 /*----------------------------------------------------------------------*
174 * Cache for common data *
175 * Functions for looking up or adding entries to a cache of *
176 * data that has been previously opened. Avoids a potentially *
177 * expensive operation of re-opening the data for subsequent *
180 * Data remains cached for the duration of the process. *
182 *----------------------------------------------------------------------*/
184 typedef struct DataCacheElement
{
192 * Deleter function for DataCacheElements.
193 * udata cleanup function closes the hash table; hash table in turn calls back to
194 * here for each entry.
196 static void U_CALLCONV
DataCacheElement_deleter(void *pDCEl
) {
197 DataCacheElement
*p
= (DataCacheElement
*)pDCEl
;
198 udata_close(p
->item
); /* unmaps storage */
199 uprv_free(p
->name
); /* delete the hash key string. */
200 uprv_free(pDCEl
); /* delete 'this' */
203 /* udata_getHashTable()
204 * Get the hash table used to store the data cache entries.
205 * Lazy create it if it doesn't yet exist.
207 static UHashtable
*udata_getHashTable() {
208 UErrorCode err
= U_ZERO_ERROR
;
209 UBool cacheIsInitialized
;
210 UHashtable
*tHT
= NULL
;
212 UMTX_CHECK(NULL
, (gCommonDataCache
!= NULL
), cacheIsInitialized
);
214 if (cacheIsInitialized
) {
215 return gCommonDataCache
;
218 tHT
= uhash_open(uhash_hashChars
, uhash_compareChars
, NULL
, &err
);
219 /* Check for null pointer. */
221 return NULL
; /* TODO: Handle this error better. */
223 uhash_setValueDeleter(tHT
, DataCacheElement_deleter
);
226 if (gCommonDataCache
== NULL
) {
227 gCommonDataCache
= tHT
;
229 ucln_common_registerCleanup(UCLN_COMMON_UDATA
, udata_cleanup
);
236 if (U_FAILURE(err
)) {
237 return NULL
; /* TODO: handle this error better. */
239 return gCommonDataCache
;
244 static UDataMemory
*udata_findCachedData(const char *path
)
247 UDataMemory
*retVal
= NULL
;
248 DataCacheElement
*el
;
249 const char *baseName
;
251 baseName
= findBasename(path
); /* Cache remembers only the base name, not the full path. */
252 htable
= udata_getHashTable();
254 el
= (DataCacheElement
*)uhash_get(htable
, baseName
);
260 fprintf(stderr
, "Cache: [%s] -> %p\n", baseName
, retVal
);
266 static UDataMemory
*udata_cacheDataItem(const char *path
, UDataMemory
*item
, UErrorCode
*pErr
) {
267 DataCacheElement
*newElement
;
268 const char *baseName
;
271 UDataMemory
*oldValue
= NULL
;
272 UErrorCode subErr
= U_ZERO_ERROR
;
274 if (U_FAILURE(*pErr
)) {
278 /* Create a new DataCacheElement - the thingy we store in the hash table -
279 * and copy the supplied path and UDataMemoryItems into it.
281 newElement
= uprv_malloc(sizeof(DataCacheElement
));
282 if (newElement
== NULL
) {
283 *pErr
= U_MEMORY_ALLOCATION_ERROR
;
286 newElement
->item
= UDataMemory_createNewInstance(pErr
);
287 if (U_FAILURE(*pErr
)) {
288 uprv_free(newElement
);
291 UDatamemory_assign(newElement
->item
, item
);
293 baseName
= findBasename(path
);
294 nameLen
= (int32_t)uprv_strlen(baseName
);
295 newElement
->name
= uprv_malloc(nameLen
+1);
296 if (newElement
->name
== NULL
) {
297 *pErr
= U_MEMORY_ALLOCATION_ERROR
;
298 uprv_free(newElement
->item
);
299 uprv_free(newElement
);
302 uprv_strcpy(newElement
->name
, baseName
);
304 /* Stick the new DataCacheElement into the hash table.
306 htable
= udata_getHashTable();
308 oldValue
= uhash_get(htable
, path
);
309 if (oldValue
!= NULL
) {
310 subErr
= U_USING_DEFAULT_WARNING
;
315 newElement
->name
, /* Key */
316 newElement
, /* Value */
322 fprintf(stderr
, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement
->name
,
323 newElement
->item
, u_errorName(subErr
), newElement
->item
->vFuncs
);
326 if (subErr
== U_USING_DEFAULT_WARNING
|| U_FAILURE(subErr
)) {
327 *pErr
= subErr
; /* copy sub err unto fillin ONLY if something happens. */
328 uprv_free(newElement
->name
);
329 uprv_free(newElement
->item
);
330 uprv_free(newElement
);
334 return newElement
->item
;
339 /*-------------------------------------------------------------------------------
341 * TinyString - a small set of really simple string functions, for
342 * the purpose of consolidating buffer overflow code in one place
344 * Use wherever you would otherwise declare a fixed sized char[xx] buffer.
345 * Do non-growing ops by accessing fields of struct directly
346 * Grow using the append function to automatically extend buffer
349 *-------------------------------------------------------------------------------*/
350 typedef struct TinyString
{
353 char fStaticBuf
[100];
357 static void TinyString_init(TinyString
*This
) {
358 This
->s
= This
->fStaticBuf
;
361 This
->fCapacity
= sizeof(This
->fStaticBuf
)-1;
364 static void TinyString_append(TinyString
*This
, const char *what
) {
366 newLen
= This
->length
+ (int32_t)uprv_strlen(what
);
367 if (newLen
>= This
->fCapacity
) {
368 int32_t newCapacity
= newLen
* 2;
369 char *newBuf
= (char *)uprv_malloc(newCapacity
+1);
370 if (newBuf
!= NULL
) {
371 uprv_strcpy(newBuf
, This
->s
);
372 if (This
->s
!= This
->fStaticBuf
) {
376 This
->fCapacity
= newCapacity
;
379 if (newLen
< This
->fCapacity
) {
380 uprv_strcat(This
->s
+This
->length
, what
);
381 This
->length
= newLen
;
385 static void TinyString_appendn(TinyString
*This
, const char *what
, int32_t n
) {
387 newLen
= This
->length
+ n
;
388 if (newLen
>= This
->fCapacity
) {
389 int32_t newCapacity
= newLen
* 2;
390 char *newBuf
= (char *)uprv_malloc(newCapacity
+1);
391 if (newBuf
!= NULL
) {
392 uprv_strcpy(newBuf
, This
->s
);
393 if (This
->s
!= This
->fStaticBuf
) {
397 This
->fCapacity
= newCapacity
;
400 if (newLen
< This
->fCapacity
) {
401 uprv_strncat(This
->s
+This
->length
, what
, n
);
402 This
->length
= newLen
;
406 static void TinyString_dt(TinyString
*This
) {
407 if (This
->s
!= This
->fStaticBuf
) {
410 TinyString_init(This
);
416 /*----------------------------------------------------------------------*==============
418 * Path management. Could be shared with other tools/etc if need be *
421 *----------------------------------------------------------------------*/
423 #define U_DATA_PATHITER_BUFSIZ 128 /* Size of local buffer for paths */
424 /* Overflow causes malloc of larger buf */
428 const char *path
; /* working path (u_icudata_Dir) */
429 const char *nextPath
; /* path following this one */
430 const char *basename
; /* item's basename (icudt22e_mt.res)*/
431 const char *suffix
; /* item suffix (can be null) */
433 uint32_t basenameLen
; /* length of basename */
435 char *itemPath
; /* path passed in with item name */
436 char itemPathBuf
[U_DATA_PATHITER_BUFSIZ
];
438 char *pathBuffer
; /* output path for this it'ion */
439 char pathBufferA
[U_DATA_PATHITER_BUFSIZ
];
441 char *packageStub
; /* example: "/icudt28b". Will ignore that leaf in set paths. */
442 char packageStubBuf
[U_DATA_PATHITER_BUFSIZ
];
443 uint32_t packageStubLen
;
445 UBool checkLastFour
; /* if TRUE then allow paths such as '/foo/myapp.dat'
446 * to match, checks last 4 chars of suffix with
447 * last 4 of path, then previous chars. */
452 * Initialize (or re-initialize) a user-supplied UDataPathIterator
453 * Note: UDataPathIterator does not allocate storage, so it doesn't need to be closed.
455 * @param iter The iterator to be initialized. Its current state does not matter.
456 * @param path The full pathname to be iterated over. If NULL, defaults to U_ICUDATA_NAME
457 * @param pkg Package which is being searched for, ex "icudt28l". Will ignore leaf directories such as /icudt28l
458 * @param item Item to be searched for. Can include full path, such as /a/b/foo.dat
459 * @param suffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
460 * Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
461 * '/blarg/stuff.dat' would also be found.
463 static void udata_pathiter_init(UDataPathIterator
*iter
, const char *path
, const char *pkg
,
464 const char *item
, const char *suffix
, UBool doCheckLastFour
)
467 fprintf(stderr
, "SUFFIX1=%s PATH=%s\n", suffix
, path
);
471 iter
->path
= u_getDataDirectory();
478 iter
->packageStubLen
= 0;
479 iter
->packageStub
=iter
->packageStubBuf
;
480 iter
->packageStub
[0] = 0;
482 if(uprv_strlen(pkg
) + 2 > U_DATA_PATHITER_BUFSIZ
) {
483 iter
->packageStub
= uprv_malloc(uprv_strlen(pkg
)+2);
484 /* Check for null pointer. */
485 if (iter
->packageStub
== NULL
) {
489 iter
->packageStub
= iter
->packageStubBuf
;
491 iter
->packageStub
[0] = U_FILE_SEP_CHAR
;
492 uprv_strcpy(iter
->packageStub
+1, pkg
);
493 iter
->packageStubLen
= (int32_t)uprv_strlen(iter
->packageStub
);
496 fprintf(stderr
, "STUB=%s [%d]\n", iter
->packageStub
, iter
->packageStubLen
);
501 iter
->basename
= findBasename(item
);
502 iter
->basenameLen
= (int32_t)uprv_strlen(iter
->basename
);
505 iter
->itemPath
= iter
->itemPathBuf
;
506 if(iter
->basename
== item
) {
507 iter
->itemPath
[0] = 0;
508 iter
->nextPath
= iter
->path
;
510 int32_t itemPathLen
= (int32_t)(iter
->basename
-item
);
511 if (itemPathLen
>= U_DATA_PATHITER_BUFSIZ
) {
512 char *t
= (char *)uprv_malloc(itemPathLen
+1);
516 /* Malloc failed. Ignore the itemPath. */
520 uprv_strncpy(iter
->itemPath
, item
, itemPathLen
);
521 iter
->itemPath
[itemPathLen
]=0;
522 iter
->nextPath
= iter
->itemPath
;
525 fprintf(stderr
, "SUFFIX=%s [%p]\n", suffix
, suffix
);
530 iter
->suffix
= suffix
;
535 iter
->checkLastFour
= doCheckLastFour
;
537 /* pathBuffer will hold the output path strings returned by this iterator
538 * Get an upper bound of possible string size, and make sure that the buffer
539 * is big enough (sum of length of each piece, 2 extra delimiters, + trailing NULL) */
541 int32_t maxPathLen
= (int32_t)(uprv_strlen(iter
->path
) + uprv_strlen(item
) + uprv_strlen(iter
->suffix
) + iter
->packageStubLen
+ 3);
542 iter
->pathBuffer
= iter
->pathBufferA
;
543 if (maxPathLen
>= U_DATA_PATHITER_BUFSIZ
) {
544 iter
->pathBuffer
= (char *)uprv_malloc(maxPathLen
);
545 if (iter
->pathBuffer
== NULL
) {
546 iter
->pathBuffer
= iter
->pathBufferA
;
553 fprintf(stderr
, "%p: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n",
561 iter
->checkLastFour
?"TRUE":"false");
567 * Get the next path on the list.
569 * @param iter The Iter to be used
570 * @param len If set, pointer to the length of the returned path, for convenience.
571 * @return Pointer to the next path segment, or NULL if there are no more.
573 static const char *udata_pathiter_next(UDataPathIterator
*iter
)
575 const char *path
= NULL
;
576 uint32_t pathLen
= 0;
577 const char *pathBasename
;
581 if( iter
->nextPath
== NULL
) {
585 path
= iter
->nextPath
;
587 if(iter
->nextPath
== iter
->itemPath
) { /* we were processing item's path. */
588 iter
->nextPath
= iter
->path
; /* start with regular path next tm. */
589 pathLen
= (int32_t)uprv_strlen(path
);
591 /* fix up next for next time */
592 iter
->nextPath
= uprv_strchr(path
, U_PATH_SEP_CHAR
);
593 if(iter
->nextPath
== NULL
) {
594 /* segment: entire path */
595 pathLen
= (int32_t)uprv_strlen(path
);
597 /* segment: until next segment */
598 pathLen
= (int32_t)(iter
->nextPath
- path
);
599 if(*iter
->nextPath
) { /* skip divider */
610 fprintf(stderr
, "rest of path (IDD) = %s\n", path
);
611 fprintf(stderr
, " ");
614 for(qqq
=0;qqq
<pathLen
;qqq
++)
616 fprintf(stderr
, " ");
619 fprintf(stderr
, "^\n");
622 uprv_strncpy(iter
->pathBuffer
, path
, pathLen
);
623 iter
->pathBuffer
[pathLen
] = 0;
625 /* check for .dat files */
626 pathBasename
= findBasename(iter
->pathBuffer
);
628 if(iter
->checkLastFour
== TRUE
&&
630 uprv_strncmp(iter
->pathBuffer
+(pathLen
-4),iter
->suffix
,4)==0 && /* suffix matches */
631 uprv_strncmp(findBasename(iter
->pathBuffer
),iter
->basename
,iter
->basenameLen
)==0 && /* base matches */
632 uprv_strlen(pathBasename
)==(iter
->basenameLen
+4)) { /* base+suffix = full len */
635 fprintf(stderr
, "Have %s file on the path: %s\n", iter
->suffix
, iter
->pathBuffer
);
640 { /* regular dir path */
641 if(iter
->pathBuffer
[pathLen
-1] != U_FILE_SEP_CHAR
) {
643 uprv_strncmp(iter
->pathBuffer
+(pathLen
-4), ".dat", 4) == 0)
646 fprintf(stderr
, "skipping non-directory .dat file %s\n", iter
->pathBuffer
);
651 /* Check if it is a directory with the same name as our package */
652 if(iter
->packageStubLen
&&
653 (pathLen
> iter
->packageStubLen
) &&
654 !uprv_strcmp(iter
->pathBuffer
+ pathLen
- iter
->packageStubLen
, iter
->packageStub
)) {
656 fprintf(stderr
, "Found stub %s ( will add package %s of len %d)\n", iter
->packageStub
, iter
->basename
, iter
->basenameLen
);
658 pathLen
-= iter
->packageStubLen
;
661 iter
->pathBuffer
[pathLen
++] = U_FILE_SEP_CHAR
;
664 uprv_strncpy(iter
->pathBuffer
+ pathLen
, /* + basename */
666 iter
->packageStubLen
-1);
668 pathLen
+= iter
->packageStubLen
-1;
670 if(*iter
->suffix
) /* tack on suffix */
672 uprv_strcpy(iter
->pathBuffer
+ pathLen
,
674 pathLen
+= (int32_t)uprv_strlen(iter
->suffix
);
680 fprintf(stderr
, " --> %s\n", iter
->pathBuffer
);
683 return iter
->pathBuffer
;
687 /* fell way off the end */
693 * Path Iterator Destructor. Clean up any allocated storage
695 static void udata_pathiter_dt(UDataPathIterator
*iter
) {
696 if (iter
->itemPath
!= iter
->itemPathBuf
) {
697 uprv_free(iter
->itemPath
);
698 iter
->itemPath
= NULL
;
700 if (iter
->pathBuffer
!= iter
->pathBufferA
) {
701 uprv_free(iter
->pathBuffer
);
702 iter
->pathBuffer
= NULL
;
704 if (iter
->packageStub
!= iter
->packageStubBuf
) {
705 uprv_free(iter
->packageStub
);
706 iter
->packageStub
= NULL
;
710 /* ==================================================================================*/
713 /*----------------------------------------------------------------------*
715 * Add a static reference to the common data library *
716 * Unless overridden by an explicit udata_setCommonData, this will be *
719 *----------------------------------------------------------------------*/
720 extern const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT
;
723 /*----------------------------------------------------------------------*
725 * openCommonData Attempt to open a common format (.dat) file *
726 * Map it into memory (if it's not there already) *
727 * and return a UDataMemory object for it. *
729 * If the requested data is already open and cached *
730 * just return the cached UDataMem object. *
732 *----------------------------------------------------------------------*/
734 openCommonData(const char *path
, /* Path from OpenChoice? */
735 UBool isICUData
, /* ICU Data true if path == NULL */
736 UErrorCode
*pErrorCode
)
739 UDataPathIterator iter
;
740 const char *pathBuffer
;
741 const char *inBasename
;
743 if (U_FAILURE(*pErrorCode
)) {
747 UDataMemory_init(&tData
);
749 /* ??????? TODO revisit this */
751 /* "mini-cache" for common ICU data */
752 if(gCommonICUData
!= NULL
) {
753 return gCommonICUData
;
756 tData
.pHeader
= &U_ICUDATA_ENTRY_POINT
;
757 udata_checkCommonData(&tData
, pErrorCode
);
758 setCommonICUData(&tData
, NULL
, FALSE
, pErrorCode
);
759 return gCommonICUData
;
763 /* request is NOT for ICU Data. */
765 /* Find the base name portion of the supplied path. */
766 /* inBasename will be left pointing somewhere within the original path string. */
767 inBasename
= findBasename(path
);
769 fprintf(stderr
, "inBasename = %s\n", inBasename
);
773 /* no basename. This will happen if the original path was a directory name, */
774 /* like "a/b/c/". (Fallback to separate files will still work.) */
776 fprintf(stderr
, "ocd: no basename in %s, bailing.\n", path
);
778 *pErrorCode
=U_FILE_ACCESS_ERROR
;
782 /* Is the requested common data file already open and cached? */
783 /* Note that the cache is keyed by the base name only. The rest of the path, */
784 /* if any, is not considered. */
786 UDataMemory
*dataToReturn
= udata_findCachedData(inBasename
);
787 if (dataToReturn
!= NULL
) {
792 /* Requested item is not in the cache.
793 * Hunt it down, trying all the path locations
796 udata_pathiter_init(&iter
, u_getDataDirectory(), inBasename
, path
, ".dat", TRUE
);
798 while((UDataMemory_isLoaded(&tData
)==FALSE
) &&
799 (pathBuffer
= udata_pathiter_next(&iter
)) != NULL
)
802 fprintf(stderr
, "ocd: trying path %s - ", pathBuffer
);
804 uprv_mapFile(&tData
, pathBuffer
);
806 fprintf(stderr
, "%s\n", UDataMemory_isLoaded(&tData
)?"LOADED":"not loaded");
809 udata_pathiter_dt(&iter
); /* Note: this call may invalidate "pathBuffer" */
811 #if defined(OS390_STUBDATA) && defined(OS390BATCH)
812 if (!UDataMemory_isLoaded(&tData
)) {
813 char ourPathBuffer
[1024];
814 /* One more chance, for extendCommonData() */
815 uprv_strncpy(ourPathBuffer
, path
, 1019);
816 ourPathBuffer
[1019]=0;
817 uprv_strcat(ourPathBuffer
, ".dat");
818 uprv_mapFile(&tData
, ourPathBuffer
);
822 if (!UDataMemory_isLoaded(&tData
)) {
824 *pErrorCode
=U_FILE_ACCESS_ERROR
;
828 /* we have mapped a file, check its header */
829 udata_checkCommonData(&tData
, pErrorCode
);
832 /* Cache the UDataMemory struct for this .dat file,
833 * so we won't need to hunt it down and map it again next time
834 * something is needed from it. */
835 return udata_cacheDataItem(inBasename
, &tData
, pErrorCode
);
840 # define MAX_STUB_ENTRIES 8
842 # define MAX_STUB_ENTRIES 0
846 /*----------------------------------------------------------------------*
848 * extendICUData If the full set of ICU data was not loaded at *
849 * program startup, load it now. This function will *
850 * be called when the lookup of an ICU data item in *
851 * the common ICU data fails. *
853 * The parameter is the UDataMemory in which the *
854 * search for a requested item failed. *
856 * return true if new data is loaded, false otherwise.*
858 *----------------------------------------------------------------------*/
859 static UBool
extendICUData(UDataMemory
*failedData
, UErrorCode
*pErr
)
861 /* If the data library that we are running with turns out to be the
862 * stub library (or, on the 390, the subset library), we will try to
863 * load a .dat file instead. The stub library has no entries in its
864 * TOC, which is how we identify it here.
867 UDataMemory copyPData
;
869 if (failedData
->vFuncs
->NumEntries(failedData
) > MAX_STUB_ENTRIES
) {
870 /* Not the stub. We can't extend. */
874 /* See if we can explicitly open a .dat file for the ICUData. */
875 pData
= openCommonData(
876 U_ICUDATA_NAME
, /* "icudt20l" , for example. */
877 FALSE
, /* Pretend we're not opening ICUData */
880 /* How about if there is no pData, eh... */
882 UDataMemory_init(©PData
);
884 UDatamemory_assign(©PData
, pData
);
885 copyPData
.map
= 0; /* The mapping for this data is owned by the hash table */
886 copyPData
.mapAddr
= 0; /* which will unmap it when ICU is shut down. */
887 /* CommonICUData is also unmapped when ICU is shut down.*/
888 /* To avoid unmapping the data twice, zero out the map */
889 /* fields in the UDataMemory that we're assigning */
890 /* to CommonICUData. */
892 setCommonICUData(©PData
, /* The new common data. */
893 failedData
, /* Old ICUData ptr. Overwrite of this value is ok, */
894 FALSE
, /* No warnings if write didn't happen */
895 pErr
); /* setCommonICUData honors errors; NOP if error set */
899 return gCommonICUData
!= failedData
; /* Return true if ICUData pointer was updated. */
900 /* (Could potentially have been done by another thread racing */
901 /* us through here, but that's fine, we still return true */
902 /* so that current thread will also examine extended data. */
908 /*----------------------------------------------------------------------*
910 * udata_setCommonData *
912 *----------------------------------------------------------------------*/
913 U_CAPI
void U_EXPORT2
914 udata_setCommonData(const void *data
, UErrorCode
*pErrorCode
) {
915 UDataMemory dataMemory
;
917 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
922 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
926 /* do we already have common ICU data set? */
927 if(gCommonICUData
!= NULL
) {
928 *pErrorCode
=U_USING_DEFAULT_WARNING
;
932 /* set the data pointer and test for validity */
933 UDataMemory_init(&dataMemory
);
934 UDataMemory_setData(&dataMemory
, data
);
935 udata_checkCommonData(&dataMemory
, pErrorCode
);
936 if (U_FAILURE(*pErrorCode
)) {return;}
938 /* we have good data */
939 /* Set it up as the ICU Common Data. */
940 setCommonICUData(&dataMemory
, NULL
, TRUE
, pErrorCode
);
946 /*---------------------------------------------------------------------------
950 *---------------------------------------------------------------------------- */
951 U_CAPI
void U_EXPORT2
952 udata_setAppData(const char *path
, const void *data
, UErrorCode
*err
)
956 if(err
==NULL
|| U_FAILURE(*err
)) {
960 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
964 UDataMemory_init(&udm
);
966 udata_checkCommonData(&udm
, err
);
967 udata_cacheDataItem(path
, &udm
, err
);
970 /*----------------------------------------------------------------------------*
972 * checkDataItem Given a freshly located/loaded data item, either *
973 * an entry in a common file or a separately loaded file, *
974 * sanity check its header, and see if the data is *
975 * acceptable to the app. *
976 * If the data is good, create and return a UDataMemory *
977 * object that can be returned to the application. *
978 * Return NULL on any sort of failure. *
980 *----------------------------------------------------------------------------*/
984 const DataHeader
*pHeader
, /* The data item to be checked. */
985 UDataMemoryIsAcceptable
*isAcceptable
, /* App's call-back function */
986 void *context
, /* pass-thru param for above. */
987 const char *type
, /* pass-thru param for above. */
988 const char *name
, /* pass-thru param for above. */
989 UErrorCode
*nonFatalErr
, /* Error code if this data was not acceptable */
990 /* but openChoice should continue with */
991 /* trying to get data from fallback path. */
992 UErrorCode
*fatalErr
/* Bad error, caller should return immediately */
995 UDataMemory
*rDataMem
= NULL
; /* the new UDataMemory, to be returned. */
997 if (U_FAILURE(*fatalErr
)) {
1001 if(pHeader
->dataHeader
.magic1
==0xda &&
1002 pHeader
->dataHeader
.magic2
==0x27 &&
1003 (isAcceptable
==NULL
|| isAcceptable(context
, type
, name
, &pHeader
->info
))
1005 rDataMem
=UDataMemory_createNewInstance(fatalErr
);
1006 if (U_FAILURE(*fatalErr
)) {
1009 rDataMem
->pHeader
= pHeader
;
1011 /* the data is not acceptable, look further */
1012 /* If we eventually find something good, this errorcode will be */
1014 *nonFatalErr
=U_INVALID_FORMAT_ERROR
;
1020 * @return 0 if not loaded, 1 if loaded or err
1022 static UDataMemory
*doLoadFromIndividualFiles(const char *pkgName
,
1023 const char *dataPath
, const char *tocEntryPathSuffix
,
1024 /* following arguments are the same as doOpenChoice itself */
1025 const char *path
, const char *type
, const char *name
,
1026 UDataMemoryIsAcceptable
*isAcceptable
, void *context
,
1027 UErrorCode
*subErrorCode
,
1028 UErrorCode
*pErrorCode
)
1030 UDataMemory
*retVal
= NULL
;
1031 const char *pathBuffer
;
1032 UDataMemory dataMemory
;
1033 UDataMemory
*pEntryData
;
1035 UDataPathIterator iter
;
1036 /* look in ind. files: package\nam.typ ========================= */
1037 /* init path iterator for individual files */
1038 udata_pathiter_init(&iter
, dataPath
, pkgName
, path
, tocEntryPathSuffix
, FALSE
);
1040 while((pathBuffer
= udata_pathiter_next(&iter
)))
1043 fprintf(stderr
, "UDATA: trying individual file %s\n", pathBuffer
);
1045 if(uprv_mapFile(&dataMemory
, pathBuffer
))
1047 pEntryData
= checkDataItem(dataMemory
.pHeader
, isAcceptable
, context
, type
, name
, subErrorCode
, pErrorCode
);
1048 if (pEntryData
!= NULL
) {
1050 * Hand off ownership of the backing memory to the user's UDataMemory.
1052 pEntryData
->mapAddr
= dataMemory
.mapAddr
;
1053 pEntryData
->map
= dataMemory
.map
;
1056 fprintf(stderr
, "** Mapped file: %s\n", pathBuffer
);
1058 retVal
= pEntryData
;
1062 /* the data is not acceptable, or some error occured. Either way, unmap the memory */
1063 udata_close(&dataMemory
);
1065 /* If we had a nasty error, bail out completely. */
1066 if (U_FAILURE(*pErrorCode
)) {
1071 /* Otherwise remember that we found data but didn't like it for some reason */
1072 *subErrorCode
=U_INVALID_FORMAT_ERROR
;
1075 fprintf(stderr
, "%s\n", UDataMemory_isLoaded(&dataMemory
)?"LOADED":"not loaded");
1079 udata_pathiter_dt(&iter
);
1084 * @return 0 if not loaded, 1 if loaded or err
1086 static UDataMemory
*doLoadFromCommonData(UBool isICUData
, const char *pkgName
,
1087 const char *dataPath
, const char *tocEntryPathSuffix
, const char *tocEntryName
,
1088 /* following arguments are the same as doOpenChoice itself */
1089 const char *path
, const char *type
, const char *name
,
1090 UDataMemoryIsAcceptable
*isAcceptable
, void *context
,
1091 UErrorCode
*subErrorCode
,
1092 UErrorCode
*pErrorCode
)
1094 UDataMemory
*retVal
= NULL
;
1095 UDataMemory
*pEntryData
;
1096 const DataHeader
*pHeader
;
1097 UDataMemory
*pCommonData
;
1098 /* try to get common data. The loop is for platforms such as the 390 that do
1099 * not initially load the full set of ICU data. If the lookup of an ICU data item
1100 * fails, the full (but slower to load) set is loaded, the and the loop repeats,
1101 * trying the lookup again. Once the full set of ICU data is loaded, the loop wont
1102 * repeat because the full set will be checked the first time through.
1104 * The loop also handles the fallback to a .dat file if the application linked
1105 * to the stub data library rather than a real library.
1108 pCommonData
=openCommonData(path
, isICUData
, subErrorCode
); /** search for pkg **/
1110 if(U_SUCCESS(*subErrorCode
)) {
1113 /* look up the data piece in the common data */
1114 pHeader
=pCommonData
->vFuncs
->Lookup(pCommonData
, tocEntryName
, &length
, subErrorCode
);
1116 fprintf(stderr
, "%s: pHeader=%p - %s\n", tocEntryName
, pHeader
, u_errorName(*subErrorCode
));
1120 pEntryData
= checkDataItem(pHeader
, isAcceptable
, context
, type
, name
, subErrorCode
, pErrorCode
);
1122 fprintf(stderr
, "pEntryData=%p\n", pEntryData
);
1124 if (U_FAILURE(*pErrorCode
)) {
1128 if (pEntryData
!= NULL
) {
1129 pEntryData
->length
= length
;
1130 retVal
= pEntryData
;
1135 /* Data wasn't found. If we were looking for an ICUData item and there is
1136 * more data available, load it and try again,
1137 * otherwise break out of this loop. */
1138 if (!(isICUData
&& pCommonData
&& extendICUData(pCommonData
, subErrorCode
))) {
/*
 *  A note on the ownership of Mapped Memory
 *
 *  For common format files, ownership resides with the UDataMemory object
 *    that lives in the cache of opened common data.  These UDataMemory objects
 *    are private to the udata implementation, and are never seen directly by users.
 *
 *    The UDataMemory objects returned to users will have the address of some desired
 *    data within the mapped region, but they won't have the mapping info itself, and thus
 *    won't cause anything to be removed from memory when they are closed.
 *
 *  For individual data files, the UDataMemory returned to the user holds the
 *  information necessary to unmap the data on close.  If the user independently
 *  opens the same data file twice, two completely independent mappings will be made.
 *  (There is no cache of opened data items from individual files, only a cache of
 *   opened Common Data files, that is, files containing a collection of data items.)
 *
 *  For common data passed in from the user via udata_setAppData() or
 *  udata_setCommonData(), ownership remains with the user.
 *
 *  UDataMemory objects themselves, as opposed to the memory they describe,
 *  can be anywhere - heap, stack/local or global.
 *  They have a flag to indicate when they're heap allocated and thus
 *  must be deleted when closed.
 */
1174 /*----------------------------------------------------------------------------*
1176 * main data loading functions *
1178 *----------------------------------------------------------------------------*/
1179 static UDataMemory
*
1180 doOpenChoice(const char *path
, const char *type
, const char *name
,
1181 UDataMemoryIsAcceptable
*isAcceptable
, void *context
,
1182 UErrorCode
*pErrorCode
)
1184 UDataMemory
*retVal
= NULL
;
1186 TinyString tocEntryName
; /* entry name in tree format. ex: 'icudt28b/coll/ar.res' */
1187 TinyString tocEntryPath
; /* entry name in path format. ex: 'icudt28b\\coll\\ar.res' */
1190 TinyString treeName
;
1191 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) /* '/' vs '\' */
1192 TinyString altSepPath
;
1195 const char *dataPath
;
1197 int32_t tocEntrySuffixIndex
;
1198 const char *tocEntryPathSuffix
;
1199 UErrorCode subErrorCode
=U_ZERO_ERROR
;
1200 const char *treeChar
;
1202 UBool isICUData
= FALSE
;
1205 /* Is this path ICU data? */
1207 !strcmp(path
, U_ICUDATA_ALIAS
) || /* "ICUDATA" */
1208 !uprv_strncmp(path
, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING
, /* "icudt26e-" */
1209 uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING
)) ||
1210 !uprv_strncmp(path
, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING
, /* "ICUDATA-" */
1211 uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING
))) {
1215 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) /* Windows: try "foo\bar" and "foo/bar" */
1216 /* remap from alternate path char to the main one */
1217 TinyString_init(&altSepPath
);
1220 if((p
=uprv_strchr(path
,U_FILE_ALT_SEP_CHAR
))) {
1221 TinyString_append(&altSepPath
, path
);
1222 while((p
=uprv_strchr(altSepPath
.s
,U_FILE_ALT_SEP_CHAR
))) {
1223 *p
= U_FILE_SEP_CHAR
;
1225 #if defined (UDATA_DEBUG)
1226 fprintf(stderr
, "Changed path from [%s] to [%s]\n", path
, altSepPath
.s
);
1228 path
= altSepPath
.s
;
1233 TinyString_init(&tocEntryName
);
1234 TinyString_init(&tocEntryPath
);
1236 TinyString_init(&pkgName
);
1237 TinyString_init(&treeName
);
1239 /* ======= Set up strings */
1241 TinyString_append(&pkgName
, U_ICUDATA_NAME
);
1245 pkg
= uprv_strrchr(path
, U_FILE_SEP_CHAR
);
1246 first
= uprv_strchr(path
, U_FILE_SEP_CHAR
);
1247 if(uprv_pathIsAbsolute(path
) || (pkg
!= first
)) { /* more than one slash in the path- not a tree name */
1248 /* see if this is an /absolute/path/to/package path */
1250 TinyString_append(&pkgName
, pkg
+1);
1252 TinyString_append(&pkgName
, path
);
1255 treeChar
= uprv_strchr(path
, U_TREE_SEPARATOR
);
1257 TinyString_append(&treeName
, treeChar
+1); /* following '-' */
1259 TinyString_append(&pkgName
, U_ICUDATA_NAME
);
1261 TinyString_appendn(&pkgName
, path
, (int32_t)(treeChar
-path
));
1262 if (first
== NULL
) {
1264 This user data has no path, but there is a tree name.
1265 Look up the correct path from the data cache later.
1272 TinyString_append(&pkgName
, U_ICUDATA_NAME
);
1274 TinyString_append(&pkgName
, path
);
1281 fprintf(stderr
, " P=%s T=%s\n", pkgName
.s
, treeName
.s
);
1284 /* setting up the entry name and file name
1285 * Make up a full name by appending the type to the supplied
1286 * name, assuming that a type was supplied.
1289 /* prepend the package */
1290 TinyString_append(&tocEntryName
, pkgName
.s
);
1291 TinyString_append(&tocEntryPath
, pkgName
.s
);
1292 tocEntrySuffixIndex
= tocEntryName
.length
;
1295 TinyString_append(&tocEntryName
, U_TREE_ENTRY_SEP_STRING
);
1296 TinyString_append(&tocEntryName
, treeName
.s
);
1298 TinyString_append(&tocEntryPath
, U_FILE_SEP_STRING
);
1299 TinyString_append(&tocEntryPath
, treeName
.s
);
1302 TinyString_append(&tocEntryName
, U_TREE_ENTRY_SEP_STRING
);
1303 TinyString_append(&tocEntryPath
, U_FILE_SEP_STRING
);
1304 TinyString_append(&tocEntryName
, name
);
1305 TinyString_append(&tocEntryPath
, name
);
1306 if(type
!=NULL
&& *type
!=0) {
1307 TinyString_append(&tocEntryName
, ".");
1308 TinyString_append(&tocEntryName
, type
);
1309 TinyString_append(&tocEntryPath
, ".");
1310 TinyString_append(&tocEntryPath
, type
);
1312 tocEntryPathSuffix
= tocEntryPath
.s
+tocEntrySuffixIndex
; /* suffix starts here */
1315 fprintf(stderr
, " tocEntryName = %s\n", tocEntryName
.s
);
1316 fprintf(stderr
, " tocEntryPath = %s\n", tocEntryName
.s
);
1320 path
= COMMON_DATA_NAME
; /* "icudt26e" */
1323 /************************ Begin loop looking for ind. files ***************/
1325 fprintf(stderr
, "IND: inBasename = %s, pkg=%s\n", inBasename
, packageNameFromPath(path
));
1328 /* End of dealing with a null basename */
1329 dataPath
= u_getDataDirectory();
1331 /**** COMMON PACKAGE - only if packages are first. */
1332 if(gDataFileAccess
== UDATA_PACKAGES_FIRST
) {
1334 fprintf(stderr
, "Trying packages (UDATA_PACKAGES_FIRST)\n");
1337 retVal
= doLoadFromCommonData(isICUData
,
1338 pkgName
.s
, dataPath
, tocEntryPathSuffix
, tocEntryName
.s
,
1339 path
, type
, name
, isAcceptable
, context
, &subErrorCode
, pErrorCode
);
1340 if((retVal
!= NULL
) || U_FAILURE(*pErrorCode
)) {
1345 /**** INDIVIDUAL FILES */
1346 if((gDataFileAccess
==UDATA_PACKAGES_FIRST
) ||
1347 (gDataFileAccess
==UDATA_FILES_FIRST
)) {
1349 fprintf(stderr
, "Trying individual files\n");
1351 /* Check to make sure that there is a dataPath to iterate over */
1352 if ((dataPath
&& *dataPath
) || !isICUData
) {
1353 retVal
= doLoadFromIndividualFiles(pkgName
.s
, dataPath
, tocEntryPathSuffix
,
1354 path
, type
, name
, isAcceptable
, context
, &subErrorCode
, pErrorCode
);
1355 if((retVal
!= NULL
) || U_FAILURE(*pErrorCode
)) {
1361 /**** COMMON PACKAGE */
1362 if((gDataFileAccess
==UDATA_ONLY_PACKAGES
) ||
1363 (gDataFileAccess
==UDATA_FILES_FIRST
)) {
1365 fprintf(stderr
, "Trying packages (UDATA_ONLY_PACKAGES || UDATA_FILES_FIRST)\n");
1367 retVal
= doLoadFromCommonData(isICUData
,
1368 pkgName
.s
, dataPath
, tocEntryPathSuffix
, tocEntryName
.s
,
1369 path
, type
, name
, isAcceptable
, context
, &subErrorCode
, pErrorCode
);
1370 if((retVal
!= NULL
) || U_FAILURE(*pErrorCode
)) {
1375 /* Load from DLL. If we haven't attempted package load, we also haven't had any chance to
1376 try a DLL (static or setCommonData/etc) load.
1377 If we ever have a "UDATA_ONLY_FILES", add it to the or list here. */
1378 if(gDataFileAccess
==UDATA_NO_FILES
) {
1380 fprintf(stderr
, "Trying common data (UDATA_NO_FILES)\n");
1382 retVal
= doLoadFromCommonData(isICUData
,
1383 pkgName
.s
, "", tocEntryPathSuffix
, tocEntryName
.s
,
1384 path
, type
, name
, isAcceptable
, context
, &subErrorCode
, pErrorCode
);
1385 if((retVal
!= NULL
) || U_FAILURE(*pErrorCode
)) {
1390 /* data not found */
1391 if(U_SUCCESS(*pErrorCode
)) {
1392 if(U_SUCCESS(subErrorCode
)) {
1393 /* file not found */
1394 *pErrorCode
=U_FILE_ACCESS_ERROR
;
1396 /* entry point not found or rejected */
1397 *pErrorCode
=subErrorCode
;
1402 TinyString_dt(&tocEntryName
);
1403 TinyString_dt(&tocEntryPath
);
1404 TinyString_dt(&pkgName
);
1405 TinyString_dt(&treeName
);
1406 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1407 TinyString_dt(&altSepPath
);
1414 /* API ---------------------------------------------------------------------- */
1416 U_CAPI UDataMemory
* U_EXPORT2
1417 udata_open(const char *path
, const char *type
, const char *name
,
1418 UErrorCode
*pErrorCode
) {
1420 fprintf(stderr
, "udata_open(): Opening: %s : %s . %s\n", (path
?path
:"NULL"), name
, type
);
1424 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1426 } else if(name
==NULL
|| *name
==0) {
1427 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1430 return doOpenChoice(path
, type
, name
, NULL
, NULL
, pErrorCode
);
1436 U_CAPI UDataMemory
* U_EXPORT2
1437 udata_openChoice(const char *path
, const char *type
, const char *name
,
1438 UDataMemoryIsAcceptable
*isAcceptable
, void *context
,
1439 UErrorCode
*pErrorCode
) {
1441 fprintf(stderr
, "udata_openChoice(): Opening: %s : %s . %s\n", (path
?path
:"NULL"), name
, type
);
1444 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1446 } else if(name
==NULL
|| *name
==0 || isAcceptable
==NULL
) {
1447 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1450 return doOpenChoice(path
, type
, name
, isAcceptable
, context
, pErrorCode
);
1456 U_CAPI
void U_EXPORT2
1457 udata_getInfo(UDataMemory
*pData
, UDataInfo
*pInfo
) {
1459 if(pData
!=NULL
&& pData
->pHeader
!=NULL
) {
1460 const UDataInfo
*info
=&pData
->pHeader
->info
;
1461 uint16_t dataInfoSize
=udata_getInfoSize(info
);
1462 if(pInfo
->size
>dataInfoSize
) {
1463 pInfo
->size
=dataInfoSize
;
1465 uprv_memcpy((uint16_t *)pInfo
+1, (const uint16_t *)info
+1, pInfo
->size
-2);
1466 if(info
->isBigEndian
!=U_IS_BIG_ENDIAN
) {
1467 /* opposite endianness */
1468 uint16_t x
=info
->reservedWord
;
1469 pInfo
->reservedWord
=(uint16_t)((x
<<8)|(x
>>8));
1478 U_CAPI
void U_EXPORT2
udata_setFileAccess(UDataFileAccess access
, UErrorCode
*status
)
1480 gDataFileAccess
= access
;