[apple/icu.git] / icuSources / common / ucmndata.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 1999-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************/


/*------------------------------------------------------------------------------
 *
 *   UCommonData   An abstract interface for dealing with ICU Common Data Files.
 *                 ICU Common Data Files are a grouping of a number of individual
 *                 data items (resources, converters, tables, anything) into a
 *                 single file or dll.  The combined format includes a table of
 *                 contents for locating the individual items by name.
 *
 *                 Two formats for the table of contents are supported, which is
 *                 why there is an abstract interface involved.
 *
 */

#include "unicode/utypes.h"
#include "unicode/udata.h"
#include "cstring.h"
#include "ucmndata.h"
#include "udatamem.h"

#if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP)
#   include <stdio.h>
#endif

/*
 * Return the headerSize field of a data header in platform byte order.
 *
 * A NULL header yields 0.  When the header's isBigEndian flag differs from
 * U_IS_BIG_ENDIAN, the two bytes of the stored 16-bit value are exchanged
 * before it is returned.
 */
U_CFUNC uint16_t
udata_getHeaderSize(const DataHeader *udh) {
    uint16_t size;

    if(udh==NULL) {
        return 0;
    }

    size=udh->dataHeader.headerSize;
    if(udh->info.isBigEndian!=U_IS_BIG_ENDIAN) {
        /* opposite endianness: swap the two bytes */
        size=(uint16_t)((size<<8)|(size>>8));
    }
    return size;
}

/*
 * Return the size field of a UDataInfo in platform byte order.
 *
 * A NULL pointer yields 0.  When the info's isBigEndian flag differs from
 * U_IS_BIG_ENDIAN, the two bytes of the stored 16-bit value are exchanged
 * before it is returned.
 */
U_CFUNC uint16_t
udata_getInfoSize(const UDataInfo *info) {
    uint16_t size;

    if(info==NULL) {
        return 0;
    }

    size=info->size;
    if(info->isBigEndian!=U_IS_BIG_ENDIAN) {
        /* opposite endianness: swap the two bytes */
        size=(uint16_t)((size<<8)|(size>>8));
    }
    return size;
}

/*-----------------------------------------------------------------------------*
 *                                                                             *
 *  Pointer TOCs.   TODO: This form of table-of-contents should be removed     *
 *                  because DLLs must be relocated on loading to correct the   *
 *                  pointer values and this operation makes shared memory      *
 *                  mapping of the data much less likely to work.              *
 *                                                                             *
 *-----------------------------------------------------------------------------*/
/* One entry of a pointer-based table of contents: a named data item. */
typedef struct {
    const char       *entryName;   /* NUL-terminated item name; the key compared by pointerTOCPrefixBinarySearch() */
    const DataHeader *pHeader;     /* the item's data header; pointerTOCLookupFn() returns it after normalizing */
} PointerTOCEntry;


/* Pointer-based table of contents: an entry count followed by the entries. */
typedef struct  {
    uint32_t          count;       /* number of elements actually present in entry[] */
    uint32_t          reserved;    /* not read anywhere in this file */
    /**
     * Variable-length array declared with length 1 to disable bounds checkers.
     * The actual array length is in the count field.
     */
    PointerTOCEntry   entry[1];
}  PointerTOC;


/* definition of OffsetTOC struct types moved to ucmndata.h */

/*-----------------------------------------------------------------------------*
 *                                                                             *
 *    entry point lookup implementations                                       *
 *                                                                             *
 *-----------------------------------------------------------------------------*/

/*
 * Smaller of two values.  Only defined here if no MIN macro exists yet.
 * Each argument may be evaluated twice, so pass side-effect-free expressions.
 */
#ifndef MIN
#define MIN(a,b) (((a)<(b)) ? (a) : (b))
#endif

/**
 * Compare two NUL-terminated strings that are already known to agree on
 * their first *pPrefixLength bytes, skipping that shared prefix.
 *
 * Bytes are compared as unsigned values.  On return, *pPrefixLength holds
 * the number of leading bytes the two strings have in common (the
 * terminating NUL is not counted).
 *
 * @return 0 if the strings are equal; otherwise the difference between the
 *         first pair of differing bytes (negative if s1 sorts before s2).
 */
static int32_t
strcmpAfterPrefix(const char *s1, const char *s2, int32_t *pPrefixLength) {
    int32_t i=*pPrefixLength;  /* index of the next byte pair to compare */
    int32_t diff;

    for(;; ++i) {
        diff=(int32_t)(uint8_t)s1[i]-(int32_t)(uint8_t)s2[i];
        if(diff!=0 || s1[i]==0) {
            /* mismatch, or both strings ended together */
            break;
        }
    }
    *pPrefixLength=i;
    return diff;
}

/**
 * Binary-search an offset-based table of contents for the entry named s.
 *
 * The search relies on the entry names being in ascending order as compared
 * by strcmpAfterPrefix() (byte-wise, unsigned).
 *
 * @param s      NUL-terminated name to look for.
 * @param names  Base address to which each entry's nameOffset is added.
 * @param toc    Array of table-of-contents entries.
 * @param count  Number of entries in toc.  Zero or negative values (the
 *               latter can arise when a corrupt 32-bit unsigned count is cast
 *               to int32_t) are treated as an empty table.
 * @return Index of the matching entry, or -1 if no entry has that name.
 */
static int32_t
offsetTOCPrefixBinarySearch(const char *s, const char *names,
                            const UDataOffsetTOCEntry *toc, int32_t count) {
    int32_t start=0;
    int32_t limit=count;
    /*
     * Remember the shared prefix between s, start and limit,
     * and don't compare that shared prefix again.
     * The shared prefix should get longer as we narrow the [start, limit[ range.
     */
    int32_t startPrefixLength=0;
    int32_t limitPrefixLength=0;
    if(count<=0) {
        /* Nothing to search; also keeps toc[count-1] from using a negative index. */
        return -1;
    }
    /*
     * Prime the prefix lengths so that we don't keep prefixLength at 0 until
     * both the start and limit indexes have moved.
     * At the same time, we find if s is one of the start and (limit-1) names,
     * and if not, exclude them from the actual binary search.
     */
    if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, &startPrefixLength)) {
        return 0;
    }
    ++start;
    --limit;
    if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, &limitPrefixLength)) {
        return limit;
    }
    while(start<limit) {
        /* start+(limit-start)/2 cannot overflow, unlike (start+limit)/2 */
        int32_t i=start+(limit-start)/2;
        int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength);
        int32_t cmp=strcmpAfterPrefix(s, names+toc[i].nameOffset, &prefixLength);
        if(cmp<0) {
            limit=i;
            limitPrefixLength=prefixLength;
        } else if(cmp==0) {
            return i;
        } else {
            start=i+1;
            startPrefixLength=prefixLength;
        }
    }
    return -1;
}

/**
 * Binary-search a pointer-based table of contents for the entry named s.
 *
 * The search relies on the entry names being in ascending order as compared
 * by strcmpAfterPrefix() (byte-wise, unsigned).
 *
 * @param s      NUL-terminated name to look for.
 * @param toc    Array of table-of-contents entries.
 * @param count  Number of entries in toc.  Zero or negative values (the
 *               latter can arise when a corrupt 32-bit unsigned count is cast
 *               to int32_t) are treated as an empty table.
 * @return Index of the matching entry, or -1 if no entry has that name.
 */
static int32_t
pointerTOCPrefixBinarySearch(const char *s, const PointerTOCEntry *toc, int32_t count) {
    int32_t start=0;
    int32_t limit=count;
    /*
     * Remember the shared prefix between s, start and limit,
     * and don't compare that shared prefix again.
     * The shared prefix should get longer as we narrow the [start, limit[ range.
     */
    int32_t startPrefixLength=0;
    int32_t limitPrefixLength=0;
    if(count<=0) {
        /* Nothing to search; also keeps toc[count-1] from using a negative index. */
        return -1;
    }
    /*
     * Prime the prefix lengths so that we don't keep prefixLength at 0 until
     * both the start and limit indexes have moved.
     * At the same time, we find if s is one of the start and (limit-1) names,
     * and if not, exclude them from the actual binary search.
     */
    if(0==strcmpAfterPrefix(s, toc[0].entryName, &startPrefixLength)) {
        return 0;
    }
    ++start;
    --limit;
    if(0==strcmpAfterPrefix(s, toc[limit].entryName, &limitPrefixLength)) {
        return limit;
    }
    while(start<limit) {
        /* start+(limit-start)/2 cannot overflow, unlike (start+limit)/2 */
        int32_t i=start+(limit-start)/2;
        int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength);
        int32_t cmp=strcmpAfterPrefix(s, toc[i].entryName, &prefixLength);
        if(cmp<0) {
            limit=i;
            limitPrefixLength=prefixLength;
        } else if(cmp==0) {
            return i;
        } else {
            start=i+1;
            startPrefixLength=prefixLength;
        }
    }
    return -1;
}

U_CDECL_BEGIN
/*
 * Report how many entries an offset-based table of contents holds.
 * Returns 0 when pData has no table of contents.
 *
 * The count is passed through as an unsigned value; it is not routed through
 * a signed intermediate, and the cast keeps the const qualifier of pData->toc.
 */
static uint32_t U_CALLCONV
offsetTOCEntryCount(const UDataMemory *pData) {
    const UDataOffsetTOC *toc = (const UDataOffsetTOC *)pData->toc;
    return (uint32_t)((toc != NULL) ? (toc->count) : 0);
}

/*
 * Look up a named item through an offset-based table of contents.
 *
 * If pData has no table of contents, pData->pHeader is returned and *pLength
 * is left untouched.  Otherwise the table is searched for tocEntryName:
 *   - on a hit, the item's header is returned and *pLength is set to the
 *     distance to the next entry's data offset, or to -1 for the last entry;
 *   - on a miss, NULL is returned and *pLength is left untouched.
 * pErrorCode is not used.
 */
static const DataHeader * U_CALLCONV
offsetTOCLookupFn(const UDataMemory *pData,
                  const char *tocEntryName,
                  int32_t *pLength,
                  UErrorCode *pErrorCode) {
    (void)pErrorCode;
    const UDataOffsetTOC *toc = (const UDataOffsetTOC *)pData->toc;
    const char *tocBase;
    int32_t index, entryCount;

    if(toc==NULL) {
#ifdef UDATA_DEBUG
        fprintf(stderr, "returning header\n");
#endif

        return pData->pHeader;
    }

    /* name and data offsets in the entries are relative to the start of the TOC */
    tocBase=(const char *)toc;
    entryCount=(int32_t)toc->count;

#if defined (UDATA_DEBUG_DUMP)
    /* list the contents of the TOC each time .. not recommended */
    for(index=0; index<entryCount; ++index) {
        fprintf(stderr, "\tx%d: %s\n", index, &tocBase[toc->entry[index].nameOffset]);
    }
#endif

    /* perform a binary search for the data in the common data's table of contents */
    index=offsetTOCPrefixBinarySearch(tocEntryName, tocBase, toc->entry, entryCount);
    if(index<0) {
#ifdef UDATA_DEBUG
        fprintf(stderr, "%s: Not found.\n", tocEntryName);
#endif
        return NULL;
    }

#ifdef UDATA_DEBUG
    fprintf(stderr, "%s: Found.\n", tocEntryName);
#endif
    const UDataOffsetTOCEntry *hit=&toc->entry[index];
    if(index+1 < entryCount) {
        /* the item ends where the next one begins */
        *pLength = (int32_t)(hit[1].dataOffset - hit->dataOffset);
    } else {
        /* last entry: no following offset to measure against */
        *pLength = -1;
    }
    return (const DataHeader *)(tocBase+hit->dataOffset);
}


/*
 * Report how many entries a pointer-based table of contents holds.
 * Returns 0 when pData has no table of contents.
 */
static uint32_t U_CALLCONV pointerTOCEntryCount(const UDataMemory *pData) {
    const PointerTOC *toc = (const PointerTOC *)pData->toc;
    if(toc == NULL) {
        return 0;
    }
    return (uint32_t)toc->count;
}

/*
 * Look up a named item through a pointer-based table of contents.
 *
 * If pData has no table of contents, pData->pHeader is returned and *pLength
 * is left untouched.  Otherwise the table is searched for name:
 *   - on a hit, *pLength is set to -1 and the entry's header pointer is
 *     returned after passing through UDataMemory_normalizeDataPointer();
 *   - on a miss, NULL is returned and *pLength is left untouched.
 * pErrorCode is not used.
 */
static const DataHeader * U_CALLCONV pointerTOCLookupFn(const UDataMemory *pData,
                   const char *name,
                   int32_t *pLength,
                   UErrorCode *pErrorCode) {
    (void)pErrorCode;
    const PointerTOC *toc;
    int32_t index, entryCount;

    if(pData->toc==NULL) {
        return pData->pHeader;
    }

    toc = (const PointerTOC *)pData->toc;
    entryCount=(int32_t)toc->count;

#if defined (UDATA_DEBUG_DUMP)
    /* list the contents of the TOC each time .. not recommended */
    for(index=0; index<entryCount; ++index) {
        fprintf(stderr, "\tx%d: %s\n", index, toc->entry[index].entryName);
    }
#endif

    index=pointerTOCPrefixBinarySearch(name, toc->entry, entryCount);
    if(index<0) {
#ifdef UDATA_DEBUG
        fprintf(stderr, "%s: Not found.\n", name);
#endif
        return NULL;
    }

#ifdef UDATA_DEBUG
    fprintf(stderr, "%s: Found.\n", toc->entry[index].entryName);
#endif
    /* this TOC form carries no item lengths */
    *pLength=-1;
    return UDataMemory_normalizeDataPointer(toc->entry[index].pHeader);
}
U_CDECL_END


/*
 * Function tables, one per table-of-contents format: {lookup, entry count}.
 * udata_checkCommonData() stores the address of the matching table in
 * UDataMemory.vFuncs ("CmnD" data -> CmnDFuncs, "ToCP" data -> ToCPFuncs).
 */
static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn,  offsetTOCEntryCount};
static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount};


/*----------------------------------------------------------------------*
 *                                                                      *
 *  checkCommonData   Validate the header of a common data file and     *
 *                    select the function table for its TOC format.     *
 *                                                                      *
 *    - Does nothing if *err already indicates a failure.               *
 *    - Accepts only data whose magic bytes are 0xda 0x27, whose        *
 *      endianness and charset family match this platform, and whose    *
 *      dataFormat is "CmnD" or "ToCP" with formatVersion[0]==1.        *
 *    - On success sets udm->vFuncs and points udm->toc just past       *
 *      the header.                                                     *
 *    - Otherwise sets *err to U_INVALID_FORMAT_ERROR and closes udm.   *
 *                                                                      *
 *----------------------------------------------------------------------*/
U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) {
    const DataHeader *hdr;
    const commonDataFuncs *funcs=NULL;  /* stays NULL unless the data is acceptable */

    if (U_FAILURE(*err)) {
        return;
    }

    hdr=(udm!=NULL) ? udm->pHeader : NULL;

    if(hdr!=NULL &&
        hdr->dataHeader.magic1==0xda &&
        hdr->dataHeader.magic2==0x27 &&
        hdr->info.isBigEndian==U_IS_BIG_ENDIAN &&
        hdr->info.charsetFamily==U_CHARSET_FAMILY
        ) {
        /* header is valid for this platform; now identify the TOC format */
        if(hdr->info.dataFormat[0]==0x43 &&
            hdr->info.dataFormat[1]==0x6d &&
            hdr->info.dataFormat[2]==0x6e &&
            hdr->info.dataFormat[3]==0x44 &&
            hdr->info.formatVersion[0]==1
            ) {
            /* dataFormat="CmnD" */
            funcs=&CmnDFuncs;
        }
        else if(hdr->info.dataFormat[0]==0x54 &&
            hdr->info.dataFormat[1]==0x6f &&
            hdr->info.dataFormat[2]==0x43 &&
            hdr->info.dataFormat[3]==0x50 &&
            hdr->info.formatVersion[0]==1
            ) {
            /* dataFormat="ToCP" */
            funcs=&ToCPFuncs;
        }
    }

    if(funcs!=NULL) {
        /* the table of contents starts immediately after the header */
        udm->vFuncs = funcs;
        udm->toc=(const char *)hdr+udata_getHeaderSize(hdr);
    } else {
        /* missing data, invalid header, or unrecognized dataFormat */
        *err=U_INVALID_FORMAT_ERROR;
    }

    if (U_FAILURE(*err)) {
        /* If the data is no good and we memory-mapped it ourselves,
         *  close the memory mapping so it doesn't leak.  Note that this has
         *  no effect on non-memory mapped data, other than clearing fields in udm.
         */
        udata_close(udm);
    }
}

/*
 * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package
 * header but not its sub-items.
 * This function will be needed for automatic runtime swapping.
 * Sub-items should not be swapped to limit the swapping to the parts of the
 * package that are actually used.
 *
 * Since lengths of items are implicit in the order and offsets of their
 * ToC entries, and since offsets are relative to the start of the ToC,
 * a swapped version may need to generate a different data structure
 * with pointers to the original data items and with their lengths
 * (-1 for the last one if it is not known), and maybe even pointers to the
 * swapped versions of the items.
 * These pointers to swapped versions would establish a cache;
 * instead, each open data item could simply own the storage for its swapped
 * data. This fits better with the current design.
 *
 * markus 2003sep18 Jitterbug 2235
 */
Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
f3c0d7a5 A	2	// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f A	3	/*
	4	******************************************************************************
	5	*
4388f060	6	* Copyright (C) 1999-2011, International Business Machines
b75a7d8f A	7	* Corporation and others. All Rights Reserved.
	8	*
	9	******************************************************************************/
	10
	11
374ca955	12	/*------------------------------------------------------------------------------
b75a7d8f A	13	*
	14	* UCommonData An abstract interface for dealing with ICU Common Data Files.
	15	* ICU Common Data Files are a grouping of a number of individual
	16	* data items (resources, converters, tables, anything) into a
	17	* single file or dll. The combined format includes a table of
	18	* contents for locating the individual items by name.
	19	*
	20	* Two formats for the table of contents are supported, which is
	21	* why there is an abstract inteface involved.
	22	*
374ca955 A	23	*/
374ca955 A	24
b75a7d8f A	25	#include "unicode/utypes.h"
	26	#include "unicode/udata.h"
	27	#include "cstring.h"
	28	#include "ucmndata.h"
	29	#include "udatamem.h"
	30
374ca955 A	31	#if defined(UDATA_DEBUG) \|\| defined(UDATA_DEBUG_DUMP)
	32	# include <stdio.h>
	33	#endif
	34
	35	U_CFUNC uint16_t
	36	udata_getHeaderSize(const DataHeader *udh) {
	37	if(udh==NULL) {
	38	return 0;
	39	} else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) {
	40	/* same endianness */
	41	return udh->dataHeader.headerSize;
	42	} else {
	43	/* opposite endianness */
	44	uint16_t x=udh->dataHeader.headerSize;
	45	return (uint16_t)((x<<8)\|(x>>8));
	46	}
	47	}
	48
	49	U_CFUNC uint16_t
	50	udata_getInfoSize(const UDataInfo *info) {
	51	if(info==NULL) {
	52	return 0;
	53	} else if(info->isBigEndian==U_IS_BIG_ENDIAN) {
	54	/* same endianness */
	55	return info->size;
	56	} else {
	57	/* opposite endianness */
	58	uint16_t x=info->size;
	59	return (uint16_t)((x<<8)\|(x>>8));
	60	}
	61	}
b75a7d8f	62
374ca955 A	63	/-----------------------------------------------------------------------------
	64	* *
	65	* Pointer TOCs. TODO: This form of table-of-contents should be removed *
	66	* because DLLs must be relocated on loading to correct the *
	67	* pointer values and this operation makes shared memory *
	68	* mapping of the data much less likely to work. *
	69	* *
	70	-----------------------------------------------------------------------------/
b75a7d8f A	71	typedef struct {
	72	const char *entryName;
	73	const DataHeader *pHeader;
	74	} PointerTOCEntry;
	75
	76
	77	typedef struct {
	78	uint32_t count;
	79	uint32_t reserved;
0f5d89e8 A	80	/**
	81	* Variable-length array declared with length 1 to disable bounds checkers.
	82	* The actual array length is in the count field.
	83	*/
	84	PointerTOCEntry entry[1];
b75a7d8f A	85	} PointerTOC;
	86
	87
374ca955	88	/* definition of OffsetTOC struct types moved to ucmndata.h */
b75a7d8f	89
374ca955 A	90	/-----------------------------------------------------------------------------
	91	* *
	92	* entry point lookup implementations *
	93	* *
	94	-----------------------------------------------------------------------------/
4388f060 A	95
	96	#ifndef MIN
	97	#define MIN(a,b) (((a)<(b)) ? (a) : (b))
	98	#endif
	99
	100	/**
	101	* Compare strings where we know the shared prefix length,
	102	* and advance the prefix length as we find that the strings share even more characters.
	103	*/
	104	static int32_t
	105	strcmpAfterPrefix(const char s1, const char s2, int32_t *pPrefixLength) {
	106	int32_t pl=*pPrefixLength;
	107	int32_t cmp=0;
	108	s1+=pl;
	109	s2+=pl;
	110	for(;;) {
	111	int32_t c1=(uint8_t)*s1++;
	112	int32_t c2=(uint8_t)*s2++;
	113	cmp=c1-c2;
	114	if(cmp!=0 \|\| c1==0) { /* different or done */
	115	break;
	116	}
	117	++pl; /* increment shared same-prefix length */
	118	}
	119	*pPrefixLength=pl;
	120	return cmp;
	121	}
	122
	123	static int32_t
	124	offsetTOCPrefixBinarySearch(const char s, const char names,
	125	const UDataOffsetTOCEntry *toc, int32_t count) {
	126	int32_t start=0;
	127	int32_t limit=count;
	128	/*
	129	* Remember the shared prefix between s, start and limit,
	130	* and don't compare that shared prefix again.
	131	* The shared prefix should get longer as we narrow the [start, limit[ range.
	132	*/
	133	int32_t startPrefixLength=0;
	134	int32_t limitPrefixLength=0;
	135	if(count==0) {
	136	return -1;
	137	}
	138	/*
	139	* Prime the prefix lengths so that we don't keep prefixLength at 0 until
	140	* both the start and limit indexes have moved.
	141	* At the same time, we find if s is one of the start and (limit-1) names,
	142	* and if not, exclude them from the actual binary search.
	143	*/
	144	if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, &startPrefixLength)) {
	145	return 0;
	146	}
	147	++start;
	148	--limit;
	149	if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, &limitPrefixLength)) {
	150	return limit;
	151	}
	152	while(start<limit) {
	153	int32_t i=(start+limit)/2;
	154	int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength);
	155	int32_t cmp=strcmpAfterPrefix(s, names+toc[i].nameOffset, &prefixLength);
	156	if(cmp<0) {
	157	limit=i;
	158	limitPrefixLength=prefixLength;
159	} else if(cmp==0) {
160	return i;
161	} else {
162	start=i+1;
163	startPrefixLength=prefixLength;
164	}
165	}
166	return -1;
167	}
168
169	static int32_t
170	pointerTOCPrefixBinarySearch(const char s, const PointerTOCEntry toc, int32_t count) {
171	int32_t start=0;
172	int32_t limit=count;
173	/*
174	* Remember the shared prefix between s, start and limit,
175	* and don't compare that shared prefix again.
176	* The shared prefix should get longer as we narrow the [start, limit[ range.
177	*/
178	int32_t startPrefixLength=0;
179	int32_t limitPrefixLength=0;
180	if(count==0) {
181	return -1;
182	}
183	/*
184	* Prime the prefix lengths so that we don't keep prefixLength at 0 until
185	* both the start and limit indexes have moved.
186	* At the same time, we find if s is one of the start and (limit-1) names,
187	* and if not, exclude them from the actual binary search.
188	*/
189	if(0==strcmpAfterPrefix(s, toc[0].entryName, &startPrefixLength)) {
190	return 0;
191	}
192	++start;
193	--limit;
194	if(0==strcmpAfterPrefix(s, toc[limit].entryName, &limitPrefixLength)) {
195	return limit;
196	}
197	while(start<limit) {
198	int32_t i=(start+limit)/2;
199	int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength);
200	int32_t cmp=strcmpAfterPrefix(s, toc[i].entryName, &prefixLength);
201	if(cmp<0) {
202	limit=i;
203	limitPrefixLength=prefixLength;
204	} else if(cmp==0) {
205	return i;
206	} else {
207	start=i+1;
208	startPrefixLength=prefixLength;
209	}
210	}
211	return -1;
212	}
213
f3c0d7a5 A	214	U_CDECL_BEGIN
	215	static uint32_t U_CALLCONV
	216	offsetTOCEntryCount(const UDataMemory *pData) {
b75a7d8f	217	int32_t retVal=0;
374ca955	218	const UDataOffsetTOC toc = (UDataOffsetTOC )pData->toc;
b75a7d8f A	219	if (toc != NULL) {
b75a7d8f A	220	retVal = toc->count;
374ca955	221	}
b75a7d8f A	222	return retVal;
	223	}
	224
f3c0d7a5	225	static const DataHeader * U_CALLCONV
b75a7d8f A	226	offsetTOCLookupFn(const UDataMemory *pData,
b75a7d8f A	227	const char *tocEntryName,
374ca955	228	int32_t *pLength,
b75a7d8f	229	UErrorCode *pErrorCode) {
f3c0d7a5	230	(void)pErrorCode;
374ca955	231	const UDataOffsetTOC toc = (UDataOffsetTOC )pData->toc;
b75a7d8f	232	if(toc!=NULL) {
73c04bcf	233	const char base=(const char )toc;
4388f060	234	int32_t number, count=(int32_t)toc->count;
b75a7d8f A	235
b75a7d8f A	236	/* perform a binary search for the data in the common data's table of contents */
374ca955 A	237	#if defined (UDATA_DEBUG_DUMP)
374ca955 A	238	/* list the contents of the TOC each time .. not recommended */
4388f060 A	239	for(number=0; number<count; ++number) {
4388f060 A	240	fprintf(stderr, "\tx%d: %s\n", number, &base[toc->entry[number].nameOffset]);
374ca955 A	241	}
374ca955 A	242	#endif
4388f060 A	243	number=offsetTOCPrefixBinarySearch(tocEntryName, base, toc->entry, count);
	244	if(number>=0) {
	245	/* found it */
	246	const UDataOffsetTOCEntry *entry=toc->entry+number;
b75a7d8f	247	#ifdef UDATA_DEBUG
4388f060	248	fprintf(stderr, "%s: Found.\n", tocEntryName);
b75a7d8f	249	#endif
4388f060 A	250	if((number+1) < count) {
	251	*pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset);
	252	} else {
	253	*pLength = -1;
374ca955	254	}
4388f060 A	255	return (const DataHeader *)(base+entry->dataOffset);
4388f060 A	256	} else {
b75a7d8f	257	#ifdef UDATA_DEBUG
4388f060	258	fprintf(stderr, "%s: Not found.\n", tocEntryName);
b75a7d8f	259	#endif
4388f060 A	260	return NULL;
4388f060 A	261	}
b75a7d8f A	262	} else {
	263	#ifdef UDATA_DEBUG
	264	fprintf(stderr, "returning header\n");
	265	#endif
	266
	267	return pData->pHeader;
	268	}
	269	}
	270
	271
f3c0d7a5	272	static uint32_t U_CALLCONV pointerTOCEntryCount(const UDataMemory *pData) {
b75a7d8f	273	const PointerTOC toc = (PointerTOC )pData->toc;
73c04bcf	274	return (uint32_t)((toc != NULL) ? (toc->count) : 0);
b75a7d8f A	275	}
b75a7d8f A	276
f3c0d7a5	277	static const DataHeader * U_CALLCONV pointerTOCLookupFn(const UDataMemory *pData,
b75a7d8f	278	const char *name,
374ca955	279	int32_t *pLength,
b75a7d8f	280	UErrorCode *pErrorCode) {
f3c0d7a5	281	(void)pErrorCode;
b75a7d8f A	282	if(pData->toc!=NULL) {
b75a7d8f A	283	const PointerTOC toc = (PointerTOC )pData->toc;
4388f060	284	int32_t number, count=(int32_t)toc->count;
374ca955 A	285
	286	#if defined (UDATA_DEBUG_DUMP)
	287	/* list the contents of the TOC each time .. not recommended */
4388f060 A	288	for(number=0; number<count; ++number) {
4388f060 A	289	fprintf(stderr, "\tx%d: %s\n", number, toc->entry[number].entryName);
374ca955 A	290	}
374ca955 A	291	#endif
4388f060 A	292	number=pointerTOCPrefixBinarySearch(name, toc->entry, count);
	293	if(number>=0) {
	294	/* found it */
374ca955	295	#ifdef UDATA_DEBUG
4388f060	296	fprintf(stderr, "%s: Found.\n", toc->entry[number].entryName);
374ca955	297	#endif
4388f060 A	298	*pLength=-1;
	299	return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader);
	300	} else {
374ca955	301	#ifdef UDATA_DEBUG
4388f060	302	fprintf(stderr, "%s: Not found.\n", name);
374ca955	303	#endif
4388f060 A	304	return NULL;
4388f060 A	305	}
b75a7d8f A	306	} else {
	307	return pData->pHeader;
	308	}
	309	}
f3c0d7a5 A	310	U_CDECL_END
f3c0d7a5 A	311
b75a7d8f A	312
	313	static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount};
	314	static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount};
	315
	316
	317
	318	/----------------------------------------------------------------------
	319	* *
	320	* checkCommonData Validate the format of a common data file. *
374ca955	321	* Fill in the virtual function ptr based on TOC type *
b75a7d8f A	322	* If the data is invalid, close the UDataMemory *
	323	* and set the appropriate error code. *
	324	* *
	325	----------------------------------------------------------------------/
729e4ab9	326	U_CFUNC void udata_checkCommonData(UDataMemory udm, UErrorCode err) {
b75a7d8f A	327	if (U_FAILURE(*err)) {
	328	return;
	329	}
	330
4388f060 A	331	if(udm==NULL \|\| udm->pHeader==NULL) {
	332	*err=U_INVALID_FORMAT_ERROR;
	333	} else if(!(udm->pHeader->dataHeader.magic1==0xda &&
b75a7d8f A	334	udm->pHeader->dataHeader.magic2==0x27 &&
	335	udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN &&
	336	udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY)
	337	) {
	338	/* header not valid */
	339	*err=U_INVALID_FORMAT_ERROR;
	340	}
	341	else if (udm->pHeader->info.dataFormat[0]==0x43 &&
	342	udm->pHeader->info.dataFormat[1]==0x6d &&
	343	udm->pHeader->info.dataFormat[2]==0x6e &&
	344	udm->pHeader->info.dataFormat[3]==0x44 &&
	345	udm->pHeader->info.formatVersion[0]==1
	346	) {
	347	/* dataFormat="CmnD" */
	348	udm->vFuncs = &CmnDFuncs;
374ca955	349	udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
b75a7d8f A	350	}
	351	else if(udm->pHeader->info.dataFormat[0]==0x54 &&
	352	udm->pHeader->info.dataFormat[1]==0x6f &&
	353	udm->pHeader->info.dataFormat[2]==0x43 &&
	354	udm->pHeader->info.dataFormat[3]==0x50 &&
	355	udm->pHeader->info.formatVersion[0]==1
	356	) {
	357	/* dataFormat="ToCP" */
	358	udm->vFuncs = &ToCPFuncs;
374ca955	359	udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
b75a7d8f A	360	}
	361	else {
	362	/* dataFormat not recognized */
	363	*err=U_INVALID_FORMAT_ERROR;
	364	}
	365
	366	if (U_FAILURE(*err)) {
	367	/* If the data is no good and we memory-mapped it ourselves,
	368	* close the memory mapping so it doesn't leak. Note that this has
	369	* no effect on non-memory mapped data, other than clearing fields in udm.
	370	*/
	371	udata_close(udm);
	372	}
	373	}
	374
374ca955 A	375	/*
	376	* TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package
	377	* header but not its sub-items.
	378	* This function will be needed for automatic runtime swapping.
	379	* Sub-items should not be swapped to limit the swapping to the parts of the
	380	* package that are actually used.
	381	*
	382	* Since lengths of items are implicit in the order and offsets of their
	383	* ToC entries, and since offsets are relative to the start of the ToC,
	384	* a swapped version may need to generate a different data structure
	385	* with pointers to the original data items and with their lengths
	386	* (-1 for the last one if it is not known), and maybe even pointers to the
	387	* swapped versions of the items.
	388	* These pointers to swapped versions would establish a cache;
	389	* instead, each open data item could simply own the storage for its swapped
	390	* data. This fits better with the current design.
	391	*
	392	* markus 2003sep18 Jitterbug 2235
	393	*/