[apple/icu.git] / icuSources / i18n / tzdat.h

/*
**********************************************************************
*   Copyright (C) 1999-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/24/99    aliu        Creation.
*   12/13/1999  srl         Padded OffsetIndex to 4 byte values
*   02/01/01    aliu        Added country index
**********************************************************************
*/

#ifndef TZDAT_H
#define TZDAT_H

#include "unicode/utypes.h"

/* This file defines the format of the memory-mapped data file
 * containing system time zone data for icu.  See also gentz
 * and tz.pl.
 *
 * The format is designed specifically to allow certain operations:
 *
 * 1. Performing a fast binary search by name, and locating the
 *    corresponding zone data.  This is the most important operation.
 *    It corresponds to the TimeZone::createTimeZone() method.
 *
 * 2. Performing a fast iteration over zones having a specific GMT
 *    offset.  For this operation, the zone data need not be
 *    retrieved, just the IDs.  This corresponds to the
 *    TimeZone::createAvailableIDs(int32_t) method.
 *
 * 3. Iterating over all zone IDs.  This corresponds to the
 *    TimeZone::createAvailableIDs() method.
 *
 * The createAvailableIDs() methods return arrays of pointers to
 * existing static UnicodeString IDs that it owns.  Thus
 * createAvailableIDs() needs a way to reference one of these IDs when
 * iterating.  Note that these IDs are _not_ stored in the
 * memory-mapped data file, so we cannot store offsets.  To solve this
 * problem, we define a canonical index number for each zone.  This
 * index number runs from 0..n-1, where n is the total number of
 * zones.  The name table is stored in index number order, and we
 * provide a table that is sorted by GMT offset with keys being GMT
 * offset values and values being canonical index numbers.
 *
 * (Later, we might change createAvailableIDs() to return char*
 * strings rather than UnicodeString pointers.  In that case, this
 * data structure could be modified to index into the name table
 * directly.)
 *
 * Any field with a name ending in "delta" is an offset value
 * from the first byte of the TZHeader structure, unless otherwise
 * specified.
 *
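 * When using the name index table and the offset index table,
 * code can determine whether an indexed zone is a standard
 * zone or a DST zone by examining the isDST field of the zone's
 * TZEquivalencyGroup entry: it is zero for a standard zone and
 * nonzero for a DST zone.  (This comment formerly described
 * comparing the zone's delta against "dstDelta", but TZHeader as
 * declared below has no such field.)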
 */

/* tz.icu data file format
 *
 * Here is the overall structure of the tz.icu file, expressed as a
 * pseudo-C-struct.  Refer to actual struct declarations below for
 * more details on each subelement of tz.icu.  Many of the elements
 * are of variable size.  Padding is used when necessary to align
 * words and longs properly; see structure declarations for details.
 *
 * struct tz.icu {
 *
 *  // The header gives offsets to various tables within tz.icu.
 *
 *  struct TZHeader            header;
 * 
 *  // The equivalency groups; repeated; one element for each
 *  // equivalency group.  Each one is of variable size.  Typically,
 *  // an equivalency group is found given an ID index.  The index is
 *  // used to find an entry of nameToEquiv[].  That entry is added to
 *  // the start of the header to obtain a pointer to one of the
 *  // entries equivTable[i].  The number of equivalency groups (n1)
 *  // is not stored anywhere; it can be discovered by walking the
 *  // table.
 *
 *  struct TZEquivalencyGroup  equivTable[n1];
 * 
 *  // An index which groups timezones having the same raw offset
 *  // together; repeated; one element for each raw offset struct.
 *  // Typically the code walks through this table starting at the
 *  // beginning until the desired index is found or the end of the
 *  // table is reached.  The number of offset groups (n2) is not
 *  // stored anywhere; it can be discovered by walking the table.
 *
 *  struct OffsetIndex         offsetIndex[n2];
 * 
 *  // An index which groups timezones having the same country
 *  // together; repeated; one element for each country.  Typically
 *  // the code walks through this table starting at the beginning
 *  // until the desired country is found or the end of the table is
 *  // reached.  The number of country groups (n3) is not stored
 *  // anywhere; it can be discovered by walking the table.
 *
 *  struct CountryIndex        countryIndex[n3];
 * 
 *  // An array of offsets, one for each name.  Each offset, when
 *  // added to the start of the header, gives a pointer to an entry
 *  // equivTable[i], the equivalency group struct for the given zone.
 *  // The number of names is given by TZHeader.count.  The order of
 *  // entries is the same as nameTable[].
 *
 *  uint32                     nameToEquiv[header.count];
 * 
 *  // All the time zone IDs, in sorted order, with 0 termination.
 *  // The number of entries is given by TZHeader.count.  The total
 *  // number of characters in this table (n4) is not stored anywhere;
 *  // it can be discovered by walking the table.  The order of
 *  // entries is the same as nameToEquiv[].
 *
 *  char                       nameTable[n4];
 * };
 */

// Information used to identify and validate the data

// Name and type of the data item.  Joined with a dot they spell
// "tz.icu", the data file whose format is described in the comment
// above.
// NOTE(review): presumably these are the name/type arguments used when
// the data is opened through udata -- confirm against the loading code.
#define TZ_DATA_NAME "tz"
#define TZ_DATA_TYPE "icu"

#if !UCONFIG_NO_FORMATTING

// Fields in UDataInfo:

// TZ_SIG[]: the four signature bytes 'z' 'o' 'n' 'e'.  They are spelled
// as numeric literals rather than character literals for compatibility
// with the HP compiler.
static const uint8_t
    TZ_SIG_0 = 0x7a,  // z
    TZ_SIG_1 = 0x6f,  // o
    TZ_SIG_2 = 0x6e,  // n
    TZ_SIG_3 = 0x65;  // e

// Data format version, i.e. formatVersion[0].  Keep this in step with
// the version number at the top of tz.txt and with the version number
// in the udata header.
static const int8_t TZ_FORMAT_VERSION = 4;

// Header found at the start of the tz data.  Every member whose name
// ends in "Delta" is a byte offset measured from the first byte of
// this struct.  Member order and types define the file layout; do not
// reorder.
struct TZHeader {
    // Data version split into its two parts,
    // e.g. "1999j" -> versionYear 1999, versionSuffix 10.
    uint16_t versionYear,
             versionSuffix;

    // Number of zones: standardCount + dstCount.
    uint32_t count;

    uint32_t equivTableDelta,    // -> equivalency group table
             offsetIndexDelta,   // -> gmtOffset index table
             countryIndexDelta,  // -> country code index table
             nameIndexDelta,     // -> name index table: an array of
                                 //    'count' 32-bit offsets from the
                                 //    start of this header to
                                 //    equivalency group table entries
             nameTableDelta;     // -> name (aka ID) table: all zone
                                 //    IDs, in sort order, each name
                                 //    terminated by a zero byte
};

// Zone data for a standard zone, which carries no rule data: only the
// GMT offset.  Used as the zone member of TZEquivalencyGroup's "s"
// variant.
struct StandardZone {
    int32_t  gmtOffset;   // gmt offset in milliseconds
};

// One rule of a DST zone; DSTZone holds two of these (onset and cease).
// NOTE(review): dowim/dow are presumably day-of-week-in-month and
// day-of-week -- confirm against gentz.
// NOTE(review): 'time' is a 2-byte member following three 1-byte
// members, so the layout relies on the compiler's natural alignment
// (typically one pad byte before 'time') -- confirm it matches what
// gentz writes.
struct TZRule {
    uint8_t  month;
    int8_t   dowim,
             dow;
    uint16_t time;  // in minutes
    int8_t   mode;  // (w/s/u); equals the TimeZone::TimeMode enum
                    // value, stored as an int
};

// Zone data for a DST zone: GMT offset, amount of savings, and the
// onset and cease rules.  Used as the zone member of
// TZEquivalencyGroup's "d" variant.
struct DSTZone {
    int32_t  gmtOffset;    // in milliseconds
    uint16_t dstSavings;   // in minutes
    TZRule   onsetRule,
             ceaseRule;
};

/**
 * This variable-sized struct represents a time zone equivalency group.
 * This is a set of one or more zones that are identical in GMT offset
 * and rules, but differ in ID.  The struct has a variable size because
 * the standard zone has no rule data, and also because it contains a
 * variable number of index values listing the zones in the group.
 * The struct is padded to take up 4n bytes so that 4-byte integers
 * within the struct stay 4-aligned (namely, the gmtOffset members of
 * the zone structs).
 */
struct TZEquivalencyGroup {
    uint16_t nextEntryDelta;  // 0 marks the last entry
    uint8_t  isDST,           // nonzero when the zone data is a DSTZone
             reserved;
    union {
        // Variant holding a StandardZone.
        struct {
            StandardZone zone;
            uint16_t     count,
                         index;  // only the first value; 'count'
                                 // uint16_t's are actually stored here
        } s;
        // Variant holding a DSTZone.
        struct {
            DSTZone      zone;
            uint16_t     count,
                         index;  // only the first value; 'count'
                                 // uint16_t's are actually stored here
        } d;
    } u;
    // Two bytes of padding may follow HERE so that the whole struct
    // occupies 4n bytes.
};

/**
 * This variable-sized struct makes up the offset index table.  To get
 * from one table entry to the next, add the nextEntryDelta.  If the
 * nextEntryDelta is zero then this is the last entry.  The offset
 * index table is designed for sequential access, not random access.
 * Given the small number of distinct offsets (39 in 1999j), this
 * suffices.
 *
 * The value of defaultZone is the zone within this list that should be
 * selected as the default zone in the absence of any other
 * discriminating information.  This information comes from the file
 * tz.default.  Note that this is itself a zone number, like
 * those in the array starting at &zoneNumber.
 *
 * The gmtOffset field must be 4-aligned for some architectures.  To
 * ensure this, we do three things: 1. The entire struct is 4-aligned.
 * 2. The gmtOffset is placed at a 4-aligned position within the
 * struct.  3. The size of the whole structure is padded out to 4n
 * bytes.  We achieve this last condition by adding two bytes of
 * padding after the last zoneNumber, if count is _even_.  That is,
 * the struct size is 10+2count+padding, where padding is (count%2==0
 * ? 2:0).  See gentz for implementation.
 */
struct OffsetIndex {
    int32_t  gmtOffset;       // in ms; kept at a 4-aligned position
    uint16_t nextEntryDelta,  // add to reach the next entry (skips any
                              // padding); zero in the last entry
             defaultZone,     // a zone number from 0..TZHeader.count-1
             count,
             zoneNumber;      // only the first value; 'count'
                              // uint16_t's are actually stored here
    // After the 'count' uint16_t's that begin at zoneNumber there may
    // be two bytes of padding, bringing the whole struct to a size of
    // 4n.
};

/**
 * This variable-sized struct makes up the country index table.  To get
 * from one table entry to the next, add the nextEntryDelta.  If the
 * nextEntryDelta is zero then this is the last entry.  The country
 * index table is designed for sequential access, not random access.
 *
 * The intcode is an integer representation of the two-letter country
 * code.  It is computed as (c1-'A')*32 + (c0-'A') where the country
 * code is a two-character string c1 c0, 'A' <= ci <= 'Z'.
 *
 * There are no 4-byte integers in this table, so we don't 4-align the
 * entries.
 */
struct CountryIndex {
    uint16_t intcode,         // two-letter country code c1 c0 packed
                              // as (c1-'A')*32 + (c0-'A')
             nextEntryDelta,  // add to reach the next entry; zero in
                              // the last entry
             count,
             zoneNumber;      // only the first value; 'count'
                              // uint16_t's are actually stored here
};

#endif /* #if !UCONFIG_NO_FORMATTING */

#endif
Commit	Line	Data
b75a7d8f A	1	/*
	2	**********************************************************************
	3	* Copyright (C) 1999-2003, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	* Date Name Description
	7	* 11/24/99 aliu Creation.
	8	* 12/13/1999 srl Padded OffsetIndex to 4 byte values
	9	* 02/01/01 aliu Added country index
	10	**********************************************************************
	11	*/
	12
	13	#ifndef TZDAT_H
	14	#define TZDAT_H
	15
	16	#include "unicode/utypes.h"
	17
	18	/* This file defines the format of the memory-mapped data file
	19	* containing system time zone data for icu. See also gentz
	20	* and tz.pl.
	21	*
	22	* The format is designed specifically to allow certain operations:
	23	*
	24	* 1. Performing a fast binary search by name, and locating the
	25	* corresponding zone data. This is the most important operation.
	26	* It corresponds to the TimeZone::createTimeZone() method.
	27	*
	28	* 2. Performing a fast iteration over zones having a specific GMT
	29	* offset. For this operation, the zone data need not be
	30	* retrieved, just the IDs. This corresponds to the
	31	* TimeZone::createAvailableIDs(int32_t) method.
	32	*
	33	* 3. Iterating over all zone IDs. This corresponds to the
	34	* TimeZone::createAvailableIDs() method.
	35	*
	36	* The createAvailableIDs() methods return arrays of pointers to
	37	* existing static UnicodeString IDs that it owns. Thus
	38	* createAvailableIDs() needs a way to reference one of these IDs when
	39	* iterating. Note that these IDs are _not_ stored in the
	40	* memory-mapped data file, so we cannot store offsets. To solve this
	41	* problem, we define a canonical index number for each zone. This
	42	* index number runs from 0..n-1, where n is the total number of
	43	* zones. The name table is stored in index number order, and we
	44	* provide a table that is sorted by GMT offset with keys being GMT
	45	* offset values and values being canonical index numbers.
	46	*
	47	* (Later, we might change createAvailableIDs() to return char*
	48	* strings rather than UnicodeString pointers. In that case, this
	49	* data structure could be modified to index into the name table
	50	* directly.)
	51	*
	52	* Any field with a name ending in "delta" is an offset value
	53	* from the first byte of the TZHeader structure, unless otherwise
	54	* specified.
	55	*
	56	* When using the name index table and the offset index table,
	57	* code can determine whether an indexed zone is a standard
	58	* zone or a DST zone by examining its delta. If the delta is
	59	* less than dstDelta, it is a standard zone. Otherwise it
	60	* is a DST zone.
	61	*/
	62
	63	/* tz.icu data file format
	64	*
65	* Here is the overall structure of the tz.icu file, expressed as a
66	* pseudo-C-struct. Refer to actual struct declarations below for
67	* more details on each subelement of tz.icu. Many of the elements
68	* are of variable size. Padding is used when necessary to align
69	* words and longs properly; see structure declarations for details.
70	*
71	* struct tz.icu {
72	*
73	* // The header gives offsets to various tables within tz.icu.
74	*
75	* struct TZHeader header;
76	*
77	* // The equivalency groups; repeated; one element for each
78	* // equivalency group. Each one is of variable size. Typically,
79	* // an equivalency group is found given an ID index. The index is
80	* // used to find an entry of nameToEquiv[]. That entry is added to
81	* // the start of the header to obtain a pointer to one of the
82	* // entries equivTable[i]. The number of equivalency groups (n1)
83	* // is not stored anywhere; it can be discovered by walking the
84	* // table.
85	*
86	* struct TZEquivalencyGroup equivTable[n1];
87	*
88	* // An index which groups timezones having the same raw offset
89	* // together; repeated; one element for each raw offset struct.
90	* // Typically the code walks through this table starting at the
91	* // beginning until the desired index is found or the end of the
92	* // table is reached. The number of offset groups (n2) is not
93	* // stored anywhere; it can be discovered by walking the table.
94	*
95	* struct OffsetIndex offsetIndex[n2];
96	*
97	* // An index which groups timezones having the same country
98	* // together; repeated; one element for each country. Typically
99	* // the code walks through this table starting at the beginning
100	* // until the desired country is found or the end of the table is
101	* // reached. The number of offset groups (n3) is not stored
102	* // anywhere; it can be discovered by walking the table.
103	*
104	* struct CountryIndex countryIndex[n3];
105	*
106	* // An array of offsets, one for each name. Each offset, when
107	* // added to the start of the header, gives a pointer to an entry
108	* // equivTable[i], the equivalency group struct for the given zone.
109	* // The nubmer of names is given by TZHeader.count. The order of
110	* // entries is the same as nameTable[].
111	*
112	* uint32 nameToEquiv[header.count];
113	*
114	* // All the time zone IDs, in sorted order, with 0 termination.
115	* // The number of entries is given by TZHeader.count. The total
116	* // number of characters in this table (n4) is not stored anywhere;
117	* // it can be discovered by walking the table. The order of
118	* // entries is the same as nameToEquiv[].
119	*
120	* char nameTable[n4];
121	* };
122	*/
123
124	// Information used to identify and validate the data
125
126	#define TZ_DATA_NAME "tz"
127	#define TZ_DATA_TYPE "icu"
128
129	#if !UCONFIG_NO_FORMATTING
130
131	// Fields in UDataInfo:
132
133	// TZ_SIG[] is encoded as numeric literals for compatibility with the HP compiler
134	static const uint8_t TZ_SIG_0 = 0x7a; // z
135	static const uint8_t TZ_SIG_1 = 0x6f; // o
136	static const uint8_t TZ_SIG_2 = 0x6e; // n
137	static const uint8_t TZ_SIG_3 = 0x65; // e
138
139	// This must match the version number at the top of tz.txt as
140	// well as the version number in the udata header.
141	static const int8_t TZ_FORMAT_VERSION = 4; // formatVersion[0]
142
143	struct TZHeader {
144	uint16_t versionYear; // e.g. "1999j" -> 1999
145	uint16_t versionSuffix; // e.g. "1999j" -> 10
146
147	uint32_t count; // standardCount + dstCount
148
149	uint32_t equivTableDelta; // delta to equivalency group table
150	uint32_t offsetIndexDelta; // delta to gmtOffset index table
151
152	uint32_t countryIndexDelta; // delta to country code index table
153
154	uint32_t nameIndexDelta; // delta to name index table
155	// The name index table is an array of 'count' 32-bit offsets from
156	// the start of this header to equivalency group table entries.
157
158	uint32_t nameTableDelta; // delta to name (aka ID) table
159	// The name table contains all zone IDs, in sort order, each name
160	// terminated by a zero byte.
161	};
162
163	struct StandardZone {
164	int32_t gmtOffset; // gmt offset in milliseconds
165	};
166
167	struct TZRule {
168	uint8_t month; // month
169	int8_t dowim; // dowim
170	int8_t dow; // dow
171	uint16_t time; // time in minutes
172	int8_t mode; // (w/s/u) == TimeZone::TimeMode enum as int
173	};
174
175	struct DSTZone {
176	int32_t gmtOffset; // gmtoffset in milliseconds
177	uint16_t dstSavings; // savings in minutes
178	TZRule onsetRule; // onset rule
179	TZRule ceaseRule; // cease rule
180	};
181
182	/**
183	* This variable-sized struct represents a time zone equivalency group.
184	* This is a set of one or more zones that are identical in GMT offset
185	* and rules, but differ in ID. The struct has a variable size because
186	* the standard zone has no rule data, and also because it contains a
187	* variable number of index values listing the zones in the group.
188	* The struct is padded to take up 4n bytes so that 4-byte integers
189	* within the struct stay 4-aligned (namely, the gmtOffset members of
190	* the zone structs).
191	*/
192	struct TZEquivalencyGroup {
193	uint16_t nextEntryDelta; // 0 for last entry
194	uint8_t isDST; // != 0 for DSTZone
195	uint8_t reserved;
196	union {
197	struct {
198	StandardZone zone;
199	uint16_t count;
200	uint16_t index; // There are actually 'count' uint16_t's here
201	} s;
202	struct {
203	DSTZone zone;
204	uint16_t count;
205	uint16_t index; // There are actually 'count' uint16_t's here
206	} d;
207	} u;
208	// There may be two bytes of padding HERE to make the whole struct
209	// have size 4n bytes.
210	};
211
212	/**
213	* This variable-sized struct makes up the offset index table. To get
214	* from one table entry to the next, add the nextEntryDelta. If the
215	* nextEntryDelta is zero then this is the last entry. The offset
216	* index table is designed for sequential access, not random access.
217	* Given the small number of distinct offsets (39 in 1999j), this
218	* suffices.
219	*
220	* The value of default is the zone within this list that should be
221	* selected as the default zone in the absence of any other
222	* discriminating information. This information comes from the file
223	* tz.default. Note that this is itself a zone number, like
224	* those in the array starting at &zoneNumber.
225	*
226	* The gmtOffset field must be 4-aligned for some architectures. To
227	* ensure this, we do two things: 1. The entire struct is 4-aligned.
228	* 2. The gmtOffset is placed at a 4-aligned position within the
229	* struct. 3. The size of the whole structure is padded out to 4n
230	* bytes. We achieve this last condition by adding two bytes of
231	* padding after the last zoneNumber, if count is _even_. That is,
232	* the struct size is 10+2count+padding, where padding is (count%2==0
233	* ? 2:0). See gentz for implementation.
234	*/
235	struct OffsetIndex {
236	int32_t gmtOffset; // in ms - 4-aligned
237	uint16_t nextEntryDelta;
238	uint16_t defaultZone; // a zone number from 0..TZHeader.count-1
239	uint16_t count;
240	uint16_t zoneNumber; // There are actually 'count' uint16_t's here
241	// Following the 'count' uint16_t's starting with zoneNumber,
242	// there may be two bytes of padding to make the whole struct have
243	// a size of 4n. nextEntryDelta skips over any padding.
244	};
245
246	/**
247	* This variable-sized struct makes up the country index table. To get
248	* from one table entry to the next, add the nextEntryDelta. If the
249	* nextEntryDelta is zero then this is the last entry. The country
250	* index table is designed for sequential access, not random access.
251	*
252	* The intcode is an integer representation of the two-letter country
253	* code. It is computed as (c1-'A')*32 + (c0-'A') where the country
254	* code is a two-character string c1 c0, 'A' <= ci <= 'Z'.
255	*
256	* There are no 4-byte integers in this table, so we don't 4-align the
257	* entries.
258	*/
259	struct CountryIndex {
260	uint16_t intcode; // see above
261	uint16_t nextEntryDelta;
262	uint16_t count;
263	uint16_t zoneNumber; // There are actually 'count' uint16_t's here
264	};
265
266	#endif /* #if !UCONFIG_NO_FORMATTING */
267
268	#endif