]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/tzdat.h
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / i18n / tzdat.h
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
3* Copyright (C) 1999-2003, International Business Machines
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* Date Name Description
7* 11/24/99 aliu Creation.
8* 12/13/1999 srl Padded OffsetIndex to 4 byte values
9* 02/01/01 aliu Added country index
10**********************************************************************
11*/
12
13#ifndef TZDAT_H
14#define TZDAT_H
15
16#include "unicode/utypes.h"
17
18/* This file defines the format of the memory-mapped data file
19 * containing system time zone data for icu. See also gentz
20 * and tz.pl.
21 *
22 * The format is designed specifically to allow certain operations:
23 *
24 * 1. Performing a fast binary search by name, and locating the
25 * corresponding zone data. This is the most important operation.
26 * It corresponds to the TimeZone::createTimeZone() method.
27 *
28 * 2. Performing a fast iteration over zones having a specific GMT
29 * offset. For this operation, the zone data need not be
30 * retrieved, just the IDs. This corresponds to the
31 * TimeZone::createAvailableIDs(int32_t) method.
32 *
33 * 3. Iterating over all zone IDs. This corresponds to the
34 * TimeZone::createAvailableIDs() method.
35 *
36 * The createAvailableIDs() methods return arrays of pointers to
37 * existing static UnicodeString IDs that it owns. Thus
38 * createAvailableIDs() needs a way to reference one of these IDs when
39 * iterating. Note that these IDs are _not_ stored in the
40 * memory-mapped data file, so we cannot store offsets. To solve this
41 * problem, we define a canonical index number for each zone. This
42 * index number runs from 0..n-1, where n is the total number of
43 * zones. The name table is stored in index number order, and we
44 * provide a table that is sorted by GMT offset with keys being GMT
45 * offset values and values being canonical index numbers.
46 *
47 * (Later, we might change createAvailableIDs() to return char*
48 * strings rather than UnicodeString pointers. In that case, this
49 * data structure could be modified to index into the name table
50 * directly.)
51 *
52 * Any field with a name ending in "Delta" is an offset value
53 * from the first byte of the TZHeader structure, unless otherwise
54 * specified.
55 *
56 * When using the name index table and the offset index table,
57 * code can determine whether an indexed zone is a standard
58 * zone or a DST zone by examining its delta. If the delta is
59 * less than dstDelta, it is a standard zone. Otherwise it
60 * is a DST zone.
61 */
62
63/* tz.icu data file format
64 *
65 * Here is the overall structure of the tz.icu file, expressed as a
66 * pseudo-C-struct. Refer to actual struct declarations below for
67 * more details on each subelement of tz.icu. Many of the elements
68 * are of variable size. Padding is used when necessary to align
69 * words and longs properly; see structure declarations for details.
70 *
71 * struct tz.icu {
72 *
73 * // The header gives offsets to various tables within tz.icu.
74 *
75 * struct TZHeader header;
76 *
77 * // The equivalency groups; repeated; one element for each
78 * // equivalency group. Each one is of variable size. Typically,
79 * // an equivalency group is found given an ID index. The index is
80 * // used to find an entry of nameToEquiv[]. That entry is added to
81 * // the start of the header to obtain a pointer to one of the
82 * // entries equivTable[i]. The number of equivalency groups (n1)
83 * // is not stored anywhere; it can be discovered by walking the
84 * // table.
85 *
86 * struct TZEquivalencyGroup equivTable[n1];
87 *
88 * // An index which groups timezones having the same raw offset
89 * // together; repeated; one element for each raw offset struct.
90 * // Typically the code walks through this table starting at the
91 * // beginning until the desired index is found or the end of the
92 * // table is reached. The number of offset groups (n2) is not
93 * // stored anywhere; it can be discovered by walking the table.
94 *
95 * struct OffsetIndex offsetIndex[n2];
96 *
97 * // An index which groups timezones having the same country
98 * // together; repeated; one element for each country. Typically
99 * // the code walks through this table starting at the beginning
100 * // until the desired country is found or the end of the table is
101 * // reached. The number of countries (n3) is not stored
102 * // anywhere; it can be discovered by walking the table.
103 *
104 * struct CountryIndex countryIndex[n3];
105 *
106 * // An array of offsets, one for each name. Each offset, when
107 * // added to the start of the header, gives a pointer to an entry
108 * // equivTable[i], the equivalency group struct for the given zone.
109 * // The number of names is given by TZHeader.count. The order of
110 * // entries is the same as nameTable[].
111 *
112 * uint32 nameToEquiv[header.count];
113 *
114 * // All the time zone IDs, in sorted order, with 0 termination.
115 * // The number of entries is given by TZHeader.count. The total
116 * // number of characters in this table (n4) is not stored anywhere;
117 * // it can be discovered by walking the table. The order of
118 * // entries is the same as nameToEquiv[].
119 *
120 * char nameTable[n4];
121 * };
122 */
123
// Name and type of the time zone data file ("tz.icu"); used to
// identify and validate the data.

#define TZ_DATA_NAME "tz"
#define TZ_DATA_TYPE "icu"
128
129#if !UCONFIG_NO_FORMATTING
130
131// Fields in UDataInfo:
132
// The four signature bytes spell "zone" in ASCII.  They are written as
// numeric literals (rather than character literals) for compatibility
// with the HP compiler.
static const uint8_t TZ_SIG_0 = 0x7a; // 'z'
static const uint8_t TZ_SIG_1 = 0x6f; // 'o'
static const uint8_t TZ_SIG_2 = 0x6e; // 'n'
static const uint8_t TZ_SIG_3 = 0x65; // 'e'
138
// Format version of the data file (formatVersion[0] in the udata
// header).  This must match the version number at the top of tz.txt
// as well as the version number in the udata header.
static const int8_t TZ_FORMAT_VERSION = 4; // formatVersion[0]
142
// Header found at the start of the tz.icu data.  Every member whose
// name ends in "Delta" is an offset measured from the first byte of
// this header to the table it names.
struct TZHeader {
    uint16_t versionYear;       // year part of the data version, e.g. "1999j" -> 1999
    uint16_t versionSuffix;     // letter part of the version as a number, e.g. "1999j" -> 10

    uint32_t count;             // total number of zones: standardCount + dstCount

    uint32_t equivTableDelta;   // delta to equivalency group table
    uint32_t offsetIndexDelta;  // delta to gmtOffset index table

    uint32_t countryIndexDelta; // delta to country code index table

    // The name index table is an array of 'count' 32-bit offsets from
    // the start of this header to equivalency group table entries.
    uint32_t nameIndexDelta;    // delta to name index table

    // The name table contains all zone IDs, in sort order, each name
    // terminated by a zero byte.
    uint32_t nameTableDelta;    // delta to name (aka ID) table
};
162
// Data for a zone that has no DST rules: only its raw GMT offset.
struct StandardZone {
    int32_t gmtOffset; // gmt offset in milliseconds
};
166
// A single DST transition rule; DSTZone holds one for the onset and
// one for the cease of daylight time.
//
// NOTE(review): with natural alignment the compiler places one padding
// byte before 'time' and one after 'mode', giving an 8-byte struct.
// The member order must not change, since it defines the data layout.
struct TZRule {
    uint8_t  month; // month
    int8_t   dowim; // dowim (presumably day-of-week-in-month; confirm against gentz)
    int8_t   dow;   // dow (presumably day-of-week; confirm against gentz)
    uint16_t time;  // time in minutes
    int8_t   mode;  // (w/s/u) == TimeZone::TimeMode enum as int
};
174
175struct DSTZone {
176 int32_t gmtOffset; // gmtoffset in milliseconds
177 uint16_t dstSavings; // savings in minutes
178 TZRule onsetRule; // onset rule
179 TZRule ceaseRule; // cease rule
180};
181
182/**
183 * This variable-sized struct represents a time zone equivalency group.
184 * This is a set of one or more zones that are identical in GMT offset
185 * and rules, but differ in ID. The struct has a variable size because
186 * the standard zone has no rule data, and also because it contains a
187 * variable number of index values listing the zones in the group.
188 * The struct is padded to take up 4n bytes so that 4-byte integers
189 * within the struct stay 4-aligned (namely, the gmtOffset members of
190 * the zone structs).
191 */
192struct TZEquivalencyGroup {
193 uint16_t nextEntryDelta; // 0 for last entry
194 uint8_t isDST; // != 0 for DSTZone
195 uint8_t reserved;
196 union {
197 struct {
198 StandardZone zone;
199 uint16_t count;
200 uint16_t index; // There are actually 'count' uint16_t's here
201 } s;
202 struct {
203 DSTZone zone;
204 uint16_t count;
205 uint16_t index; // There are actually 'count' uint16_t's here
206 } d;
207 } u;
208 // There may be two bytes of padding HERE to make the whole struct
209 // have size 4n bytes.
210};
211
/**
 * This variable-sized struct makes up the offset index table.  To get
 * from one table entry to the next, add the nextEntryDelta.  If the
 * nextEntryDelta is zero then this is the last entry.  The offset
 * index table is designed for sequential access, not random access.
 * Given the small number of distinct offsets (39 in 1999j), this
 * suffices.
 *
 * The value of defaultZone is the zone within this list that should be
 * selected as the default zone in the absence of any other
 * discriminating information.  This information comes from the file
 * tz.default.  Note that this is itself a zone number, like
 * those in the array starting at &zoneNumber.
 *
 * The gmtOffset field must be 4-aligned for some architectures.  To
 * ensure this, we do three things:  1. The entire struct is 4-aligned.
 * 2. The gmtOffset is placed at a 4-aligned position within the
 * struct.  3. The size of the whole structure is padded out to 4n
 * bytes.  We achieve this last condition by adding two bytes of
 * padding after the last zoneNumber, if count is _even_.  That is,
 * the struct size is 10+2count+padding, where padding is (count%2==0
 * ? 2:0).  See gentz for implementation.
 */
struct OffsetIndex {
    int32_t  gmtOffset;      // in ms - 4-aligned
    uint16_t nextEntryDelta; // added to reach the next entry; 0 for the last entry
    uint16_t defaultZone;    // a zone number from 0..TZHeader.count-1
    uint16_t count;          // number of zone numbers that follow
    uint16_t zoneNumber;     // There are actually 'count' uint16_t's here
    // Following the 'count' uint16_t's starting with zoneNumber,
    // there may be two bytes of padding to make the whole struct have
    // a size of 4n.  nextEntryDelta skips over any padding.
};
245
/**
 * This variable-sized struct makes up the country index table.  To get
 * from one table entry to the next, add the nextEntryDelta.  If the
 * nextEntryDelta is zero then this is the last entry.  The country
 * index table is designed for sequential access, not random access.
 *
 * The intcode is an integer representation of the two-letter country
 * code.  It is computed as (c1-'A')*32 + (c0-'A') where the country
 * code is a two-character string c1 c0, 'A' <= ci <= 'Z'.
 *
 * There are no 4-byte integers in this table, so we don't 4-align the
 * entries.
 */
struct CountryIndex {
    uint16_t intcode;        // see above
    uint16_t nextEntryDelta; // added to reach the next entry; 0 for the last entry
    uint16_t count;          // number of zone numbers that follow
    uint16_t zoneNumber;     // There are actually 'count' uint16_t's here
};
265
266#endif /* #if !UCONFIG_NO_FORMATTING */
267
268#endif