icuSources/i18n/tzdat.h

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 1999-2003, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *   Date        Name        Description
   7 *   11/24/99    aliu        Creation.
   8 *   12/13/1999  srl         Padded OffsetIndex to 4 byte values
   9 *   02/01/01    aliu        Added country index
  10 **********************************************************************
  11 */
  12
  13 #ifndef TZDAT_H
  14 #define TZDAT_H
  15
  16 #include "unicode/utypes.h"
  17
  18 /* This file defines the format of the memory-mapped data file
  19  * containing system time zone data for icu.  See also gentz
  20  * and tz.pl.
  21  *
  22  * The format is designed specifically to allow certain operations:
  23  *
  24  * 1. Performing a fast binary search by name, and locating the
  25  *    corresponding zone data.  This is the most important operation.
  26  *    It corresponds to the TimeZone::createTimeZone() method.
  27  *
  28  * 2. Performing a fast iteration over zones having a specific GMT
  29  *    offset.  For this operation, the zone data need not be
  30  *    retrieved, just the IDs.  This corresponds to the
  31  *    TimeZone::createAvailableIDs(int32_t) method.
  32  *
  33  * 3. Iterating over all zone IDs.  This corresponds to the
  34  *    TimeZone::createAvailableIDs() method.
  35  *
  36  * The createAvailableIDs() methods return arrays of pointers to
  37  * existing static UnicodeString IDs that it owns.  Thus
  38  * createAvailableIDs() needs a way to reference one of these IDs when
  39  * iterating.  Note that these IDs are _not_ stored in the
  40  * memory-mapped data file, so we cannot store offsets.  To solve this
  41  * problem, we define a canonical index number for each zone.  This
  42  * index number runs from 0..n-1, where n is the total number of
  43  * zones.  The name table is stored in index number order, and we
  44  * provide a table that is sorted by GMT offset with keys being GMT
  45  * offset values and values being canonical index numbers.
  46  *
  47  * (Later, we might change createAvailableIDs() to return char*
  48  * strings rather than UnicodeString pointers.  In that case, this
  49  * data structure could be modified to index into the name table
  50  * directly.)
  51  *
  52  * Any field with a name ending in "delta" is an offset value
  53  * from the first byte of the TZHeader structure, unless otherwise
  54  * specified.
  55  *
  56  * When using the name index table and the offset index table,
  57  * code can determine whether an indexed zone is a standard
  58  * zone or a DST zone by examining its delta.  If the delta is
  59  * less than dstDelta, it is a standard zone.  Otherwise it
  60  * is a DST zone.
  61  */
  62
  63 /* tz.icu data file format
  64  *
  65  * Here is the overall structure of the tz.icu file, expressed as a
  66  * pseudo-C-struct.  Refer to actual struct declarations below for
  67  * more details on each subelement of tz.icu.  Many of the elements
  68  * are of variable size.  Padding is used when necessary to align
  69  * words and longs properly; see structure declarations for details.
  70  *
  71  * struct tz.icu {
  72  *
  73  *  // The header gives offsets to various tables within tz.icu.
  74  *
  75  *  struct TZHeader            header;
  76  *
  77  *  // The equivalency groups; repeated; one element for each
  78  *  // equivalency group.  Each one is of variable size.  Typically,
  79  *  // an equivalency group is found given an ID index.  The index is
  80  *  // used to find an entry of nameToEquiv[].  That entry is added to
  81  *  // the start of the header to obtain a pointer to one of the
  82  *  // entries equivTable[i].  The number of equivalency groups (n1)
  83  *  // is not stored anywhere; it can be discovered by walking the
  84  *  // table.
  85  *
  86  *  struct TZEquivalencyGroup  equivTable[n1];
  87  *
  88  *  // An index which groups timezones having the same raw offset
  89  *  // together; repeated; one element for each raw offset struct.
  90  *  // Typically the code walks through this table starting at the
  91  *  // beginning until the desired index is found or the end of the
  92  *  // table is reached.  The number of offset groups (n2) is not
  93  *  // stored anywhere; it can be discovered by walking the table.
  94  *
  95  *  struct OffsetIndex         offsetIndex[n2];
  96  *
  97  *  // An index which groups timezones having the same country
  98  *  // together; repeated; one element for each country.  Typically
  99  *  // the code walks through this table starting at the beginning
 100  *  // until the desired country is found or the end of the table is
 101  *  // reached.  The number of countries (n3) is not stored
 102  *  // anywhere; it can be discovered by walking the table.
 103  *
 104  *  struct CountryIndex        countryIndex[n3];
 105  *
 106  *  // An array of offsets, one for each name.  Each offset, when
 107  *  // added to the start of the header, gives a pointer to an entry
 108  *  // equivTable[i], the equivalency group struct for the given zone.
 109  *  // The number of names is given by TZHeader.count.  The order of
 110  *  // entries is the same as nameTable[].
 111  *
 112  *  uint32                     nameToEquiv[header.count];
 113  *
 114  *  // All the time zone IDs, in sorted order, with 0 termination.
 115  *  // The number of entries is given by TZHeader.count.  The total
 116  *  // number of characters in this table (n4) is not stored anywhere;
 117  *  // it can be discovered by walking the table.  The order of
 118  *  // entries is the same as nameToEquiv[].
 119  *
 120  *  char                       nameTable[n4];
 121  * };
 122  */
 123
 124 // Name and type of the data file (together "tz.icu"), used to identify and validate the data
 125
 126 #define TZ_DATA_NAME "tz"
 127 #define TZ_DATA_TYPE "icu"
 128
 129 #if !UCONFIG_NO_FORMATTING
 130
 131 // Values of fields in UDataInfo:
 132
 133 // Signature bytes spelling "zone" in ASCII.  TZ_SIG[] is encoded as numeric literals for compatibility with the HP compiler
 134 static const uint8_t TZ_SIG_0 = 0x7a; // ASCII 'z'
 135 static const uint8_t TZ_SIG_1 = 0x6f; // ASCII 'o'
 136 static const uint8_t TZ_SIG_2 = 0x6e; // ASCII 'n'
 137 static const uint8_t TZ_SIG_3 = 0x65; // ASCII 'e'
 138
 139 // Data format version.  This must match the version number at the top of
 140 // tz.txt as well as the version number in the udata header.
 141 static const int8_t TZ_FORMAT_VERSION = 4; // stored as formatVersion[0] in UDataInfo
 142
 143 struct TZHeader {
 144     uint16_t versionYear;     // year part of the data version, e.g. "1999j" -> 1999
 145     uint16_t versionSuffix;   // suffix letter of the data version as a number, e.g. "1999j" -> 10
 146
 147     uint32_t count;           // total number of zones (standardCount + dstCount); also the number of names
 148
 149     uint32_t equivTableDelta;  // offset from the first byte of this header to the equivalency group table
 150     uint32_t offsetIndexDelta; // offset from the first byte of this header to the gmtOffset index table
 151
 152     uint32_t countryIndexDelta; // offset from the first byte of this header to the country code index table
 153
 154     uint32_t nameIndexDelta;   // offset from the first byte of this header to the name index table
 155     // The name index table is an array of 'count' 32-bit offsets, one per name, each
 156     // measured from the start of this header to an equivalency group table entry.
 157
 158     uint32_t nameTableDelta;   // offset from the first byte of this header to the name (aka ID) table
 159     // The name table contains all 'count' zone IDs, in sort order, each name
 160     // terminated by a zero byte.
 161 };
 162
 163 struct StandardZone {
 164     int32_t  gmtOffset;   // GMT offset in milliseconds; a standard zone carries no rule data
 165 };
 166
 167 struct TZRule {
 168     uint8_t  month;  // month of the rule; NOTE(review): 0- or 1-based is not stated here - confirm against gentz
 169     int8_t   dowim;  // presumably "day of week in month"; encoding not shown here - confirm against gentz
 170     int8_t   dow;    // presumably day of week; encoding not shown here - confirm against gentz
 171     uint16_t time;   // time in minutes
 172     int8_t   mode;   // time mode (w/s/u): a TimeZone::TimeMode enum value stored as an integer
 173 };
 174
 175 struct DSTZone {
 176     int32_t  gmtOffset;   // GMT offset in milliseconds
 177     uint16_t dstSavings;  // DST savings in minutes
 178     TZRule   onsetRule;   // rule describing the onset of DST
 179     TZRule   ceaseRule;   // rule describing when DST ceases
 180 };
 181
 182 /**
 183  * This variable-sized struct represents a time zone equivalency group.
 184  * This is a set of one or more zones that are identical in GMT offset
 185  * and rules, but differ in ID.  The struct has a variable size because
 186  * the standard zone has no rule data, and also because it contains a
 187  * variable number of index values listing the zones in the group.
 188  * The struct is padded to take up 4n bytes so that 4-byte integers
 189  * within the struct stay 4-aligned (namely, the gmtOffset members of
 190  * the zone structs).
 191  */
 192 struct TZEquivalencyGroup {
 193     uint16_t nextEntryDelta;    // delta to the next group; 0 for last entry
 194     uint8_t  isDST;             // selects the union member: != 0 means u.d (DSTZone), 0 means u.s (StandardZone)
 195     uint8_t  reserved;          // reserved byte; brings the fields before the union to 4 bytes
 196     union {
 197         struct {
 198             StandardZone zone;  // offset-only zone data
 199             uint16_t     count; // number of zones in this group
 200             uint16_t     index; // first zone index; there are actually 'count' uint16_t's here
 201         } s;                    // layout used when isDST == 0
 202         struct {
 203             DSTZone      zone;  // offset plus DST savings and onset/cease rules
 204             uint16_t     count; // number of zones in this group
 205             uint16_t     index; // first zone index; there are actually 'count' uint16_t's here
 206         } d;                    // layout used when isDST != 0
 207     } u;
 208     // After the last index value there may be two bytes of padding to
 209     // make the whole struct have size 4n bytes.
 210 };
 211
 212 /**
 213  * This variable-sized struct makes up the offset index table.  To get
 214  * from one table entry to the next, add the nextEntryDelta.  If the
 215  * nextEntryDelta is zero then this is the last entry.  The offset
 216  * index table is designed for sequential access, not random access.
 217  * Given the small number of distinct offsets (39 in 1999j), this
 218  * suffices.
 219  *
 220  * The value of defaultZone is the zone within this list that should be
 221  * selected as the default zone in the absence of any other
 222  * discriminating information.  This information comes from the file
 223  * tz.default.  Note that this is itself a zone number, like
 224  * those in the array starting at &zoneNumber.
 225  *
 226  * The gmtOffset field must be 4-aligned for some architectures.  To
 227  * ensure this, we do three things: 1. The entire struct is 4-aligned.
 228  * 2. The gmtOffset is placed at a 4-aligned position within the
 229  * struct.  3. The size of the whole structure is padded out to 4n
 230  * bytes.  We achieve this last condition by adding two bytes of
 231  * padding after the last zoneNumber, if count is _even_.  That is,
 232  * the struct size is 10+2count+padding, where padding is (count%2==0
 233  * ? 2:0).  See gentz for implementation.
 234  */
 235 struct OffsetIndex {
 236     int32_t   gmtOffset;  // raw GMT offset shared by the zones listed here, in ms - 4-aligned
 237     uint16_t  nextEntryDelta; // add to get to the next table entry; 0 marks the last entry
 238     uint16_t  defaultZone; // default zone for this offset: a zone number from 0..TZHeader.count-1
 239     uint16_t  count;       // number of zone numbers in this entry
 240     uint16_t  zoneNumber; // first zone number; there are actually 'count' uint16_t's here
 241     // Following the 'count' uint16_t's starting with zoneNumber,
 242     // there may be two bytes of padding to make the whole struct have
 243     // a size of 4n.  nextEntryDelta skips over any padding.
 244 };
 245
 246 /**
 247  * This variable-sized struct makes up the country index table.  To get
 248  * from one table entry to the next, add the nextEntryDelta.  If the
 249  * nextEntryDelta is zero then this is the last entry.  The country
 250  * index table is designed for sequential access, not random access.
 251  *
 252  * The intcode is an integer representation of the two-letter country
 253  * code.  It is computed as (c1-'A')*32 + (c0-'A') where the country
 254  * code is a two-character string c1 c0, 'A' <= ci <= 'Z'.
 255  *
 256  * There are no 4-byte integers in this table, so we don't 4-align the
 257  * entries.
 258  */
 259 struct CountryIndex {
 260     uint16_t  intcode; // two-letter country code c1 c0 packed as (c1-'A')*32 + (c0-'A')
 261     uint16_t  nextEntryDelta; // add to get to the next table entry; 0 marks the last entry
 262     uint16_t  count;          // number of zone numbers in this entry
 263     uint16_t  zoneNumber; // first zone number; there are actually 'count' uint16_t's here
 264 };
 265
 266 #endif /* #if !UCONFIG_NO_FORMATTING */
 267
 268 #endif