/*
**********************************************************************
* Copyright (c) 2003-2004, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Created: July 10 2003
**********************************************************************
*/
11 #include "tzfile.h" // from Olson tzcode archive, copied to this dir
16 #undef min // windows.h/STL conflict
17 #undef max // windows.h/STL conflict
18 // "identifier was truncated to 'number' characters" warning
19 #pragma warning(disable: 4786)
//--------------------------------------------------------------------
// Time utilities
//--------------------------------------------------------------------

const long SECS_PER_YEAR      = 31536000; // 365 days
const long SECS_PER_LEAP_YEAR = 31622400; // 366 days

/**
 * Return true if the given calendar year is a leap year under the
 * Gregorian rules (divisible by 4, except centuries not divisible
 * by 400).
 */
bool isLeap(int y) {
    return (y%4 == 0) && ((y%100 != 0) || (y%400 == 0)); // Gregorian
}

/**
 * Return the number of seconds in the given calendar year:
 * SECS_PER_LEAP_YEAR for leap years, SECS_PER_YEAR otherwise.
 */
long secsPerYear(int y) {
    return isLeap(y) ? SECS_PER_LEAP_YEAR : SECS_PER_YEAR;
}

/**
 * Given a calendar year, return the GMT epoch seconds for midnight
 * GMT of January 1 of that year.  yearToSeconds(1970) == 0.
 * Years before 1970 yield negative values.
 */
long yearToSeconds(int year) {
    // inefficient but foolproof
    long s = 0;
    int y = 1970;
    // Walk forward from the epoch, adding whole years...
    while (y < year) {
        s += secsPerYear(y++);
    }
    // ...or backward from the epoch, subtracting whole years.
    while (y > year) {
        s -= secsPerYear(--y);
    }
    return s;
}

/**
 * Given 1970 GMT epoch seconds, return the calendar year containing
 * that time.  secondsToYear(0) == 1970.
 */
int secondsToYear(long seconds) {
    // inefficient but foolproof
    int y = 1970;
    long s = 0;
    if (seconds >= 0) {
        // Accumulate years until the running total passes 'seconds';
        // at that point y is one past the containing year.
        for (;;) {
            s += secsPerYear(y++);
            if (s > seconds) break;
        }
        --y;
    } else {
        // Step backward until the start of year y is at or before
        // 'seconds'; y is then the containing year.
        for (;;) {
            s -= secsPerYear(--y);
            if (s <= seconds) break;
        }
    }
    return y;
}
//--------------------------------------------------------------------
// Types
//--------------------------------------------------------------------

// Forward declarations.  SimplifiedZoneType is needed by ZoneType's
// converting constructor; FinalZone is used by
// ZoneInfo::mergeFinalData before its definition.
struct FinalZone;
struct SimplifiedZoneType;

// A transition from one ZoneType to another
// Minimal size = 5 bytes (4+1)
struct Transition {
    long time;  // seconds, 1970 epoch
    int  type;  // index into 'ZoneInfo.types' 0..255
    Transition(long _time, int _type) : time(_time), type(_type) {}
};

// A behavior mode (what zic calls a 'type') of a time zone.
// Minimal size = 6 bytes (4+1+3bits)
// SEE: SimplifiedZoneType
struct ZoneType {
    long rawoffset; // raw seconds offset from GMT
    long dstoffset; // dst seconds offset from GMT

    // We don't really need any of the following, but they are
    // retained for possible future use.  See SimplifiedZoneType.
    int  abbr;      // index into ZoneInfo.abbrs 0..n-1
    bool isdst;
    bool isstd;
    bool isgmt;

    // Intentionally non-explicit: implicit conversion from
    // SimplifiedZoneType is relied upon when copying into a
    // vector<ZoneType>.
    ZoneType(const SimplifiedZoneType&); // used by optimizeTypeList

    // All flags are initialized so that a default-constructed value
    // never carries indeterminate members.
    ZoneType() : rawoffset(-1), dstoffset(-1), abbr(-1),
                 isdst(false), isstd(false), isgmt(false) {}

    // A restricted equality, of just the raw and dst offset
    bool matches(const ZoneType& other) const {
        return rawoffset == other.rawoffset &&
            dstoffset == other.dstoffset;
    }
};
150 // A collection of transitions from one ZoneType to another, together
151 // with a list of the ZoneTypes. A ZoneInfo object may have a long
152 // list of transitions between a smaller list of ZoneTypes.
154 // This object represents the contents of a single zic-created
157 vector
<Transition
> transitions
;
158 vector
<ZoneType
> types
;
159 vector
<string
> abbrs
;
163 int finalYear
; // -1 if none
165 // If this is an alias, then all other fields are meaningless, and
166 // this field will point to the "real" zone 0..n-1.
167 int aliasTo
; // -1 if this is a "real" zone
169 // If there are aliases TO this zone, then the following set will
170 // contain their index numbers (each index >= 0).
173 ZoneInfo() : finalYear(-1), aliasTo(-1) {}
175 void mergeFinalData(const FinalZone
& fz
);
177 void optimizeTypeList();
179 // Set this zone to be an alias TO another zone.
180 void setAliasTo(int index
);
182 // Clear the list of aliases OF this zone.
185 // Add an alias to the list of aliases OF this zone.
186 void addAlias(int index
);
188 // Is this an alias to another zone?
189 bool isAlias() const {
193 // Retrieve alias list
194 const set
<int>& getAliases() const {
198 void print(ostream
& os
, const string
& id
) const;
201 void ZoneInfo::clearAliases() {
206 void ZoneInfo::addAlias(int index
) {
207 assert(aliasTo
< 0 && index
>= 0 && aliases
.find(index
) == aliases
.end());
208 aliases
.insert(index
);
211 void ZoneInfo::setAliasTo(int index
) {
213 assert(aliases
.size() == 0);
217 typedef map
<string
, ZoneInfo
> ZoneMap
;
219 typedef ZoneMap::const_iterator ZoneMapIter
;
221 //--------------------------------------------------------------------
223 //--------------------------------------------------------------------
225 // Global map holding all our ZoneInfo objects, indexed by id.
228 //--------------------------------------------------------------------
229 // zoneinfo file parsing
230 //--------------------------------------------------------------------
// Read zic-coded 32-bit integer from file
//
// The value is stored as four bytes, most significant first, and is
// interpreted as a signed (two's complement) 32-bit quantity.
//
// @param file an already-open binary stream positioned at the value
// @param minv smallest acceptable value (inclusive)
// @param maxv largest acceptable value (inclusive)
// @return the decoded value
// @throws out_of_range if the decoded value is outside [minv, maxv]
//
// The stream state is not checked here.  If the read fails, the bytes
// that were not read are treated as zero, so the result is
// deterministic; callers are expected to test the stream afterwards.
long readcoded(std::ifstream& file, long minv=std::numeric_limits<long>::min(),
                                    long maxv=std::numeric_limits<long>::max()) {
    unsigned char buf[4] = {0, 0, 0, 0}; // must be UNSIGNED
    file.read((char*)buf, 4);

    // Assemble in an unsigned accumulator: shifting a promoted byte
    // into the sign bit of an int is not well-defined.
    unsigned long bits = 0;
    for (int i=0; i<4; ++i) {
        bits = (bits << 8) | buf[i];
    }

    // Sign-extend from 32 bits without relying on the width of long.
    long val;
    if (bits & 0x80000000UL) {
        val = -(long)(~bits & 0x7FFFFFFFUL) - 1;
    } else {
        val = (long)bits;
    }

    if (val < minv || val > maxv) {
        std::ostringstream os;
        os << "coded value out-of-range: " << val << ", expected ["
           << minv << ", " << maxv << "]";
        throw std::out_of_range(os.str());
    }
    return val;
}
// Read a boolean value
//
// Reads a single byte, which must be 0 (false) or 1 (true).
//
// @param file an already-open binary stream positioned at the value
// @return true if the byte is 1, false if it is 0
// @throws out_of_range if the byte is neither 0 nor 1
//
// The stream state is not checked here.  If the read fails the byte
// is treated as 0; callers are expected to test the stream afterwards.
bool readbool(std::ifstream& file) {
    char c = 0;
    file.read(&c, 1);
    if (c!=0 && c!=1) {
        std::ostringstream os;
        os << "boolean value out-of-range: " << (int)c;
        throw std::out_of_range(os.str());
    }
    return (c!=0);
}
263 * Read the zoneinfo file structure (see tzfile.h) into a ZoneInfo
264 * @param file an already-open file stream
266 void readzoneinfo(ifstream
& file
, ZoneInfo
& info
) {
269 // Check for TZ_ICU_MAGIC signature at file start. If we get a
270 // signature mismatch, it means we're trying to read a file which
271 // isn't a ICU-modified-zic-created zoneinfo file. Typically this
272 // means the user is passing in a "normal" zoneinfo directory, or
273 // a zoneinfo directory that is polluted with other files, or that
274 // the user passed in the wrong directory.
277 if (strncmp(buf
, TZ_ICU_MAGIC
, 4) != 0) {
278 throw invalid_argument("TZ_ICU_MAGIC signature missing");
281 // Read reserved bytes. The first of these will be a version byte.
283 if (*(ICUZoneinfoVersion
*)&buf
!= TZ_ICU_VERSION
) {
284 throw invalid_argument("File version mismatch");
288 long isgmtcnt
= readcoded(file
, 0);
289 long isdstcnt
= readcoded(file
, 0);
290 long leapcnt
= readcoded(file
, 0);
291 long timecnt
= readcoded(file
, 0);
292 long typecnt
= readcoded(file
, 0);
293 long charcnt
= readcoded(file
, 0);
295 // Confirm sizes that we assume to be equal. These assumptions
296 // are drawn from a reading of the zic source (2003a), so they
297 // should hold unless the zic source changes.
298 if (isgmtcnt
!= typecnt
|| isdstcnt
!= typecnt
) {
299 throw invalid_argument("count mismatch between tzh_ttisgmtcnt, tzh_ttisdstcnt, tth_typecnt");
302 // Used temporarily to store transition times and types. We need
303 // to do this because the times and types are stored in two
305 vector
<long> transitionTimes(timecnt
, -1); // temporary
306 vector
<int> transitionTypes(timecnt
, -1); // temporary
308 // Read transition times
309 for (i
=0; i
<timecnt
; ++i
) {
310 transitionTimes
[i
] = readcoded(file
);
313 // Read transition types
314 for (i
=0; i
<timecnt
; ++i
) {
316 file
.read((char*) &c
, 1);
318 if (t
< 0 || t
>= typecnt
) {
320 os
<< "illegal type: " << t
<< ", expected [0, " << (typecnt
-1) << "]";
321 throw out_of_range(os
.str());
323 transitionTypes
[i
] = t
;
326 // Build transitions vector out of corresponding times and types.
327 for (i
=0; i
<timecnt
; ++i
) {
328 info
.transitions
.push_back(Transition(transitionTimes
[i
], transitionTypes
[i
]));
331 // Read types (except for the isdst and isgmt flags, which come later (why??))
332 for (i
=0; i
<typecnt
; ++i
) {
335 type
.rawoffset
= readcoded(file
);
336 type
.dstoffset
= readcoded(file
);
337 type
.isdst
= readbool(file
);
340 file
.read((char*) &c
, 1);
343 if (type
.isdst
!= (type
.dstoffset
!= 0)) {
344 throw invalid_argument("isdst does not reflect dstoffset");
347 info
.types
.push_back(type
);
349 assert(info
.types
.size() == (unsigned) typecnt
);
351 // Read the abbreviation string
353 // All abbreviations are concatenated together, with a 0 at
354 // the end of each abbr.
355 char* str
= new char[charcnt
+ 8];
356 file
.read(str
, charcnt
);
358 // Split abbreviations apart into individual strings. Record
359 // offset of each abbr in a vector.
360 vector
<int> abbroffset
;
361 char *limit
=str
+charcnt
;
362 for (char* p
=str
; p
<limit
; ++p
) {
365 info
.abbrs
.push_back(string(start
, p
-start
));
366 abbroffset
.push_back(start
-str
);
369 // Remap all the abbrs. Old value is offset into concatenated
370 // raw abbr strings. New value is index into vector of
371 // strings. E.g., 0,5,10,14 => 0,1,2,3.
373 // Keep track of which abbreviations get used.
374 vector
<bool> abbrseen(abbroffset
.size(), false);
376 for (vector
<ZoneType
>::iterator it
=info
.types
.begin();
377 it
!=info
.types
.end();
379 vector
<int>::const_iterator x
=
380 find(abbroffset
.begin(), abbroffset
.end(), it
->abbr
);
381 if (x
==abbroffset
.end()) {
382 // TODO: Modify code to add a new string to the end of
383 // the abbr list when a middle offset is given, e.g.,
384 // "abc*def*" where * == '\0', take offset of 1 and
385 // make the array "abc", "def", "bc", and translate 1
386 // => 2. NOT CRITICAL since we don't even use the
387 // abbr at this time.
389 // TODO: Re-enable this warning if we start using
390 // the Olson abbr data, or if the above TODO is completed.
392 os
<< "Warning: unusual abbr offset " << it
->abbr
393 << ", expected one of";
394 for (vector
<int>::const_iterator y
=abbroffset
.begin();
395 y
!=abbroffset
.end(); ++y
) {
398 cerr
<< os
.str() << "; using 0" << endl
;
402 int index
= x
- abbroffset
.begin();
404 abbrseen
[index
] = true;
408 for (int ii
=0;ii
<(int) abbrseen
.size();++ii
) {
410 cerr
<< "Warning: unused abbreviation: " << ii
<< endl
;
415 // Read leap second info, if any.
416 // *** We discard leap second data. ***
417 for (i
=0; i
<leapcnt
; ++i
) {
418 readcoded(file
); // transition time
419 readcoded(file
); // total correction after above
423 for (i
=0; i
<typecnt
; ++i
) info
.types
[i
].isstd
= readbool(file
);
426 for (i
=0; i
<typecnt
; ++i
) info
.types
[i
].isgmt
= readbool(file
);
429 //--------------------------------------------------------------------
430 // Directory and file reading
431 //--------------------------------------------------------------------
434 * Process a single zoneinfo file, adding the data to ZONEINFO
435 * @param path the full path to the file, e.g., ".\zoneinfo\America\Los_Angeles"
436 * @param id the zone ID, e.g., "America/Los_Angeles"
438 void handleFile(string path
, string id
) {
439 // Check for duplicate id
440 if (ZONEINFO
.find(id
) != ZONEINFO
.end()) {
442 os
<< "duplicate zone ID: " << id
;
443 throw invalid_argument(os
.str());
446 ifstream
file(path
.c_str(), ios::in
| ios::binary
);
448 throw invalid_argument("can't open file");
451 readzoneinfo(file
, info
);
455 throw invalid_argument("read error");
458 // Check eof-relative pos (there may be a cleaner way to do this)
459 long eofPos
= (long) file
.tellg();
460 file
.seekg(0, ios::end
);
461 eofPos
= eofPos
- (long) file
.tellg();
464 os
<< (-eofPos
) << " unprocessed bytes at end";
465 throw invalid_argument(os
.str());
472 * Recursively scan the given directory, calling handleFile() for each
473 * file in the tree. The user should call with the root directory and
474 * a prefix of "". The function will call itself with non-empty
479 void scandir(string dirname
, string prefix
="") {
481 WIN32_FIND_DATA FileData
;
483 // Get the first file
484 hList
= FindFirstFile((dirname
+ "\\*").c_str(), &FileData
);
485 if (hList
== INVALID_HANDLE_VALUE
) {
486 cerr
<< "Error: Invalid directory: " << dirname
<< endl
;
490 string
name(FileData
.cFileName
);
491 string
path(dirname
+ "\\" + name
);
492 if (FileData
.dwFileAttributes
& FILE_ATTRIBUTE_DIRECTORY
) {
493 if (name
!= "." && name
!= "..") {
494 scandir(path
, prefix
+ name
+ "/");
498 string id
= prefix
+ name
;
499 handleFile(path
, id
);
500 } catch (const exception
& e
) {
501 cerr
<< "Error: While processing \"" << path
<< "\", "
507 if (!FindNextFile(hList
, &FileData
)) {
508 if (GetLastError() == ERROR_NO_MORE_FILES
) {
518 void scandir(string dir
, string prefix
="") {
520 struct dirent
*dir_entry
;
521 struct stat stat_info
;
523 vector
<string
> subdirs
;
524 vector
<string
> subfiles
;
526 if ((dp
= opendir(dir
.c_str())) == NULL
) {
527 cerr
<< "Error: Invalid directory: " << dir
<< endl
;
530 if (!getcwd(pwd
, sizeof(pwd
))) {
531 cerr
<< "Error: Directory name too long" << endl
;
535 while ((dir_entry
= readdir(dp
)) != NULL
) {
536 string name
= dir_entry
->d_name
;
537 string path
= dir
+ "/" + name
;
538 lstat(dir_entry
->d_name
,&stat_info
);
539 if (S_ISDIR(stat_info
.st_mode
)) {
540 if (name
!= "." && name
!= "..") {
541 subdirs
.push_back(path
);
542 subdirs
.push_back(prefix
+ name
+ "/");
543 // scandir(path, prefix + name + "/");
547 string id
= prefix
+ name
;
548 subfiles
.push_back(path
);
549 subfiles
.push_back(id
);
550 // handleFile(path, id);
551 } catch (const exception
& e
) {
552 cerr
<< "Error: While processing \"" << path
<< "\", "
561 for(int i
=0;i
<(int)subfiles
.size();i
+=2) {
563 handleFile(subfiles
[i
], subfiles
[i
+1]);
564 } catch (const exception
& e
) {
565 cerr
<< "Error: While processing \"" << subfiles
[i
] << "\", "
570 for(int i
=0;i
<(int)subdirs
.size();i
+=2) {
571 scandir(subdirs
[i
], subdirs
[i
+1]);
577 //--------------------------------------------------------------------
578 // Final zone and rule info
579 //--------------------------------------------------------------------
/**
 * Read and discard the current line.
 *
 * Characters are consumed up to and including the next '\n', or until
 * end of input if no newline remains.
 */
void consumeLine(std::istream& in) {
    int c;
    do {
        c = in.get();
    } while (c != EOF && c != '\n');
}
// Names of the three time modes, indexed 0..2.
const char* TIME_MODE[] = {"w", "s", "u"};

// Number of days in each month, 0==Jan.  February is listed as 28.
const int MONTH_LEN[] = {31,28,31,30,31,30,31,31,30,31,30,31};

// Seconds per hour.
const int HOUR = 3600;

/**
 * The final (open-ended) behavior of a zone: a raw offset that takes
 * effect from a given year onward, the ID of the rule governing it,
 * and the set of alias names linked to the zone.
 */
struct FinalZone {
    int offset; // raw offset
    int year; // takes effect for y >= year
    std::string ruleid;
    std::set<std::string> aliases;

    /**
     * @throws invalid_argument if the offset is not strictly within
     * (-16h, +16h) or the year is outside [1900, 2050).
     */
    FinalZone(int _offset, int _year, const std::string& _ruleid) :
        offset(_offset), year(_year), ruleid(_ruleid) {
        if (offset <= -16*HOUR || offset >= 16*HOUR ||
            year < 1900 || year >= 2050) {
            throw std::invalid_argument("Invalid input arguments");
        }
    }

    // Placeholder value; offset and year are -1 and ruleid is empty.
    FinalZone() : offset(-1), year(-1) {}

    /**
     * Record an alias for this zone.
     * @throws invalid_argument if the alias was already recorded
     */
    void addLink(const std::string& alias) {
        // insert() reports whether the element was new, so a single
        // lookup both detects the duplicate and stores the alias.
        if (!aliases.insert(alias).second) {
            throw std::invalid_argument("Duplicate alias");
        }
    }
};
624 struct FinalRulePart
{
630 int offset
; // dst offset, usually either 0 or 1:00
632 // Isstd and isgmt only have 3 valid states, corresponding to local
633 // wall time, local standard time, and GMT standard time.
634 // Here is how the isstd & isgmt flags are set by zic:
635 //| case 's': /* Standard */
636 //| rp->r_todisstd = TRUE;
637 //| rp->r_todisgmt = FALSE;
638 //| case 'w': /* Wall */
639 //| rp->r_todisstd = FALSE;
640 //| rp->r_todisgmt = FALSE;
641 //| case 'g': /* Greenwich */
642 //| case 'u': /* Universal */
643 //| case 'z': /* Zulu */
644 //| rp->r_todisstd = TRUE;
645 //| rp->r_todisgmt = TRUE;
649 bool isset
; // used during building; later ignored
651 FinalRulePart() : isset(false) {}
652 void set(const string
& _mode
,
661 throw invalid_argument("FinalRulePart set twice");
664 if (_mode
== "DOWLEQ") {
666 } else if (_mode
== "DOWGEQ") {
668 } else if (_mode
== "DOM") {
671 throw invalid_argument("Unrecognized FinalRulePart mode");
680 if (month
< 0 || month
>= 12 || dom
< 1 || dom
> MONTH_LEN
[month
] ||
681 (mode
!= DOM
&& (dow
< 0 || dow
>= 7)) ||
682 offset
< 0 || offset
> HOUR
||
684 throw invalid_argument("Invalid input arguments");
689 * Return the time mode as an ICU SimpleTimeZone int from 0..2;
692 int timemode() const {
695 return 2; // gmt standard
698 return 1; // local standard
700 return 0; // local wall
703 // The SimpleTimeZone encoding method for rules is as follows:
706 // DOWGEQ: dom -(dow+1)
707 // DOWLEQ: -dom -(dow+1)
708 // E.g., to encode Mon>=7, use stz_dowim=7, stz_dow=-2
709 // to encode Mon<=7, use stz_dowim=-7, stz_dow=-2
710 // to encode 7, use stz_dowim=7, stz_dow=0
711 // Note that for this program and for SimpleTimeZone, 0==Jan,
712 // but for this program 0==Sun while for SimpleTimeZone 1==Sun.
715 * Return a "dowim" param suitable for SimpleTimeZone.
717 int stz_dowim() const {
718 return (mode
== DOWLEQ
) ? -dom
: dom
;
722 * Return a "dow" param suitable for SimpleTimeZone.
724 int stz_dow() const {
725 return (mode
== DOM
) ? 0 : -(dow
+1);
730 FinalRulePart part
[2];
733 return part
[0].isset
&& part
[1].isset
;
736 void print(ostream
& os
) const;
739 map
<string
,FinalZone
> finalZones
;
740 map
<string
,FinalRule
> finalRules
;
742 map
<string
, set
<string
> > links
;
743 map
<string
, string
> reverseLinks
;
744 map
<string
, string
> linkSource
; // id => "Olson link" or "ICU alias"
747 * Predicate used to find FinalRule objects that do not have both
748 * sub-parts set (indicating an error in the input file).
750 bool isNotSet(const pair
<const string
,FinalRule
>& p
) {
751 return !p
.second
.isset();
755 * Predicate used to find FinalZone objects that do not map to a known
756 * rule (indicating an error in the input file).
758 bool mapsToUnknownRule(const pair
<const string
,FinalZone
>& p
) {
759 return finalRules
.find(p
.second
.ruleid
) == finalRules
.end();
763 * This set is used to make sure each rule in finalRules is used at
764 * least once. First we populate it with all the rules from
765 * finalRules; then we remove all the rules referred to in
768 set
<string
> ruleIDset
;
770 void insertRuleID(const pair
<string
,FinalRule
>& p
) {
771 ruleIDset
.insert(p
.first
);
774 void eraseRuleID(const pair
<string
,FinalZone
>& p
) {
775 ruleIDset
.erase(p
.second
.ruleid
);
779 * Populate finalZones and finalRules from the given istream.
781 void readFinalZonesAndRules(istream
& in
) {
786 if (in
.eof() || !in
) {
788 } else if (token
== "zone") {
789 // zone Africa/Cairo 7200 1995 Egypt # zone Africa/Cairo, offset 7200, year >= 1995, rule Egypt (0)
792 in
>> id
>> offset
>> year
>> ruleid
;
794 finalZones
[id
] = FinalZone(offset
, year
, ruleid
);
795 } else if (token
== "rule") {
796 // rule US DOWGEQ 3 1 0 7200 0 0 3600 # 52: US, file data/northamerica, line 119, mode DOWGEQ, April, dom 1, Sunday, time 7200, isstd 0, isgmt 0, offset 3600
797 // rule US DOWLEQ 9 31 0 7200 0 0 0 # 53: US, file data/northamerica, line 114, mode DOWLEQ, October, dom 31, Sunday, time 7200, isstd 0, isgmt 0, offset 0
799 int month
, dom
, dow
, time
, offset
;
801 in
>> id
>> mode
>> month
>> dom
>> dow
>> time
>> isstd
>> isgmt
>> offset
;
803 FinalRule
& fr
= finalRules
[id
];
804 int p
= fr
.part
[0].isset
? 1 : 0;
805 fr
.part
[p
].set(mode
, month
, dom
, dow
, time
, isstd
, isgmt
, offset
);
806 } else if (token
== "link") {
807 string fromid
, toid
; // fromid == "real" zone, toid == alias
808 in
>> fromid
>> toid
;
809 // DO NOT consumeLine(in);
810 if (finalZones
.find(toid
) != finalZones
.end()) {
811 throw invalid_argument("Bad link: `to' id is a \"real\" zone");
814 links
[fromid
].insert(toid
);
815 reverseLinks
[toid
] = fromid
;
817 linkSource
[fromid
] = "Olson link";
818 linkSource
[toid
] = "Olson link";
819 } else if (token
.length() > 0 && token
[0] == '#') {
822 throw invalid_argument("Unrecognized keyword");
826 if (!in
.eof() && !in
) {
827 throw invalid_argument("Parse failure");
830 // Perform validity check: Each rule should have data for 2 parts.
831 if (count_if(finalRules
.begin(), finalRules
.end(), isNotSet
) != 0) {
832 throw invalid_argument("One or more incomplete rule pairs");
835 // Perform validity check: Each zone should map to a known rule.
836 if (count_if(finalZones
.begin(), finalZones
.end(), mapsToUnknownRule
) != 0) {
837 throw invalid_argument("One or more zones refers to an unknown rule");
840 // Perform validity check: Each rule should be referred to by a zone.
842 for_each(finalRules
.begin(), finalRules
.end(), insertRuleID
);
843 for_each(finalZones
.begin(), finalZones
.end(), eraseRuleID
);
844 if (ruleIDset
.size() != 0) {
845 throw invalid_argument("Unused rules");
849 //--------------------------------------------------------------------
850 // Resource bundle output
851 //--------------------------------------------------------------------
853 // SEE olsontz.h FOR RESOURCE BUNDLE DATA LAYOUT
855 void ZoneInfo::print(ostream
& os
, const string
& id
) const {
856 // Implement compressed format #2:
858 os
<< " /* " << id
<< " */ ";
861 assert(aliases
.size() == 0);
862 os
<< ":int { " << aliasTo
<< " } "; // No endl - save room for comment.
866 os
<< ":array {" << endl
;
868 vector
<Transition
>::const_iterator trn
;
869 vector
<ZoneType
>::const_iterator typ
;
872 os
<< " :intvector { ";
873 for (trn
= transitions
.begin(); trn
!= transitions
.end(); ++trn
) {
874 if (!first
) os
<< ", ";
881 os
<< " :intvector { ";
882 for (typ
= types
.begin(); typ
!= types
.end(); ++typ
) {
883 if (!first
) os
<< ", ";
885 os
<< typ
->rawoffset
<< ", " << typ
->dstoffset
;
889 os
<< " :bin { \"" << hex
<< setfill('0');
890 for (trn
= transitions
.begin(); trn
!= transitions
.end(); ++trn
) {
891 os
<< setw(2) << trn
->type
;
893 os
<< dec
<< "\" }" << endl
;
895 // Final zone info, if any
896 if (finalYear
!= -1) {
897 os
<< " \"" << finalRuleID
<< "\"" << endl
;
898 os
<< " :intvector { " << finalOffset
<< ", "
899 << finalYear
<< " }" << endl
;
902 // Alias list, if any
903 if (aliases
.size() != 0) {
905 os
<< " :intvector { ";
906 for (set
<int>::const_iterator i
=aliases
.begin(); i
!=aliases
.end(); ++i
) {
907 if (!first
) os
<< ", ";
914 os
<< " } "; // no trailing 'endl', so comments can be placed.
918 operator<<(ostream
& os
, const ZoneMap
& zoneinfo
) {
920 for (ZoneMapIter it
= zoneinfo
.begin();
921 it
!= zoneinfo
.end();
924 it
->second
.print(os
, it
->first
);
925 os
<< "//Z#" << c
++ << endl
;
930 // print the string list
931 ostream
& printStringList( ostream
& os
, const ZoneMap
& zoneinfo
) {
933 int col
= 0; // column
934 os
<< " Names {" << endl
936 for (ZoneMapIter it
= zoneinfo
.begin();
937 it
!= zoneinfo
.end();
943 const string
& id
= it
->first
;
944 os
<< "\"" << id
<< "\"";
945 col
+= id
.length() + 2;
947 os
<< " // " << n
<< endl
953 os
<< " // " << (n
-1) << endl
959 //--------------------------------------------------------------------
961 //--------------------------------------------------------------------
963 // Unary predicate for finding transitions after a given time
964 bool isAfter(const Transition t
, long thresh
) {
965 return t
.time
>= thresh
;
969 * A zone type that contains only the raw and dst offset. Used by the
970 * optimizeTypeList() method.
972 struct SimplifiedZoneType
{
975 SimplifiedZoneType() : rawoffset(-1), dstoffset(-1) {}
976 SimplifiedZoneType(const ZoneType
& t
) : rawoffset(t
.rawoffset
),
977 dstoffset(t
.dstoffset
) {}
978 bool operator<(const SimplifiedZoneType
& t
) const {
979 return rawoffset
< t
.rawoffset
||
980 (rawoffset
== t
.rawoffset
&&
981 dstoffset
< t
.dstoffset
);
986 * Construct a ZoneType from a SimplifiedZoneType. Note that this
987 * discards information; the new ZoneType will have meaningless
988 * (empty) abbr, isdst, isstd, and isgmt flags; this is appropriate,
989 * since ignoring these is how we do optimization (we have no use for
990 * these in historical transitions).
992 ZoneType::ZoneType(const SimplifiedZoneType
& t
) :
993 rawoffset(t
.rawoffset
), dstoffset(t
.dstoffset
),
994 abbr(-1), isdst(false), isstd(false), isgmt(false) {}
997 * Optimize the type list to remove excess entries. The type list may
998 * contain entries that are distinct only in terms of their dst, std,
999 * or gmt flags. Since we don't care about those flags, we can reduce
1000 * the type list to a set of unique raw/dst offset pairs, and remap
1001 * the type indices in the transition list, which stores, for each
1002 * transition, a transition time and a type index.
1004 void ZoneInfo::optimizeTypeList() {
1005 // Assemble set of unique types; only those in the `transitions'
1006 // list, since there may be unused types in the `types' list
1007 // corresponding to transitions that have been trimmed (during
1008 // merging of final data).
1010 if (aliasTo
>= 0) return; // Nothing to do for aliases
1012 // If there are zero transitions and one type, then leave that as-is.
1013 if (transitions
.size() == 0) {
1014 if (types
.size() != 1) {
1015 cerr
<< "Error: transition count = 0, type count = " << types
.size() << endl
;
1020 set
<SimplifiedZoneType
> simpleset
;
1021 for (vector
<Transition
>::const_iterator i
=transitions
.begin();
1022 i
!=transitions
.end(); ++i
) {
1023 assert(i
->type
< (int)types
.size());
1024 simpleset
.insert(types
[i
->type
]);
1027 // Map types to integer indices
1028 map
<SimplifiedZoneType
,int> simplemap
;
1030 for (set
<SimplifiedZoneType
>::const_iterator i
=simpleset
.begin();
1031 i
!=simpleset
.end(); ++i
) {
1032 simplemap
[*i
] = n
++;
1035 // Remap transitions
1036 for (vector
<Transition
>::iterator i
=transitions
.begin();
1037 i
!=transitions
.end(); ++i
) {
1038 assert(i
->type
< (int)types
.size());
1039 ZoneType oldtype
= types
[i
->type
];
1040 SimplifiedZoneType
newtype(oldtype
);
1041 assert(simplemap
.find(newtype
) != simplemap
.end());
1042 i
->type
= simplemap
[newtype
];
1045 // Replace type list
1047 copy(simpleset
.begin(), simpleset
.end(), back_inserter(types
));
1051 * Merge final zone data into this zone.
1053 void ZoneInfo::mergeFinalData(const FinalZone
& fz
) {
1055 long seconds
= yearToSeconds(year
);
1056 vector
<Transition
>::iterator it
=
1057 find_if(transitions
.begin(), transitions
.end(),
1058 bind2nd(ptr_fun(isAfter
), seconds
));
1059 transitions
.erase(it
, transitions
.end());
1061 if (finalYear
!= -1) {
1062 throw invalid_argument("Final zone already merged in");
1064 finalYear
= fz
.year
;
1065 finalOffset
= fz
.offset
;
1066 finalRuleID
= fz
.ruleid
;
1070 * Merge the data from the given final zone into the core zone data by
1071 * calling the ZoneInfo member function mergeFinalData.
1073 void mergeOne(const string
& zoneid
, const FinalZone
& fz
) {
1074 if (ZONEINFO
.find(zoneid
) == ZONEINFO
.end()) {
1075 throw invalid_argument("Unrecognized final zone ID");
1077 ZONEINFO
[zoneid
].mergeFinalData(fz
);
1081 * Visitor function that merges the final zone data into the main zone
1082 * data structures. It calls mergeOne for each final zone and its
1085 void mergeFinalZone(const pair
<string
,FinalZone
>& p
) {
1086 const string
& id
= p
.first
;
1087 const FinalZone
& fz
= p
.second
;
1093 * Print this rule in resource bundle format to os. ID and enclosing
1094 * braces handled elsewhere.
1096 void FinalRule::print(ostream
& os
) const {
1097 // First print the rule part that enters DST; then the rule part
1099 int whichpart
= (part
[0].offset
!= 0) ? 0 : 1;
1100 assert(part
[whichpart
].offset
!= 0);
1101 assert(part
[1-whichpart
].offset
== 0);
1104 for (int i
=0; i
<2; ++i
) {
1105 const FinalRulePart
& p
= part
[whichpart
];
1106 whichpart
= 1-whichpart
;
1107 os
<< p
.month
<< ", " << p
.stz_dowim() << ", " << p
.stz_dow() << ", "
1108 << p
.time
<< ", " << p
.timemode() << ", ";
1110 os
<< part
[whichpart
].offset
<< endl
;
1113 int main(int argc
, char *argv
[]) {
1114 string rootpath
, zonetab
, version
;
1117 cout
<< "Usage: tz2icu <dir> <cmap> <vers>" << endl
1118 << " <dir> path to zoneinfo file tree generated by" << endl
1119 << " ICU-patched version of zic" << endl
1120 << " <cmap> country map, from tzdata archive," << endl
1121 << " typically named \"zone.tab\"" << endl
1122 << " <vers> version string, such as \"2003e\"" << endl
;
1130 cout
<< "Olson data version: " << version
<< endl
;
1133 ifstream
finals(ICU_ZONE_FILE
);
1135 readFinalZonesAndRules(finals
);
1137 cout
<< "Finished reading " << finalZones
.size()
1138 << " final zones and " << finalRules
.size()
1139 << " final rules from " ICU_ZONE_FILE
<< endl
;
1141 cerr
<< "Error: Unable to open " ICU_ZONE_FILE
<< endl
;
1144 } catch (const exception
& error
) {
1145 cerr
<< "Error: While reading " ICU_ZONE_FILE
": " << error
.what() << endl
;
1149 // Read the legacy alias list and process it. Treat the legacy mappings
1150 // like links, but also record them in the "legacy" hash.
1152 ifstream
aliases(ICU_TZ_ALIAS
);
1154 cerr
<< "Error: Unable to open " ICU_TZ_ALIAS
<< endl
;
1159 while (getline(aliases
, line
)) {
1160 string::size_type lb
= line
.find('#');
1161 if (lb
!= string::npos
) {
1162 line
.resize(lb
); // trim comments
1165 istringstream
is(line
);
1166 copy(istream_iterator
<string
>(is
),istream_iterator
<string
>(),
1168 if (a
.size() == 0) continue; // blank line
1169 if (a
.size() != 2) {
1170 cerr
<< "Error: Can't parse \"" << line
<< "\" in "
1171 ICU_TZ_ALIAS
<< endl
;
1176 string
alias(a
[0]), olson(a
[1]);
1177 if (links
.find(alias
) != links
.end()) {
1178 cerr
<< "Error: Alias \"" << alias
1179 << "\" is an Olson zone in "
1180 ICU_TZ_ALIAS
<< endl
;
1183 if (reverseLinks
.find(alias
) != reverseLinks
.end()) {
1184 cerr
<< "Error: Alias \"" << alias
1185 << "\" is an Olson link to \"" << reverseLinks
[olson
]
1186 << "\" in " << ICU_TZ_ALIAS
<< endl
;
1190 // Record source for error reporting
1191 if (linkSource
.find(olson
) == linkSource
.end()) {
1192 linkSource
[olson
] = "ICU alias";
1194 assert(linkSource
.find(alias
) == linkSource
.end());
1195 linkSource
[alias
] = "ICU alias";
1197 links
[olson
].insert(alias
);
1198 reverseLinks
[alias
] = olson
;
1200 cout
<< "Finished reading " << n
1201 << " aliases from " ICU_TZ_ALIAS
<< endl
;
1202 } catch (const exception
& error
) {
1203 cerr
<< "Error: While reading " ICU_TZ_ALIAS
": " << error
.what() << endl
;
1208 // Recursively scan all files below the given path, accumulating
1209 // their data into ZONEINFO. All files must be TZif files. Any
1210 // failure along the way will result in a call to exit(1).
1212 } catch (const exception
& error
) {
1213 cerr
<< "Error: While scanning " << rootpath
<< ": " << error
.what() << endl
;
1217 cout
<< "Finished reading " << ZONEINFO
.size() << " zoneinfo files ["
1218 << (ZONEINFO
.begin())->first
<< ".."
1219 << (--ZONEINFO
.end())->first
<< "]" << endl
;
1222 for_each(finalZones
.begin(), finalZones
.end(), mergeFinalZone
);
1223 } catch (const exception
& error
) {
1224 cerr
<< "Error: While merging final zone data: " << error
.what() << endl
;
1228 // Process links (including ICU aliases). For each link set we have
1229 // a canonical ID (e.g., America/Los_Angeles) and a set of one or more
1230 // aliases (e.g., PST, PST8PDT, ...).
1232 // 1. Add all aliases as zone objects in ZONEINFO
1233 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1234 i
!=links
.end(); ++i
) {
1235 const string
& olson
= i
->first
;
1236 const set
<string
>& aliases
= i
->second
;
1237 if (ZONEINFO
.find(olson
) == ZONEINFO
.end()) {
1238 cerr
<< "Error: Invalid " << linkSource
[olson
] << " to non-existent \""
1239 << olson
<< "\"" << endl
;
1242 for (set
<string
>::const_iterator j
=aliases
.begin();
1243 j
!=aliases
.end(); ++j
) {
1244 ZONEINFO
[*j
] = ZoneInfo();
1248 // 2. Create a mapping from zones to index numbers 0..n-1.
1249 map
<string
,int> zoneIDs
;
1250 vector
<string
> zoneIDlist
;
1252 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1253 zoneIDs
[i
->first
] = z
++;
1254 zoneIDlist
.push_back(i
->first
);
1256 assert(z
== (int) ZONEINFO
.size());
1258 // 3. Merge aliases. Sometimes aliases link to other aliases; we
1259 // resolve these into simplest possible sets.
1260 map
<string
,set
<string
> > links2
;
1261 map
<string
,string
> reverse2
;
1262 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1263 i
!=links
.end(); ++i
) {
1264 string olson
= i
->first
;
1265 while (reverseLinks
.find(olson
) != reverseLinks
.end()) {
1266 olson
= reverseLinks
[olson
];
1268 for (set
<string
>::const_iterator j
=i
->second
.begin(); j
!=i
->second
.end(); ++j
) {
1269 links2
[olson
].insert(*j
);
1270 reverse2
[*j
] = olson
;
1274 reverseLinks
= reverse2
;
1276 if (false) { // Debugging: Emit link map
1277 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1278 i
!=links
.end(); ++i
) {
1279 cout
<< i
->first
<< ": ";
1280 for (set
<string
>::const_iterator j
=i
->second
.begin(); j
!=i
->second
.end(); ++j
) {
1287 // 4. Update aliases
1288 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1289 i
!=links
.end(); ++i
) {
1290 const string
& olson
= i
->first
;
1291 const set
<string
>& aliases
= i
->second
;
1292 ZONEINFO
[olson
].clearAliases();
1293 ZONEINFO
[olson
].addAlias(zoneIDs
[olson
]);
1294 for (set
<string
>::const_iterator j
=aliases
.begin();
1295 j
!=aliases
.end(); ++j
) {
1296 assert(zoneIDs
.find(olson
) != zoneIDs
.end());
1297 assert(zoneIDs
.find(*j
) != zoneIDs
.end());
1298 assert(ZONEINFO
.find(*j
) != ZONEINFO
.end());
1299 ZONEINFO
[*j
].setAliasTo(zoneIDs
[olson
]);
1300 ZONEINFO
[olson
].addAlias(zoneIDs
[*j
]);
1304 // Once merging of final data is complete, we can optimize the type list
1305 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1306 i
->second
.optimizeTypeList();
1309 // Create the country map
1310 map
<string
, set
<string
> > countryMap
; // country -> set of zones
1311 map
<string
, string
> reverseCountryMap
; // zone -> country
1313 ifstream
f(zonetab
.c_str());
1315 cerr
<< "Error: Unable to open " << zonetab
<< endl
;
1320 while (getline(f
, line
)) {
1321 string::size_type lb
= line
.find('#');
1322 if (lb
!= string::npos
) {
1323 line
.resize(lb
); // trim comments
1325 string country
, coord
, zone
;
1326 istringstream
is(line
);
1327 is
>> country
>> coord
>> zone
;
1328 if (country
.size() == 0) continue;
1329 if (country
.size() != 2 || zone
.size() < 1) {
1330 cerr
<< "Error: Can't parse " << line
<< " in " << zonetab
<< endl
;
1333 if (ZONEINFO
.find(zone
) == ZONEINFO
.end()) {
1334 cerr
<< "Error: Country maps to invalid zone " << zone
1335 << " in " << zonetab
<< endl
;
1338 countryMap
[country
].insert(zone
);
1339 reverseCountryMap
[zone
] = country
;
1340 //cerr << (n+1) << ": " << country << " <=> " << zone << endl;
1343 cout
<< "Finished reading " << n
1344 << " country entries from " << zonetab
<< endl
;
1345 } catch (const exception
& error
) {
1346 cerr
<< "Error: While reading " << zonetab
<< ": " << error
.what() << endl
;
1350 // Merge ICU aliases into country map. Don't merge any alias
1351 // that already has a country map, since that doesn't make sense.
1352 // E.g. "Link Europe/Oslo Arctic/Longyearbyen" doesn't mean we
1353 // should cross-map the countries between these two zones.
1354 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1355 i
!=links
.end(); ++i
) {
1356 const string
& olson(i
->first
);
1357 if (reverseCountryMap
.find(olson
) == reverseCountryMap
.end()) {
1360 string c
= reverseCountryMap
[olson
];
1361 const set
<string
>& aliases(i
->second
);
1362 for (set
<string
>::const_iterator j
=aliases
.begin();
1363 j
!= aliases
.end(); ++j
) {
1364 if (reverseCountryMap
.find(*j
) == reverseCountryMap
.end()) {
1365 countryMap
[c
].insert(*j
);
1366 reverseCountryMap
[*j
] = c
;
1367 //cerr << "Aliased country: " << c << " <=> " << *j << endl;
1372 // Create a pseudo-country containing all zones belonging to no country
1373 set
<string
> nocountry
;
1374 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1375 if (reverseCountryMap
.find(i
->first
) == reverseCountryMap
.end()) {
1376 nocountry
.insert(i
->first
);
1379 countryMap
[""] = nocountry
;
1381 // Get local time & year for below
1384 struct tm
* now
= localtime(&sec
);
1385 int thisYear
= now
->tm_year
+ 1900;
1387 // Write out a resource-bundle source file containing the Zones, Names, Rules, and Regions data.
1389 ofstream
file(ICU_TZ_RESOURCE
".txt");
1391 file
<< "//---------------------------------------------------------" << endl
1392 << "// Copyright (C) 2003";
1393 if (thisYear
> 2003) {
1394 file
<< "-" << thisYear
;
1396 file
<< ", International Business Machines" << endl
1397 << "// Corporation and others. All Rights Reserved." << endl
1398 << "//---------------------------------------------------------" << endl
1399 << "// Build tool: tz2icu" << endl
1400 << "// Build date: " << asctime(now
) /* << endl -- not needed: the asctime() string already ends with '\n' */
1401 << "// Olson source: ftp://elsie.nci.nih.gov/pub/" << endl
1402 << "// Olson version: " << version
<< endl
1403 << "//---------------------------------------------------------" << endl
1404 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl
1405 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl
1406 << "//---------------------------------------------------------" << endl
1408 << ICU_TZ_RESOURCE
" {" << endl
1409 << " Zones:array { " << endl
1410 << ZONEINFO
// Zones (the actual data)
1413 // Names correspond to the Zones list, used for binary searching.
1414 printStringList ( file
, ZONEINFO
); // print the Names list
1416 // Final Rules are used if requested by the zone
1417 file
<< " Rules { " << endl
;
1420 for(map
<string
,FinalRule
>::iterator i
=finalRules
.begin();
1421 i
!=finalRules
.end(); ++i
) {
1422 const string
& id
= i
->first
;
1423 const FinalRule
& r
= i
->second
;
1424 file
<< " " << id
<< ":intvector {" << endl
;
1426 file
<< " } //_#" << frc
++ << endl
;
1428 file
<< " }" << endl
;
1430 // Emit country (region) map. Emitting the string zone IDs results
1431 // in a 188 kb binary resource; emitting the zone index numbers
1432 // trims this to 171 kb. More work for the runtime code, but
1433 // a smaller data footprint.
1434 file
<< " Regions { " << endl
;
1436 for (map
<string
, set
<string
> >::const_iterator i
=countryMap
.begin();
1437 i
!= countryMap
.end(); ++i
) {
1438 string country
= i
->first
;
1439 const set
<string
>& zones(i
->second
);
1444 file
<< country
<< ":intvector { ";
1446 for (set
<string
>::const_iterator j
=zones
.begin();
1447 j
!= zones
.end(); ++j
) {
1448 if (!first
) file
<< ", ";
1450 if (zoneIDs
.find(*j
) == zoneIDs
.end()) {
1451 cerr
<< "Error: Nonexistent zone in country map: " << *j
<< endl
;
1454 file
<< zoneIDs
[*j
]; // emit the zone's index number
1456 file
<< " } //R#" << rc
++ << endl
;
1458 file
<< " }" << endl
;
1460 file
<< "}" << endl
;
1465 if (file
) { // recheck error bit
1466 cout
<< "Finished writing " ICU_TZ_RESOURCE
".txt" << endl
;
1468 cerr
<< "Error: Unable to open/write to " ICU_TZ_RESOURCE
".txt" << endl
;
1472 #define ICU4J_TZ_CLASS "ZoneMetaData"
1474 // Write out a Java source file containing only a few pieces of
1475 // meta-data missing from the core JDK: the equivalency lists and the country map.
1477 ofstream
java(ICU4J_TZ_CLASS
".java");
1479 java
<< "//---------------------------------------------------------" << endl
1480 << "// Copyright (C) 2003";
1481 if (thisYear
> 2003) {
1482 java
<< "-" << thisYear
;
1484 java
<< ", International Business Machines" << endl
1485 << "// Corporation and others. All Rights Reserved." << endl
1486 << "//---------------------------------------------------------" << endl
1487 << "// Build tool: tz2icu" << endl
1488 << "// Build date: " << asctime(now
) /* << endl -- not needed: the asctime() string already ends with '\n' */
1489 << "// Olson source: ftp://elsie.nci.nih.gov/pub/" << endl
1490 << "// Olson version: " << version
<< endl
1491 << "//---------------------------------------------------------" << endl
1492 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl
1493 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl
1494 << "//---------------------------------------------------------" << endl
1496 << "package com.ibm.icu.impl;" << endl
1498 << "public final class " ICU4J_TZ_CLASS
" {" << endl
;
1500 // Emit equivalency lists
1502 java
<< " public static final String[][] EQUIV = {" << endl
;
1503 for (ZoneMap::const_iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1504 if (i
->second
.isAlias() || i
->second
.getAliases().size() == 0) {
1507 if (!first1
) java
<< "," << endl
;
1509 // The ID of this zone (the canonical zone, to which the
1510 // aliases point) will be sorted into the list, so it
1511 // won't be at position 0. If we want to know which is
1512 // the canonical zone, we should move it to position 0.
1515 const set
<int>& s
= i
->second
.getAliases();
1516 for (set
<int>::const_iterator j
=s
.begin(); j
!=s
.end(); ++j
) {
1517 if (!first2
) java
<< ", ";
1518 java
<< '"' << zoneIDlist
[*j
] << '"';
1526 // Emit country map.
1528 java
<< " public static final String[][] COUNTRY = {" << endl
;
1529 for (map
<string
, set
<string
> >::const_iterator i
=countryMap
.begin();
1530 i
!= countryMap
.end(); ++i
) {
1531 if (!first1
) java
<< "," << endl
;
1533 string country
= i
->first
;
1534 const set
<string
>& zones(i
->second
);
1535 java
<< " { \"" << country
<< '"';
1536 for (set
<string
>::const_iterator j
=zones
.begin();
1537 j
!= zones
.end(); ++j
) {
1538 java
<< ", \"" << *j
<< '"';
1545 java
<< "}" << endl
;
1550 if (java
) { // recheck error bit
1551 cout
<< "Finished writing " ICU4J_TZ_CLASS
".java" << endl
;
1553 cerr
<< "Error: Unable to open/write to " ICU4J_TZ_CLASS
".java" << endl
;