2 **********************************************************************
3 * Copyright (c) 2003-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 * Created: July 10 2003
9 **********************************************************************
11 #include "tzfile.h" // from Olson tzcode archive, copied to this dir
16 #undef min // windows.h/STL conflict
17 #undef max // windows.h/STL conflict
18 // "identifier was truncated to 'number' characters" warning
19 #pragma warning(disable: 4786)
48 #include "unicode/uversion.h"
52 //--------------------------------------------------------------------
54 //--------------------------------------------------------------------
// Length of a common (365-day) year, in seconds.
const long SECS_PER_YEAR      = 31536000; // 365 days
// Length of a leap (366-day) year, in seconds.
const long SECS_PER_LEAP_YEAR = 31622400; // 366 days
/**
 * Return true if y is a leap year under the Gregorian rule: divisible
 * by 4, except for century years, which must also be divisible by 400.
 */
bool isLeap(int y) {
    return (y%4 == 0) && ((y%100 != 0) || (y%400 == 0)); // Gregorian
}
63 long secsPerYear(int y
) {
64 return isLeap(y
) ? SECS_PER_LEAP_YEAR
: SECS_PER_YEAR
;
68 * Given a calendar year, return the GMT epoch seconds for midnight
69 * GMT of January 1 of that year. yearToSeconds(1970) == 0.
71 long yearToSeconds(int year
) {
72 // inefficient but foolproof
76 s
+= secsPerYear(y
++);
79 s
-= secsPerYear(--y
);
85 * Given 1970 GMT epoch seconds, return the calendar year containing
86 * that time. secondsToYear(0) == 1970.
88 int secondsToYear(long seconds
) {
89 // inefficient but foolproof
94 s
+= secsPerYear(y
++);
95 if (s
> seconds
) break;
100 s
-= secsPerYear(--y
);
101 if (s
<= seconds
) break;
107 //--------------------------------------------------------------------
109 //--------------------------------------------------------------------
113 struct SimplifiedZoneType
;
115 // A transition from one ZoneType to another
116 // Minimal size = 5 bytes (4+1)
118 long time
; // seconds, 1970 epoch
119 int type
; // index into 'ZoneInfo.types' 0..255
120 Transition(long _time
, int _type
) {
126 // A behavior mode (what zic calls a 'type') of a time zone.
127 // Minimal size = 6 bytes (4+1+3bits)
128 // SEE: SimplifiedZoneType
130 long rawoffset
; // raw seconds offset from GMT
131 long dstoffset
; // dst seconds offset from GMT
133 // We don't really need any of the following, but they are
134 // retained for possible future use. See SimplifiedZoneType.
135 int abbr
; // index into ZoneInfo.abbrs 0..n-1
140 ZoneType(const SimplifiedZoneType
&); // used by optimizeTypeList
142 ZoneType() : rawoffset(-1), dstoffset(-1), abbr(-1) {}
144 // A restricted equality, of just the raw and dst offset
145 bool matches(const ZoneType
& other
) {
146 return rawoffset
== other
.rawoffset
&&
147 dstoffset
== other
.dstoffset
;
151 // A collection of transitions from one ZoneType to another, together
152 // with a list of the ZoneTypes. A ZoneInfo object may have a long
153 // list of transitions between a smaller list of ZoneTypes.
155 // This object represents the contents of a single zic-created
158 vector
<Transition
> transitions
;
159 vector
<ZoneType
> types
;
160 vector
<string
> abbrs
;
164 int finalYear
; // -1 if none
166 // If this is an alias, then all other fields are meaningless, and
167 // this field will point to the "real" zone 0..n-1.
168 int aliasTo
; // -1 if this is a "real" zone
170 // If there are aliases TO this zone, then the following set will
171 // contain their index numbers (each index >= 0).
174 ZoneInfo() : finalYear(-1), aliasTo(-1) {}
176 void mergeFinalData(const FinalZone
& fz
);
178 void optimizeTypeList();
180 // Set this zone to be an alias TO another zone.
181 void setAliasTo(int index
);
183 // Clear the list of aliases OF this zone.
186 // Add an alias to the list of aliases OF this zone.
187 void addAlias(int index
);
189 // Is this an alias to another zone?
190 bool isAlias() const {
194 // Retrieve alias list
195 const set
<int>& getAliases() const {
199 void print(ostream
& os
, const string
& id
) const;
202 void ZoneInfo::clearAliases() {
207 void ZoneInfo::addAlias(int index
) {
208 assert(aliasTo
< 0 && index
>= 0 && aliases
.find(index
) == aliases
.end());
209 aliases
.insert(index
);
212 void ZoneInfo::setAliasTo(int index
) {
214 assert(aliases
.size() == 0);
218 typedef map
<string
, ZoneInfo
> ZoneMap
;
220 typedef ZoneMap::const_iterator ZoneMapIter
;
222 //--------------------------------------------------------------------
224 //--------------------------------------------------------------------
226 // Global map holding all our ZoneInfo objects, indexed by id.
229 //--------------------------------------------------------------------
230 // zoneinfo file parsing
231 //--------------------------------------------------------------------
/**
 * Read zic-coded 32-bit integer from file.
 *
 * Four bytes are read, most significant byte first, and interpreted as a
 * signed 32-bit value.  The bytes are assembled in an unsigned
 * accumulator and sign-converted explicitly; shifting a promoted int
 * left by 24 would overflow for a leading byte >= 0x80.
 *
 * The stream state is not checked here; if the read comes up short the
 * missing bytes are treated as zero and the caller is expected to test
 * the stream afterwards.
 *
 * @param file an already-open binary file stream
 * @param minv smallest acceptable value (inclusive)
 * @param maxv largest acceptable value (inclusive)
 * @return the decoded value
 * @throws out_of_range if the value lies outside [minv, maxv]
 */
long readcoded(std::ifstream& file, long minv=std::numeric_limits<long>::min(),
                                    long maxv=std::numeric_limits<long>::max()) {
    unsigned char buf[4] = {0, 0, 0, 0}; // must be UNSIGNED
    file.read(reinterpret_cast<char*>(buf), 4);

    // Big-endian assembly; 'bits' never exceeds 0xFFFFFFFF.
    unsigned long bits = 0;
    for (int i=0; i<4; ++i) {
        bits = (bits << 8) | buf[i];
    }

    // Two's-complement conversion that stays in range even when long is
    // only 32 bits wide.
    long val;
    if (bits & 0x80000000UL) {
        val = -static_cast<long>(0xFFFFFFFFUL - bits) - 1;
    } else {
        val = static_cast<long>(bits);
    }

    if (val < minv || val > maxv) {
        std::ostringstream os;
        os << "coded value out-of-range: " << val << ", expected ["
           << minv << ", " << maxv << "]";
        throw std::out_of_range(os.str());
    }
    return val;
}
/**
 * Read a boolean value: a single byte that must be 0 or 1.
 *
 * The byte is pre-set to 0 so that a failed read yields false rather
 * than an indeterminate value; the stream state is left for the caller
 * to inspect.
 *
 * @param file an already-open binary file stream
 * @return true if the byte read was 1, false if it was 0
 * @throws out_of_range if the byte is neither 0 nor 1
 */
bool readbool(std::ifstream& file) {
    char c = 0;
    file.read(&c, 1);
    if (c != 0 && c != 1) {
        std::ostringstream os;
        os << "boolean value out-of-range: " << (int)c;
        throw std::out_of_range(os.str());
    }
    return (c != 0);
}
264 * Read the zoneinfo file structure (see tzfile.h) into a ZoneInfo
265 * @param file an already-open file stream
267 void readzoneinfo(ifstream
& file
, ZoneInfo
& info
) {
270 // Check for TZ_ICU_MAGIC signature at file start. If we get a
271 // signature mismatch, it means we're trying to read a file which
272 // isn't a ICU-modified-zic-created zoneinfo file. Typically this
273 // means the user is passing in a "normal" zoneinfo directory, or
274 // a zoneinfo directory that is polluted with other files, or that
275 // the user passed in the wrong directory.
278 if (strncmp(buf
, TZ_ICU_MAGIC
, 4) != 0) {
279 throw invalid_argument("TZ_ICU_MAGIC signature missing");
281 // skip additional Olson byte version
283 // if '\0', we have just one copy of data, if '2', there is additional
284 // 64 bit version at the end.
285 if(buf
[0]!=0 && buf
[0]!='2') {
286 throw invalid_argument("Bad Olson version info");
289 // Read reserved bytes. The first of these will be a version byte.
291 if (*(ICUZoneinfoVersion
*)&buf
!= TZ_ICU_VERSION
) {
292 throw invalid_argument("File version mismatch");
296 long isgmtcnt
= readcoded(file
, 0);
297 long isdstcnt
= readcoded(file
, 0);
298 long leapcnt
= readcoded(file
, 0);
299 long timecnt
= readcoded(file
, 0);
300 long typecnt
= readcoded(file
, 0);
301 long charcnt
= readcoded(file
, 0);
303 // Confirm sizes that we assume to be equal. These assumptions
304 // are drawn from a reading of the zic source (2003a), so they
305 // should hold unless the zic source changes.
306 if (isgmtcnt
!= typecnt
|| isdstcnt
!= typecnt
) {
307 throw invalid_argument("count mismatch between tzh_ttisgmtcnt, tzh_ttisdstcnt, tth_typecnt");
310 // Used temporarily to store transition times and types. We need
311 // to do this because the times and types are stored in two
313 vector
<long> transitionTimes(timecnt
, -1); // temporary
314 vector
<int> transitionTypes(timecnt
, -1); // temporary
316 // Read transition times
317 for (i
=0; i
<timecnt
; ++i
) {
318 transitionTimes
[i
] = readcoded(file
);
321 // Read transition types
322 for (i
=0; i
<timecnt
; ++i
) {
324 file
.read((char*) &c
, 1);
326 if (t
< 0 || t
>= typecnt
) {
328 os
<< "illegal type: " << t
<< ", expected [0, " << (typecnt
-1) << "]";
329 throw out_of_range(os
.str());
331 transitionTypes
[i
] = t
;
334 // Build transitions vector out of corresponding times and types.
335 for (i
=0; i
<timecnt
; ++i
) {
336 info
.transitions
.push_back(Transition(transitionTimes
[i
], transitionTypes
[i
]));
339 // Read types (except for the isdst and isgmt flags, which come later (why??))
340 for (i
=0; i
<typecnt
; ++i
) {
343 type
.rawoffset
= readcoded(file
);
344 type
.dstoffset
= readcoded(file
);
345 type
.isdst
= readbool(file
);
348 file
.read((char*) &c
, 1);
351 if (type
.isdst
!= (type
.dstoffset
!= 0)) {
352 throw invalid_argument("isdst does not reflect dstoffset");
355 info
.types
.push_back(type
);
357 assert(info
.types
.size() == (unsigned) typecnt
);
359 // Read the abbreviation string
361 // All abbreviations are concatenated together, with a 0 at
362 // the end of each abbr.
363 char* str
= new char[charcnt
+ 8];
364 file
.read(str
, charcnt
);
366 // Split abbreviations apart into individual strings. Record
367 // offset of each abbr in a vector.
368 vector
<int> abbroffset
;
369 char *limit
=str
+charcnt
;
370 for (char* p
=str
; p
<limit
; ++p
) {
373 info
.abbrs
.push_back(string(start
, p
-start
));
374 abbroffset
.push_back(start
-str
);
377 // Remap all the abbrs. Old value is offset into concatenated
378 // raw abbr strings. New value is index into vector of
379 // strings. E.g., 0,5,10,14 => 0,1,2,3.
381 // Keep track of which abbreviations get used.
382 vector
<bool> abbrseen(abbroffset
.size(), false);
384 for (vector
<ZoneType
>::iterator it
=info
.types
.begin();
385 it
!=info
.types
.end();
387 vector
<int>::const_iterator x
=
388 find(abbroffset
.begin(), abbroffset
.end(), it
->abbr
);
389 if (x
==abbroffset
.end()) {
390 // TODO: Modify code to add a new string to the end of
391 // the abbr list when a middle offset is given, e.g.,
392 // "abc*def*" where * == '\0', take offset of 1 and
393 // make the array "abc", "def", "bc", and translate 1
394 // => 2. NOT CRITICAL since we don't even use the
395 // abbr at this time.
397 // TODO: Re-enable this warning if we start using
398 // the Olson abbr data, or if the above TODO is completed.
400 os
<< "Warning: unusual abbr offset " << it
->abbr
401 << ", expected one of";
402 for (vector
<int>::const_iterator y
=abbroffset
.begin();
403 y
!=abbroffset
.end(); ++y
) {
406 cerr
<< os
.str() << "; using 0" << endl
;
410 int index
= x
- abbroffset
.begin();
412 abbrseen
[index
] = true;
416 for (int ii
=0;ii
<(int) abbrseen
.size();++ii
) {
418 cerr
<< "Warning: unused abbreviation: " << ii
<< endl
;
423 // Read leap second info, if any.
424 // *** We discard leap second data. ***
425 for (i
=0; i
<leapcnt
; ++i
) {
426 readcoded(file
); // transition time
427 readcoded(file
); // total correction after above
431 for (i
=0; i
<typecnt
; ++i
) info
.types
[i
].isstd
= readbool(file
);
434 for (i
=0; i
<typecnt
; ++i
) info
.types
[i
].isgmt
= readbool(file
);
437 //--------------------------------------------------------------------
438 // Directory and file reading
439 //--------------------------------------------------------------------
442 * Process a single zoneinfo file, adding the data to ZONEINFO
443 * @param path the full path to the file, e.g., ".\zoneinfo\America\Los_Angeles"
444 * @param id the zone ID, e.g., "America/Los_Angeles"
446 void handleFile(string path
, string id
) {
447 // Check for duplicate id
448 if (ZONEINFO
.find(id
) != ZONEINFO
.end()) {
450 os
<< "duplicate zone ID: " << id
;
451 throw invalid_argument(os
.str());
454 ifstream
file(path
.c_str(), ios::in
| ios::binary
);
456 throw invalid_argument("can't open file");
459 readzoneinfo(file
, info
);
463 throw invalid_argument("read error");
466 // Check eof-relative pos (there may be a cleaner way to do this)
467 long eofPos
= (long) file
.tellg();
470 file
.seekg(0, ios::end
);
471 eofPos
= eofPos
- (long) file
.tellg();
473 // 2006c merged 32 and 64 bit versions in a fat binary
474 // 64 version starts at the end of 32 bit version.
475 // Therefore, if the file is *not* consumed, check
476 // if it is maybe being restarted.
477 if (strncmp(buf
, TZ_ICU_MAGIC
, 4) != 0) {
479 os
<< (-eofPos
) << " unprocessed bytes at end";
480 throw invalid_argument(os
.str());
488 * Recursively scan the given directory, calling handleFile() for each
489 * file in the tree. The user should call with the root directory and
490 * a prefix of "". The function will call itself with non-empty
495 void scandir(string dirname
, string prefix
="") {
497 WIN32_FIND_DATA FileData
;
499 // Get the first file
500 hList
= FindFirstFile((dirname
+ "\\*").c_str(), &FileData
);
501 if (hList
== INVALID_HANDLE_VALUE
) {
502 cerr
<< "Error: Invalid directory: " << dirname
<< endl
;
506 string
name(FileData
.cFileName
);
507 string
path(dirname
+ "\\" + name
);
508 if (FileData
.dwFileAttributes
& FILE_ATTRIBUTE_DIRECTORY
) {
509 if (name
!= "." && name
!= "..") {
510 scandir(path
, prefix
+ name
+ "/");
514 string id
= prefix
+ name
;
515 handleFile(path
, id
);
516 } catch (const exception
& e
) {
517 cerr
<< "Error: While processing \"" << path
<< "\", "
523 if (!FindNextFile(hList
, &FileData
)) {
524 if (GetLastError() == ERROR_NO_MORE_FILES
) {
534 void scandir(string dir
, string prefix
="") {
536 struct dirent
*dir_entry
;
537 struct stat stat_info
;
539 vector
<string
> subdirs
;
540 vector
<string
> subfiles
;
542 if ((dp
= opendir(dir
.c_str())) == NULL
) {
543 cerr
<< "Error: Invalid directory: " << dir
<< endl
;
546 if (!getcwd(pwd
, sizeof(pwd
))) {
547 cerr
<< "Error: Directory name too long" << endl
;
551 while ((dir_entry
= readdir(dp
)) != NULL
) {
552 string name
= dir_entry
->d_name
;
553 string path
= dir
+ "/" + name
;
554 lstat(dir_entry
->d_name
,&stat_info
);
555 if (S_ISDIR(stat_info
.st_mode
)) {
556 if (name
!= "." && name
!= "..") {
557 subdirs
.push_back(path
);
558 subdirs
.push_back(prefix
+ name
+ "/");
559 // scandir(path, prefix + name + "/");
563 string id
= prefix
+ name
;
564 subfiles
.push_back(path
);
565 subfiles
.push_back(id
);
566 // handleFile(path, id);
567 } catch (const exception
& e
) {
568 cerr
<< "Error: While processing \"" << path
<< "\", "
577 for(int i
=0;i
<(int)subfiles
.size();i
+=2) {
579 handleFile(subfiles
[i
], subfiles
[i
+1]);
580 } catch (const exception
& e
) {
581 cerr
<< "Error: While processing \"" << subfiles
[i
] << "\", "
586 for(int i
=0;i
<(int)subdirs
.size();i
+=2) {
587 scandir(subdirs
[i
], subdirs
[i
+1]);
593 //--------------------------------------------------------------------
594 // Final zone and rule info
595 //--------------------------------------------------------------------
/**
 * Read and discard the current line.
 *
 * Characters are consumed up to and including the next '\n', or until
 * the stream reports EOF, whichever comes first.
 */
void consumeLine(std::istream& in) {
    int c;
    do {
        c = in.get();
    } while (c != EOF && c != '\n');
}
// Single-letter time mode names.
// NOTE(review): presumably indexed by FinalRulePart::timemode()
// (0 = wall, 1 = standard, 2 = universal) -- confirm against its users.
const char* TIME_MODE[] = {"w", "s", "u"};

// Allow 29 days in February because zic outputs February 29
// for rules like "last Sunday in February".
const int MONTH_LEN[] = {31,29,31,30,31,30,31,31,30,31,30,31};

// One hour, expressed in seconds.
const int HOUR = 3600;
622 int offset
; // raw offset
623 int year
; // takes effect for y >= year
626 FinalZone(int _offset
, int _year
, const string
& _ruleid
) :
627 offset(_offset
), year(_year
), ruleid(_ruleid
) {
628 if (offset
<= -16*HOUR
|| offset
>= 16*HOUR
) {
630 os
<< "Invalid input offset " << offset
631 << " for year " << year
632 << " and rule ID " << ruleid
;
633 throw invalid_argument(os
.str());
635 if (year
< 1900 || year
>= 2050) {
637 os
<< "Invalid input year " << year
638 << " with offset " << offset
639 << " and rule ID " << ruleid
;
640 throw invalid_argument(os
.str());
643 FinalZone() : offset(-1), year(-1) {}
644 void addLink(const string
& alias
) {
645 if (aliases
.find(alias
) != aliases
.end()) {
647 os
<< "Duplicate alias " << alias
;
648 throw invalid_argument(os
.str());
650 aliases
.insert(alias
);
654 struct FinalRulePart
{
660 int offset
; // dst offset, usually either 0 or 1:00
662 // Isstd and isgmt only have 3 valid states, corresponding to local
663 // wall time, local standard time, and GMT standard time.
664 // Here is how the isstd & isgmt flags are set by zic:
665 //| case 's': /* Standard */
666 //| rp->r_todisstd = TRUE;
667 //| rp->r_todisgmt = FALSE;
668 //| case 'w': /* Wall */
669 //| rp->r_todisstd = FALSE;
670 //| rp->r_todisgmt = FALSE;
671 //| case 'g': /* Greenwich */
672 //| case 'u': /* Universal */
673 //| case 'z': /* Zulu */
674 //| rp->r_todisstd = TRUE;
675 //| rp->r_todisgmt = TRUE;
679 bool isset
; // used during building; later ignored
681 FinalRulePart() : isset(false) {}
682 void set(const string
& id
,
692 throw invalid_argument("FinalRulePart set twice");
695 if (_mode
== "DOWLEQ") {
697 } else if (_mode
== "DOWGEQ") {
699 } else if (_mode
== "DOM") {
702 throw invalid_argument("Unrecognized FinalRulePart mode");
713 if (month
< 0 || month
>= 12) {
714 os
<< "Invalid input month " << month
;
716 if (dom
< 1 || dom
> MONTH_LEN
[month
]) {
717 os
<< "Invalid input day of month " << dom
;
719 if (mode
!= DOM
&& (dow
< 0 || dow
>= 7)) {
720 os
<< "Invalid input day of week " << dow
;
722 if (offset
< 0 || offset
> HOUR
) {
723 os
<< "Invalid input offset " << offset
;
725 if (isgmt
&& !isstd
) {
726 os
<< "Invalid input isgmt && !isstd";
728 if (!os
.str().empty()) {
732 << month
<< dom
<< dow
<< time
735 throw invalid_argument(os
.str());
740 * Return the time mode as an ICU SimpleTimeZone int from 0..2;
743 int timemode() const {
746 return 2; // gmt standard
749 return 1; // local standard
751 return 0; // local wall
754 // The SimpleTimeZone encoding method for rules is as follows:
757 // DOWGEQ: dom -(dow+1)
758 // DOWLEQ: -dom -(dow+1)
759 // E.g., to encode Mon>=7, use stz_dowim=7, stz_dow=-2
760 // to encode Mon<=7, use stz_dowim=-7, stz_dow=-2
761 // to encode 7, use stz_dowim=7, stz_dow=0
762 // Note that for this program and for SimpleTimeZone, 0==Jan,
763 // but for this program 0==Sun while for SimpleTimeZone 1==Sun.
766 * Return a "dowim" param suitable for SimpleTimeZone.
768 int stz_dowim() const {
769 return (mode
== DOWLEQ
) ? -dom
: dom
;
773 * Return a "dow" param suitable for SimpleTimeZone.
775 int stz_dow() const {
776 return (mode
== DOM
) ? 0 : -(dow
+1);
781 FinalRulePart part
[2];
784 return part
[0].isset
&& part
[1].isset
;
787 void print(ostream
& os
) const;
790 map
<string
,FinalZone
> finalZones
;
791 map
<string
,FinalRule
> finalRules
;
793 map
<string
, set
<string
> > links
;
794 map
<string
, string
> reverseLinks
;
795 map
<string
, string
> linkSource
; // id => "Olson link" or "ICU alias"
798 * Predicate used to find FinalRule objects that do not have both
799 * sub-parts set (indicating an error in the input file).
801 bool isNotSet(const pair
<const string
,FinalRule
>& p
) {
802 return !p
.second
.isset();
806 * Predicate used to find FinalZone objects that do not map to a known
807 * rule (indicating an error in the input file).
809 bool mapsToUnknownRule(const pair
<const string
,FinalZone
>& p
) {
810 return finalRules
.find(p
.second
.ruleid
) == finalRules
.end();
814 * This set is used to make sure each rule in finalRules is used at
815 * least once. First we populate it with all the rules from
816 * finalRules; then we remove all the rules referred to in
819 set
<string
> ruleIDset
;
821 void insertRuleID(const pair
<string
,FinalRule
>& p
) {
822 ruleIDset
.insert(p
.first
);
825 void eraseRuleID(const pair
<string
,FinalZone
>& p
) {
826 ruleIDset
.erase(p
.second
.ruleid
);
830 * Populate finalZones and finalRules from the given istream.
832 void readFinalZonesAndRules(istream
& in
) {
837 if (in
.eof() || !in
) {
839 } else if (token
== "zone") {
840 // zone Africa/Cairo 7200 1995 Egypt # zone Africa/Cairo, offset 7200, year >= 1995, rule Egypt (0)
843 in
>> id
>> offset
>> year
>> ruleid
;
845 finalZones
[id
] = FinalZone(offset
, year
, ruleid
);
846 } else if (token
== "rule") {
847 // rule US DOWGEQ 3 1 0 7200 0 0 3600 # 52: US, file data/northamerica, line 119, mode DOWGEQ, April, dom 1, Sunday, time 7200, isstd 0, isgmt 0, offset 3600
848 // rule US DOWLEQ 9 31 0 7200 0 0 0 # 53: US, file data/northamerica, line 114, mode DOWLEQ, October, dom 31, Sunday, time 7200, isstd 0, isgmt 0, offset 0
850 int month
, dom
, dow
, time
, offset
;
852 in
>> id
>> mode
>> month
>> dom
>> dow
>> time
>> isstd
>> isgmt
>> offset
;
854 FinalRule
& fr
= finalRules
[id
];
855 int p
= fr
.part
[0].isset
? 1 : 0;
856 fr
.part
[p
].set(id
, mode
, month
, dom
, dow
, time
, isstd
, isgmt
, offset
);
857 } else if (token
== "link") {
858 string fromid
, toid
; // fromid == "real" zone, toid == alias
859 in
>> fromid
>> toid
;
860 // DO NOT consumeLine(in);
861 if (finalZones
.find(toid
) != finalZones
.end()) {
862 throw invalid_argument("Bad link: `to' id is a \"real\" zone");
865 links
[fromid
].insert(toid
);
866 reverseLinks
[toid
] = fromid
;
868 linkSource
[fromid
] = "Olson link";
869 linkSource
[toid
] = "Olson link";
870 } else if (token
.length() > 0 && token
[0] == '#') {
873 throw invalid_argument("Unrecognized keyword");
877 if (!in
.eof() && !in
) {
878 throw invalid_argument("Parse failure");
881 // Perform validity check: Each rule should have data for 2 parts.
882 if (count_if(finalRules
.begin(), finalRules
.end(), isNotSet
) != 0) {
883 throw invalid_argument("One or more incomplete rule pairs");
886 // Perform validity check: Each zone should map to a known rule.
887 if (count_if(finalZones
.begin(), finalZones
.end(), mapsToUnknownRule
) != 0) {
888 throw invalid_argument("One or more zones refers to an unknown rule");
891 // Perform validity check: Each rule should be referred to by a zone.
893 for_each(finalRules
.begin(), finalRules
.end(), insertRuleID
);
894 for_each(finalZones
.begin(), finalZones
.end(), eraseRuleID
);
895 if (ruleIDset
.size() != 0) {
896 throw invalid_argument("Unused rules");
900 //--------------------------------------------------------------------
901 // Resource bundle output
902 //--------------------------------------------------------------------
904 // SEE olsontz.h FOR RESOURCE BUNDLE DATA LAYOUT
906 void ZoneInfo::print(ostream
& os
, const string
& id
) const {
907 // Implement compressed format #2:
909 os
<< " /* " << id
<< " */ ";
912 assert(aliases
.size() == 0);
913 os
<< ":int { " << aliasTo
<< " } "; // No endl - save room for comment.
917 os
<< ":array {" << endl
;
919 vector
<Transition
>::const_iterator trn
;
920 vector
<ZoneType
>::const_iterator typ
;
923 os
<< " :intvector { ";
924 for (trn
= transitions
.begin(); trn
!= transitions
.end(); ++trn
) {
925 if (!first
) os
<< ", ";
932 os
<< " :intvector { ";
933 for (typ
= types
.begin(); typ
!= types
.end(); ++typ
) {
934 if (!first
) os
<< ", ";
936 os
<< typ
->rawoffset
<< ", " << typ
->dstoffset
;
940 os
<< " :bin { \"" << hex
<< setfill('0');
941 for (trn
= transitions
.begin(); trn
!= transitions
.end(); ++trn
) {
942 os
<< setw(2) << trn
->type
;
944 os
<< dec
<< "\" }" << endl
;
946 // Final zone info, if any
947 if (finalYear
!= -1) {
948 os
<< " \"" << finalRuleID
<< "\"" << endl
;
949 os
<< " :intvector { " << finalOffset
<< ", "
950 << finalYear
<< " }" << endl
;
953 // Alias list, if any
954 if (aliases
.size() != 0) {
956 os
<< " :intvector { ";
957 for (set
<int>::const_iterator i
=aliases
.begin(); i
!=aliases
.end(); ++i
) {
958 if (!first
) os
<< ", ";
965 os
<< " } "; // no trailing 'endl', so comments can be placed.
969 operator<<(ostream
& os
, const ZoneMap
& zoneinfo
) {
971 for (ZoneMapIter it
= zoneinfo
.begin();
972 it
!= zoneinfo
.end();
975 it
->second
.print(os
, it
->first
);
976 os
<< "//Z#" << c
++ << endl
;
981 // print the string list
982 ostream
& printStringList( ostream
& os
, const ZoneMap
& zoneinfo
) {
984 int col
= 0; // column
985 os
<< " Names {" << endl
987 for (ZoneMapIter it
= zoneinfo
.begin();
988 it
!= zoneinfo
.end();
994 const string
& id
= it
->first
;
995 os
<< "\"" << id
<< "\"";
996 col
+= id
.length() + 2;
998 os
<< " // " << n
<< endl
1004 os
<< " // " << (n
-1) << endl
1010 //--------------------------------------------------------------------
1012 //--------------------------------------------------------------------
1014 // Unary predicate for finding transitions after a given time
1015 bool isAfter(const Transition t
, long thresh
) {
1016 return t
.time
>= thresh
;
1020 * A zone type that contains only the raw and dst offset. Used by the
1021 * optimizeTypeList() method.
1023 struct SimplifiedZoneType
{
1026 SimplifiedZoneType() : rawoffset(-1), dstoffset(-1) {}
1027 SimplifiedZoneType(const ZoneType
& t
) : rawoffset(t
.rawoffset
),
1028 dstoffset(t
.dstoffset
) {}
1029 bool operator<(const SimplifiedZoneType
& t
) const {
1030 return rawoffset
< t
.rawoffset
||
1031 (rawoffset
== t
.rawoffset
&&
1032 dstoffset
< t
.dstoffset
);
1037 * Construct a ZoneType from a SimplifiedZoneType. Note that this
1038 * discards information; the new ZoneType will have meaningless
1039 * (empty) abbr, isdst, isstd, and isgmt flags; this is appropriate,
1040 * since ignoring these is how we do optimization (we have no use for
1041 * these in historical transitions).
1043 ZoneType::ZoneType(const SimplifiedZoneType
& t
) :
1044 rawoffset(t
.rawoffset
), dstoffset(t
.dstoffset
),
1045 abbr(-1), isdst(false), isstd(false), isgmt(false) {}
1048 * Optimize the type list to remove excess entries. The type list may
1049 * contain entries that are distinct only in terms of their dst, std,
1050 * or gmt flags. Since we don't care about those flags, we can reduce
1051 * the type list to a set of unique raw/dst offset pairs, and remap
1052 * the type indices in the transition list, which stores, for each
1053 * transition, a transition time and a type index.
1055 void ZoneInfo::optimizeTypeList() {
1056 // Assemble set of unique types; only those in the `transitions'
1057 // list, since there may be unused types in the `types' list
1058 // corresponding to transitions that have been trimmed (during
1059 // merging of final data).
1061 if (aliasTo
>= 0) return; // Nothing to do for aliases
1063 // If there are zero transitions and one type, then leave that as-is.
1064 if (transitions
.size() == 0) {
1065 if (types
.size() != 1) {
1066 cerr
<< "Error: transition count = 0, type count = " << types
.size() << endl
;
1071 set
<SimplifiedZoneType
> simpleset
;
1072 for (vector
<Transition
>::const_iterator i
=transitions
.begin();
1073 i
!=transitions
.end(); ++i
) {
1074 assert(i
->type
< (int)types
.size());
1075 simpleset
.insert(types
[i
->type
]);
1078 // Map types to integer indices
1079 map
<SimplifiedZoneType
,int> simplemap
;
1081 for (set
<SimplifiedZoneType
>::const_iterator i
=simpleset
.begin();
1082 i
!=simpleset
.end(); ++i
) {
1083 simplemap
[*i
] = n
++;
1086 // Remap transitions
1087 for (vector
<Transition
>::iterator i
=transitions
.begin();
1088 i
!=transitions
.end(); ++i
) {
1089 assert(i
->type
< (int)types
.size());
1090 ZoneType oldtype
= types
[i
->type
];
1091 SimplifiedZoneType
newtype(oldtype
);
1092 assert(simplemap
.find(newtype
) != simplemap
.end());
1093 i
->type
= simplemap
[newtype
];
1096 // Replace type list
1098 copy(simpleset
.begin(), simpleset
.end(), back_inserter(types
));
1102 * Merge final zone data into this zone.
1104 void ZoneInfo::mergeFinalData(const FinalZone
& fz
) {
1106 long seconds
= yearToSeconds(year
);
1107 vector
<Transition
>::iterator it
=
1108 find_if(transitions
.begin(), transitions
.end(),
1109 bind2nd(ptr_fun(isAfter
), seconds
));
1110 transitions
.erase(it
, transitions
.end());
1112 if (finalYear
!= -1) {
1113 throw invalid_argument("Final zone already merged in");
1115 finalYear
= fz
.year
;
1116 finalOffset
= fz
.offset
;
1117 finalRuleID
= fz
.ruleid
;
1121 * Merge the data from the given final zone into the core zone data by
1122 * calling the ZoneInfo member function mergeFinalData.
1124 void mergeOne(const string
& zoneid
, const FinalZone
& fz
) {
1125 if (ZONEINFO
.find(zoneid
) == ZONEINFO
.end()) {
1126 throw invalid_argument("Unrecognized final zone ID");
1128 ZONEINFO
[zoneid
].mergeFinalData(fz
);
1132 * Visitor function that merges the final zone data into the main zone
1133 * data structures. It calls mergeOne for each final zone and its
1136 void mergeFinalZone(const pair
<string
,FinalZone
>& p
) {
1137 const string
& id
= p
.first
;
1138 const FinalZone
& fz
= p
.second
;
1144 * Print this rule in resource bundle format to os. ID and enclosing
1145 * braces handled elsewhere.
1147 void FinalRule::print(ostream
& os
) const {
1148 // First print the rule part that enters DST; then the rule part
1150 int whichpart
= (part
[0].offset
!= 0) ? 0 : 1;
1151 assert(part
[whichpart
].offset
!= 0);
1152 assert(part
[1-whichpart
].offset
== 0);
1155 for (int i
=0; i
<2; ++i
) {
1156 const FinalRulePart
& p
= part
[whichpart
];
1157 whichpart
= 1-whichpart
;
1158 os
<< p
.month
<< ", " << p
.stz_dowim() << ", " << p
.stz_dow() << ", "
1159 << p
.time
<< ", " << p
.timemode() << ", ";
1161 os
<< part
[whichpart
].offset
<< endl
;
1164 int main(int argc
, char *argv
[]) {
1165 string rootpath
, zonetab
, version
;
1168 cout
<< "Usage: tz2icu <dir> <cmap> <vers>" << endl
1169 << " <dir> path to zoneinfo file tree generated by" << endl
1170 << " ICU-patched version of zic" << endl
1171 << " <cmap> country map, from tzdata archive," << endl
1172 << " typically named \"zone.tab\"" << endl
1173 << " <vers> version string, such as \"2003e\"" << endl
;
1181 cout
<< "Olson data version: " << version
<< endl
;
1184 ifstream
finals(ICU_ZONE_FILE
);
1186 readFinalZonesAndRules(finals
);
1188 cout
<< "Finished reading " << finalZones
.size()
1189 << " final zones and " << finalRules
.size()
1190 << " final rules from " ICU_ZONE_FILE
<< endl
;
1192 cerr
<< "Error: Unable to open " ICU_ZONE_FILE
<< endl
;
1195 } catch (const exception
& error
) {
1196 cerr
<< "Error: While reading " ICU_ZONE_FILE
": " << error
.what() << endl
;
1200 // Read the legacy alias list and process it. Treat the legacy mappings
1201 // like links, but also record them in the "legacy" hash.
1203 ifstream
aliases(ICU_TZ_ALIAS
);
1205 cerr
<< "Error: Unable to open " ICU_TZ_ALIAS
<< endl
;
1210 while (getline(aliases
, line
)) {
1211 string::size_type lb
= line
.find('#');
1212 if (lb
!= string::npos
) {
1213 line
.resize(lb
); // trim comments
1216 istringstream
is(line
);
1217 copy(istream_iterator
<string
>(is
),istream_iterator
<string
>(),
1219 if (a
.size() == 0) continue; // blank line
1220 if (a
.size() != 2) {
1221 cerr
<< "Error: Can't parse \"" << line
<< "\" in "
1222 ICU_TZ_ALIAS
<< endl
;
1227 string
alias(a
[0]), olson(a
[1]);
1228 if (links
.find(alias
) != links
.end()) {
1229 cerr
<< "Error: Alias \"" << alias
1230 << "\" is an Olson zone in "
1231 ICU_TZ_ALIAS
<< endl
;
1234 if (reverseLinks
.find(alias
) != reverseLinks
.end()) {
1235 cerr
<< "Error: Alias \"" << alias
1236 << "\" is an Olson link to \"" << reverseLinks
[olson
]
1237 << "\" in " << ICU_TZ_ALIAS
<< endl
;
1241 // Record source for error reporting
1242 if (linkSource
.find(olson
) == linkSource
.end()) {
1243 linkSource
[olson
] = "ICU alias";
1245 assert(linkSource
.find(alias
) == linkSource
.end());
1246 linkSource
[alias
] = "ICU alias";
1248 links
[olson
].insert(alias
);
1249 reverseLinks
[alias
] = olson
;
1251 cout
<< "Finished reading " << n
1252 << " aliases from " ICU_TZ_ALIAS
<< endl
;
1253 } catch (const exception
& error
) {
1254 cerr
<< "Error: While reading " ICU_TZ_ALIAS
": " << error
.what() << endl
;
1259 // Recursively scan all files below the given path, accumulating
1260 // their data into ZONEINFO. All files must be TZif files. Any
1261 // failure along the way will result in a call to exit(1).
1263 } catch (const exception
& error
) {
1264 cerr
<< "Error: While scanning " << rootpath
<< ": " << error
.what() << endl
;
1268 cout
<< "Finished reading " << ZONEINFO
.size() << " zoneinfo files ["
1269 << (ZONEINFO
.begin())->first
<< ".."
1270 << (--ZONEINFO
.end())->first
<< "]" << endl
;
1273 for_each(finalZones
.begin(), finalZones
.end(), mergeFinalZone
);
1274 } catch (const exception
& error
) {
1275 cerr
<< "Error: While merging final zone data: " << error
.what() << endl
;
1279 // Process links (including ICU aliases). For each link set we have
1280 // a canonical ID (e.g., America/Los_Angeles) and a set of one or more
1281 // aliases (e.g., PST, PST8PDT, ...).
1283 // 1. Add all aliases as zone objects in ZONEINFO
1284 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1285 i
!=links
.end(); ++i
) {
1286 const string
& olson
= i
->first
;
1287 const set
<string
>& aliases
= i
->second
;
1288 if (ZONEINFO
.find(olson
) == ZONEINFO
.end()) {
1289 cerr
<< "Error: Invalid " << linkSource
[olson
] << " to non-existent \""
1290 << olson
<< "\"" << endl
;
1293 for (set
<string
>::const_iterator j
=aliases
.begin();
1294 j
!=aliases
.end(); ++j
) {
1295 ZONEINFO
[*j
] = ZoneInfo();
1299 // 2. Create a mapping from zones to index numbers 0..n-1.
1300 map
<string
,int> zoneIDs
;
1301 vector
<string
> zoneIDlist
;
1303 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1304 zoneIDs
[i
->first
] = z
++;
1305 zoneIDlist
.push_back(i
->first
);
1307 assert(z
== (int) ZONEINFO
.size());
1309 // 3. Merge aliases. Sometimes aliases link to other aliases; we
1310 // resolve these into simplest possible sets.
1311 map
<string
,set
<string
> > links2
;
1312 map
<string
,string
> reverse2
;
1313 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1314 i
!=links
.end(); ++i
) {
1315 string olson
= i
->first
;
1316 while (reverseLinks
.find(olson
) != reverseLinks
.end()) {
1317 olson
= reverseLinks
[olson
];
1319 for (set
<string
>::const_iterator j
=i
->second
.begin(); j
!=i
->second
.end(); ++j
) {
1320 links2
[olson
].insert(*j
);
1321 reverse2
[*j
] = olson
;
1325 reverseLinks
= reverse2
;
1327 if (false) { // Debugging: Emit link map
1328 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1329 i
!=links
.end(); ++i
) {
1330 cout
<< i
->first
<< ": ";
1331 for (set
<string
>::const_iterator j
=i
->second
.begin(); j
!=i
->second
.end(); ++j
) {
1338 // 4. Update aliases
1339 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1340 i
!=links
.end(); ++i
) {
1341 const string
& olson
= i
->first
;
1342 const set
<string
>& aliases
= i
->second
;
1343 ZONEINFO
[olson
].clearAliases();
1344 ZONEINFO
[olson
].addAlias(zoneIDs
[olson
]);
1345 for (set
<string
>::const_iterator j
=aliases
.begin();
1346 j
!=aliases
.end(); ++j
) {
1347 assert(zoneIDs
.find(olson
) != zoneIDs
.end());
1348 assert(zoneIDs
.find(*j
) != zoneIDs
.end());
1349 assert(ZONEINFO
.find(*j
) != ZONEINFO
.end());
1350 ZONEINFO
[*j
].setAliasTo(zoneIDs
[olson
]);
1351 ZONEINFO
[olson
].addAlias(zoneIDs
[*j
]);
1355 // Once merging of final data is complete, we can optimize the type list
1356 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1357 i
->second
.optimizeTypeList();
1360 // Create the country map
1361 map
<string
, set
<string
> > countryMap
; // country -> set of zones
1362 map
<string
, string
> reverseCountryMap
; // zone -> country
1364 ifstream
f(zonetab
.c_str());
1366 cerr
<< "Error: Unable to open " << zonetab
<< endl
;
1371 while (getline(f
, line
)) {
1372 string::size_type lb
= line
.find('#');
1373 if (lb
!= string::npos
) {
1374 line
.resize(lb
); // trim comments
1376 string country
, coord
, zone
;
1377 istringstream
is(line
);
1378 is
>> country
>> coord
>> zone
;
1379 if (country
.size() == 0) continue;
1380 if (country
.size() != 2 || zone
.size() < 1) {
1381 cerr
<< "Error: Can't parse " << line
<< " in " << zonetab
<< endl
;
1384 if (ZONEINFO
.find(zone
) == ZONEINFO
.end()) {
1385 cerr
<< "Error: Country maps to invalid zone " << zone
1386 << " in " << zonetab
<< endl
;
1389 countryMap
[country
].insert(zone
);
1390 reverseCountryMap
[zone
] = country
;
1391 //cerr << (n+1) << ": " << country << " <=> " << zone << endl;
1394 cout
<< "Finished reading " << n
1395 << " country entries from " << zonetab
<< endl
;
1396 } catch (const exception
& error
) {
1397 cerr
<< "Error: While reading " << zonetab
<< ": " << error
.what() << endl
;
1401 // Merge ICU aliases into country map. Don't merge any alias
1402 // that already has a country map, since that doesn't make sense.
1403 // E.g. "Link Europe/Oslo Arctic/Longyearbyen" doesn't mean we
1404 // should cross-map the countries between these two zones.
1405 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1406 i
!=links
.end(); ++i
) {
1407 const string
& olson(i
->first
);
1408 if (reverseCountryMap
.find(olson
) == reverseCountryMap
.end()) {
1411 string c
= reverseCountryMap
[olson
];
1412 const set
<string
>& aliases(i
->second
);
1413 for (set
<string
>::const_iterator j
=aliases
.begin();
1414 j
!= aliases
.end(); ++j
) {
1415 if (reverseCountryMap
.find(*j
) == reverseCountryMap
.end()) {
1416 countryMap
[c
].insert(*j
);
1417 reverseCountryMap
[*j
] = c
;
1418 //cerr << "Aliased country: " << c << " <=> " << *j << endl;
1423 // Create a pseudo-country containing all zones belonging to no country
1424 set
<string
> nocountry
;
1425 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1426 if (reverseCountryMap
.find(i
->first
) == reverseCountryMap
.end()) {
1427 nocountry
.insert(i
->first
);
1430 countryMap
[""] = nocountry
;
1432 // Get local time & year for below
1435 struct tm
* now
= localtime(&sec
);
1436 int thisYear
= now
->tm_year
+ 1900;
1438 // Write out a resource-bundle source file containing data for all zones.
1440 ofstream
file(ICU_TZ_RESOURCE
".txt");
1442 file
<< "//---------------------------------------------------------" << endl
1443 << "// Copyright (C) 2003";
1444 if (thisYear
> 2003) {
1445 file
<< "-" << thisYear
;
1447 file
<< ", International Business Machines" << endl
1448 << "// Corporation and others. All Rights Reserved." << endl
1449 << "//---------------------------------------------------------" << endl
1450 << "// Build tool: tz2icu" << endl
1451 << "// Build date: " << asctime(now
) /* << endl -- asctime emits CR */
1452 << "// Olson source: ftp://elsie.nci.nih.gov/pub/" << endl
1453 << "// Olson version: " << version
<< endl
1454 << "//---------------------------------------------------------" << endl
1455 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl
1456 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl
1457 << "//---------------------------------------------------------" << endl
1459 << ICU_TZ_RESOURCE
" {" << endl
1460 << " Zones:array { " << endl
1461 << ZONEINFO
// Zones (the actual data)
1464 // Names correspond to the Zones list, used for binary searching.
1465 printStringList ( file
, ZONEINFO
); // print the Names list
1467 // Final Rules are used if requested by the zone
1468 file
<< " Rules { " << endl
;
1471 for(map
<string
,FinalRule
>::iterator i
=finalRules
.begin();
1472 i
!=finalRules
.end(); ++i
) {
1473 const string
& id
= i
->first
;
1474 const FinalRule
& r
= i
->second
;
1475 file
<< " " << id
<< ":intvector {" << endl
;
1477 file
<< " } //_#" << frc
++ << endl
;
1479 file
<< " }" << endl
;
1481 // Emit country (region) map. Emitting the string zone IDs results
1482 // in a 188 kb binary resource; emitting the zone index numbers
1483 // trims this to 171 kb. More work for the runtime code, but
1484 // a smaller data footprint.
1485 file
<< " Regions { " << endl
;
1487 for (map
<string
, set
<string
> >::const_iterator i
=countryMap
.begin();
1488 i
!= countryMap
.end(); ++i
) {
1489 string country
= i
->first
;
1490 const set
<string
>& zones(i
->second
);
1495 file
<< country
<< ":intvector { ";
1497 for (set
<string
>::const_iterator j
=zones
.begin();
1498 j
!= zones
.end(); ++j
) {
1499 if (!first
) file
<< ", ";
1501 if (zoneIDs
.find(*j
) == zoneIDs
.end()) {
1502 cerr
<< "Error: Nonexistent zone in country map: " << *j
<< endl
;
1505 file
<< zoneIDs
[*j
]; // emit the zone's index number
1507 file
<< " } //R#" << rc
++ << endl
;
1509 file
<< " }" << endl
;
1511 file
<< "}" << endl
;
1516 if (file
) { // recheck error bit
1517 cout
<< "Finished writing " ICU_TZ_RESOURCE
".txt" << endl
;
1519 cerr
<< "Error: Unable to open/write to " ICU_TZ_RESOURCE
".txt" << endl
;
1523 #define ICU4J_TZ_CLASS "ZoneMetaData"
1525 // Write out a Java source file containing only a few pieces of
1526 // meta-data missing from the core JDK: the equivalency lists and the country map.
1528 ofstream
java(ICU4J_TZ_CLASS
".java");
1530 java
<< "//---------------------------------------------------------" << endl
1531 << "// Copyright (C) 2003";
1532 if (thisYear
> 2003) {
1533 java
<< "-" << thisYear
;
1535 java
<< ", International Business Machines" << endl
1536 << "// Corporation and others. All Rights Reserved." << endl
1537 << "//---------------------------------------------------------" << endl
1538 << "// Build tool: tz2icu" << endl
1539 << "// Build date: " << asctime(now
) /* << endl -- asctime emits CR */
1540 << "// Olson source: ftp://elsie.nci.nih.gov/pub/" << endl
1541 << "// Olson version: " << version
<< endl
1542 << "// ICU version: " << U_ICU_VERSION
<< endl
1543 << "//---------------------------------------------------------" << endl
1544 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl
1545 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl
1546 << "//---------------------------------------------------------" << endl
1548 << "package com.ibm.icu.impl;" << endl
1550 << "public final class " ICU4J_TZ_CLASS
" {" << endl
;
1552 // Emit equivalency lists
1554 java
<< " public static final String VERSION = \"" + version
+ "\";" << endl
;
1555 java
<< " public static final String[][] EQUIV = {" << endl
;
1556 for (ZoneMap::const_iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1557 if (i
->second
.isAlias() || i
->second
.getAliases().size() == 0) {
1560 if (!first1
) java
<< "," << endl
;
1562 // The ID of this zone (the canonical zone, to which the
1563 // aliases point) will be sorted into the list, so it
1564 // won't be at position 0. If we want to know which is
1565 // the canonical zone, we should move it to position 0.
1568 const set
<int>& s
= i
->second
.getAliases();
1569 for (set
<int>::const_iterator j
=s
.begin(); j
!=s
.end(); ++j
) {
1570 if (!first2
) java
<< ", ";
1571 java
<< '"' << zoneIDlist
[*j
] << '"';
1579 // Emit country map.
1581 java
<< " public static final String[][] COUNTRY = {" << endl
;
1582 for (map
<string
, set
<string
> >::const_iterator i
=countryMap
.begin();
1583 i
!= countryMap
.end(); ++i
) {
1584 if (!first1
) java
<< "," << endl
;
1586 string country
= i
->first
;
1587 const set
<string
>& zones(i
->second
);
1588 java
<< " { \"" << country
<< '"';
1589 for (set
<string
>::const_iterator j
=zones
.begin();
1590 j
!= zones
.end(); ++j
) {
1591 java
<< ", \"" << *j
<< '"';
1598 java
<< "}" << endl
;
1603 if (java
) { // recheck error bit
1604 cout
<< "Finished writing " ICU4J_TZ_CLASS
".java" << endl
;
1606 cerr
<< "Error: Unable to open/write to " ICU4J_TZ_CLASS
".java" << endl
;