1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (c) 2003-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
9 * Created: July 10 2003
11 **********************************************************************
13 #include "tzfile.h" // from Olson tzcode archive, copied to this dir
18 #undef min // windows.h/STL conflict
19 #undef max // windows.h/STL conflict
20 // "identifier was truncated to 'number' characters" warning
21 #pragma warning(disable: 4786)
50 #include "unicode/uversion.h"
54 bool ICU44PLUS
= TRUE
;
55 string TZ_RESOURCE_NAME
= ICU_TZ_RESOURCE
;
57 //--------------------------------------------------------------------
59 //--------------------------------------------------------------------
// Length of a calendar year in seconds.
const int64_t SECS_PER_YEAR      = 31536000; // 365 days
const int64_t SECS_PER_LEAP_YEAR = 31622400; // 366 days

// Smallest and largest values representable as a signed 32-bit integer,
// widened to 64 bits so they can be compared directly against 64-bit
// transition times. Taken from numeric_limits rather than by casting the
// literals 0x80000000 / 0x7fffffff: narrowing 0x80000000 to int32_t is
// implementation-defined before C++20.
const int64_t LOWEST_TIME32  = std::numeric_limits<int32_t>::min();
const int64_t HIGHEST_TIME32 = std::numeric_limits<int32_t>::max();
// Gregorian leap-year test: a year is a leap year when it is divisible
// by 4, except for century years, which must also be divisible by 400.
bool isLeap(int32_t y) {
    if (y % 4 != 0) {
        return false;           // not divisible by 4: never a leap year
    }
    if (y % 100 != 0) {
        return true;            // divisible by 4 and not a century year
    }
    return y % 400 == 0;        // century year: leap only every 400 years
}
70 int64_t secsPerYear(int32_t y
) {
71 return isLeap(y
) ? SECS_PER_LEAP_YEAR
: SECS_PER_YEAR
;
75 * Given a calendar year, return the GMT epoch seconds for midnight
76 * GMT of January 1 of that year. yearToSeconds(1970) == 0.
78 int64_t yearToSeconds(int32_t year
) {
79 // inefficient but foolproof
83 s
+= secsPerYear(y
++);
86 s
-= secsPerYear(--y
);
92 * Given 1970 GMT epoch seconds, return the calendar year containing
93 * that time. secondsToYear(0) == 1970.
95 int32_t secondsToYear(int64_t seconds
) {
96 // inefficient but foolproof
101 s
+= secsPerYear(y
++);
102 if (s
> seconds
) break;
107 s
-= secsPerYear(--y
);
108 if (s
<= seconds
) break;
114 //--------------------------------------------------------------------
116 //--------------------------------------------------------------------
120 struct SimplifiedZoneType
;
122 // A transition from one ZoneType to another
123 // Minimal size = 5 bytes (4+1)
125 int64_t time
; // seconds, 1970 epoch
126 int32_t type
; // index into 'ZoneInfo.types' 0..255
127 Transition(int64_t _time
, int32_t _type
) {
133 // A behavior mode (what zic calls a 'type') of a time zone.
134 // Minimal size = 6 bytes (4+1+3bits)
135 // SEE: SimplifiedZoneType
137 int64_t rawoffset
; // raw seconds offset from GMT
138 int64_t dstoffset
; // dst seconds offset from GMT
140 // We don't really need any of the following, but they are
141 // retained for possible future use. See SimplifiedZoneType.
142 int32_t abbr
; // index into ZoneInfo.abbrs 0..n-1
147 ZoneType(const SimplifiedZoneType
&); // used by optimizeTypeList
149 ZoneType() : rawoffset(-1), dstoffset(-1), abbr(-1) {}
151 // A restricted equality, of just the raw and dst offset
152 bool matches(const ZoneType
& other
) {
153 return rawoffset
== other
.rawoffset
&&
154 dstoffset
== other
.dstoffset
;
158 // A collection of transitions from one ZoneType to another, together
159 // with a list of the ZoneTypes. A ZoneInfo object may have a long
160 // list of transitions between a smaller list of ZoneTypes.
162 // This object represents the contents of a single zic-created
165 vector
<Transition
> transitions
;
166 vector
<ZoneType
> types
;
167 vector
<string
> abbrs
;
171 int32_t finalYear
; // -1 if none
173 // If this is an alias, then all other fields are meaningless, and
174 // this field will point to the "real" zone 0..n-1.
175 int32_t aliasTo
; // -1 if this is a "real" zone
177 // If there are aliases TO this zone, then the following set will
178 // contain their index numbers (each index >= 0).
179 set
<int32_t> aliases
;
181 ZoneInfo() : finalYear(-1), aliasTo(-1) {}
183 void mergeFinalData(const FinalZone
& fz
);
185 void optimizeTypeList();
187 // Set this zone to be an alias TO another zone.
188 void setAliasTo(int32_t index
);
190 // Clear the list of aliases OF this zone.
193 // Add an alias to the list of aliases OF this zone.
194 void addAlias(int32_t index
);
196 // Is this an alias to another zone?
197 bool isAlias() const {
201 // Retrieve alias list
202 const set
<int32_t>& getAliases() const {
206 void print(ostream
& os
, const string
& id
) const;
209 void ZoneInfo::clearAliases() {
214 void ZoneInfo::addAlias(int32_t index
) {
215 assert(aliasTo
< 0 && index
>= 0 && aliases
.find(index
) == aliases
.end());
216 aliases
.insert(index
);
219 void ZoneInfo::setAliasTo(int32_t index
) {
221 assert(aliases
.size() == 0);
225 typedef map
<string
, ZoneInfo
> ZoneMap
;
227 typedef ZoneMap::const_iterator ZoneMapIter
;
229 //--------------------------------------------------------------------
231 //--------------------------------------------------------------------
233 // Global map holding all our ZoneInfo objects, indexed by id.
236 //--------------------------------------------------------------------
237 // zoneinfo file parsing
238 //--------------------------------------------------------------------
240 // Read zic-coded 32-bit integer from file
241 int64_t readcoded(ifstream
& file
, int64_t minv
=numeric_limits
<int64_t>::min(),
242 int64_t maxv
=numeric_limits
<int64_t>::max()) {
243 unsigned char buf
[4]; // must be UNSIGNED
245 file
.read((char*)buf
, 4);
246 for(int32_t i
=0,shift
=24;i
<4;++i
,shift
-=8) {
247 val
|= buf
[i
] << shift
;
249 if (val
< minv
|| val
> maxv
) {
251 os
<< "coded value out-of-range: " << val
<< ", expected ["
252 << minv
<< ", " << maxv
<< "]";
253 throw out_of_range(os
.str());
258 // Read zic-coded 64-bit integer from file
259 int64_t readcoded64(ifstream
& file
, int64_t minv
=numeric_limits
<int64_t>::min(),
260 int64_t maxv
=numeric_limits
<int64_t>::max()) {
261 unsigned char buf
[8]; // must be UNSIGNED
263 file
.read((char*)buf
, 8);
264 for(int32_t i
=0,shift
=56;i
<8;++i
,shift
-=8) {
265 val
|= (int64_t)buf
[i
] << shift
;
267 if (val
< minv
|| val
> maxv
) {
269 os
<< "coded value out-of-range: " << val
<< ", expected ["
270 << minv
<< ", " << maxv
<< "]";
271 throw out_of_range(os
.str());
276 // Read a boolean value
277 bool readbool(ifstream
& file
) {
282 os
<< "boolean value out-of-range: " << (int32_t)c
;
283 throw out_of_range(os
.str());
289 * Read the zoneinfo file structure (see tzfile.h) into a ZoneInfo
290 * @param file an already-open file stream
292 void readzoneinfo(ifstream
& file
, ZoneInfo
& info
, bool is64bitData
) {
295 // Check for TZ_ICU_MAGIC signature at file start. If we get a
296 // signature mismatch, it means we're trying to read a file which
297 // isn't an ICU-modified-zic-created zoneinfo file. Typically this
298 // means the user is passing in a "normal" zoneinfo directory, or
299 // a zoneinfo directory that is polluted with other files, or that
300 // the user passed in the wrong directory.
303 if (strncmp(buf
, TZ_ICU_MAGIC
, 4) != 0) {
304 throw invalid_argument("TZ_ICU_MAGIC signature missing");
306 // skip additional Olson byte version
308 // if '\0', we have just one copy of data, if '2' or '3', there is additional
309 // 64 bit version at the end.
310 if(buf
[0]!=0 && buf
[0]!='2' && buf
[0]!='3') {
311 throw invalid_argument("Bad Olson version info");
314 // Read reserved bytes. The first of these will be a version byte.
316 if (*(ICUZoneinfoVersion
*)&buf
!= TZ_ICU_VERSION
) {
317 throw invalid_argument("File version mismatch");
321 int64_t isgmtcnt
= readcoded(file
, 0);
322 int64_t isdstcnt
= readcoded(file
, 0);
323 int64_t leapcnt
= readcoded(file
, 0);
324 int64_t timecnt
= readcoded(file
, 0);
325 int64_t typecnt
= readcoded(file
, 0);
326 int64_t charcnt
= readcoded(file
, 0);
328 // Confirm sizes that we assume to be equal. These assumptions
329 // are drawn from a reading of the zic source (2003a), so they
330 // should hold unless the zic source changes.
331 if (isgmtcnt
!= typecnt
|| isdstcnt
!= typecnt
) {
332 throw invalid_argument("count mismatch between tzh_ttisgmtcnt, tzh_ttisdstcnt, tth_typecnt");
335 // Used temporarily to store transition times and types. We need
336 // to do this because the times and types are stored in two
338 vector
<int64_t> transitionTimes(timecnt
, -1); // temporary
339 vector
<int32_t> transitionTypes(timecnt
, -1); // temporary
341 // Read transition times
342 for (i
=0; i
<timecnt
; ++i
) {
344 transitionTimes
[i
] = readcoded64(file
);
346 transitionTimes
[i
] = readcoded(file
);
350 // Read transition types
351 for (i
=0; i
<timecnt
; ++i
) {
353 file
.read((char*) &c
, 1);
354 int32_t t
= (int32_t) c
;
355 if (t
< 0 || t
>= typecnt
) {
357 os
<< "illegal type: " << t
<< ", expected [0, " << (typecnt
-1) << "]";
358 throw out_of_range(os
.str());
360 transitionTypes
[i
] = t
;
363 // Build transitions vector out of corresponding times and types.
364 bool insertInitial
= false;
365 if (is64bitData
&& !ICU44PLUS
) {
368 for (i
=0; i
<timecnt
; ++i
) {
369 if (transitionTimes
[i
] < LOWEST_TIME32
) {
370 if (minidx
== -1 || transitionTimes
[i
] > transitionTimes
[minidx
]) {
371 // Preserve the latest transition before the 32bit minimum time
374 } else if (transitionTimes
[i
] > HIGHEST_TIME32
) {
375 // Skipping the rest of the transition data. We cannot put such
376 // transitions into zoneinfo.res, because data is limited to signed
377 // 32bit int by the ICU resource bundle.
380 info
.transitions
.push_back(Transition(transitionTimes
[i
], transitionTypes
[i
]));
385 // If there are any transitions before the 32bit minimum time,
386 // put the type information with the 32bit minimum time
387 vector
<Transition
>::iterator itr
= info
.transitions
.begin();
388 info
.transitions
.insert(itr
, Transition(LOWEST_TIME32
, transitionTypes
[minidx
]));
390 // Otherwise, we need insert the initial type later
391 insertInitial
= true;
395 for (i
=0; i
<timecnt
; ++i
) {
396 info
.transitions
.push_back(Transition(transitionTimes
[i
], transitionTypes
[i
]));
400 // Read types (except for the isdst and isgmt flags, which come later (why??))
401 for (i
=0; i
<typecnt
; ++i
) {
404 type
.rawoffset
= readcoded(file
);
405 type
.dstoffset
= readcoded(file
);
406 type
.isdst
= readbool(file
);
409 file
.read((char*) &c
, 1);
410 type
.abbr
= (int32_t) c
;
412 if (type
.isdst
!= (type
.dstoffset
!= 0)) {
413 throw invalid_argument("isdst does not reflect dstoffset");
416 info
.types
.push_back(type
);
419 assert(info
.types
.size() == (unsigned) typecnt
);
425 int32_t initialTypeIdx
= -1;
427 // Check if the first type is not dst
428 if (info
.types
.at(0).dstoffset
!= 0) {
429 // Initial type's rawoffset is same with the rawoffset after the
430 // first transition, but no DST is observed.
431 int64_t rawoffset0
= (info
.types
.at(info
.transitions
.at(0).type
)).rawoffset
;
432 // Look for matching type
433 for (i
=0; i
<(int32_t)info
.types
.size(); ++i
) {
434 if (info
.types
.at(i
).rawoffset
== rawoffset0
435 && info
.types
.at(i
).dstoffset
== 0) {
443 assert(initialTypeIdx
>= 0);
444 // Add the initial type associated with the lowest int32 time
445 vector
<Transition
>::iterator itr
= info
.transitions
.begin();
446 info
.transitions
.insert(itr
, Transition(LOWEST_TIME32
, initialTypeIdx
));
450 // Read the abbreviation string
452 // All abbreviations are concatenated together, with a 0 at
453 // the end of each abbr.
454 char* str
= new char[charcnt
+ 8];
455 file
.read(str
, charcnt
);
457 // Split abbreviations apart into individual strings. Record
458 // offset of each abbr in a vector.
459 vector
<int32_t> abbroffset
;
460 char *limit
=str
+charcnt
;
461 for (char* p
=str
; p
<limit
; ++p
) {
464 info
.abbrs
.push_back(string(start
, p
-start
));
465 abbroffset
.push_back(start
-str
);
468 // Remap all the abbrs. Old value is offset into concatenated
469 // raw abbr strings. New value is index into vector of
470 // strings. E.g., 0,5,10,14 => 0,1,2,3.
472 // Keep track of which abbreviations get used.
473 vector
<bool> abbrseen(abbroffset
.size(), false);
475 for (vector
<ZoneType
>::iterator it
=info
.types
.begin();
476 it
!=info
.types
.end();
478 vector
<int32_t>::const_iterator x
=
479 find(abbroffset
.begin(), abbroffset
.end(), it
->abbr
);
480 if (x
==abbroffset
.end()) {
481 // TODO: Modify code to add a new string to the end of
482 // the abbr list when a middle offset is given, e.g.,
483 // "abc*def*" where * == '\0', take offset of 1 and
484 // make the array "abc", "def", "bc", and translate 1
485 // => 2. NOT CRITICAL since we don't even use the
486 // abbr at this time.
488 // TODO: Re-enable this warning if we start using
489 // the Olson abbr data, or if the above TODO is completed.
491 os
<< "Warning: unusual abbr offset " << it
->abbr
492 << ", expected one of";
493 for (vector
<int32_t>::const_iterator y
=abbroffset
.begin();
494 y
!=abbroffset
.end(); ++y
) {
497 cerr
<< os
.str() << "; using 0" << endl
;
501 int32_t index
= x
- abbroffset
.begin();
503 abbrseen
[index
] = true;
507 for (int32_t ii
=0;ii
<(int32_t) abbrseen
.size();++ii
) {
509 cerr
<< "Warning: unused abbreviation: " << ii
<< endl
;
514 // Read leap second info, if any.
515 // *** We discard leap second data. ***
516 for (i
=0; i
<leapcnt
; ++i
) {
517 readcoded(file
); // transition time
518 readcoded(file
); // total correction after above
522 for (i
=0; i
<typecnt
; ++i
) info
.types
[i
].isstd
= readbool(file
);
525 for (i
=0; i
<typecnt
; ++i
) info
.types
[i
].isgmt
= readbool(file
);
528 //--------------------------------------------------------------------
529 // Directory and file reading
530 //--------------------------------------------------------------------
533 * Process a single zoneinfo file, adding the data to ZONEINFO
534 * @param path the full path to the file, e.g., ".\zoneinfo\America\Los_Angeles"
535 * @param id the zone ID, e.g., "America/Los_Angeles"
537 void handleFile(string path
, string id
) {
538 // Check for duplicate id
539 if (ZONEINFO
.find(id
) != ZONEINFO
.end()) {
541 os
<< "duplicate zone ID: " << id
;
542 throw invalid_argument(os
.str());
545 ifstream
file(path
.c_str(), ios::in
| ios::binary
);
547 throw invalid_argument("can't open file");
550 // eat 32bit data part
552 readzoneinfo(file
, info
, false);
556 throw invalid_argument("read error");
559 // we only use 64bit part
561 readzoneinfo(file
, info64
, true);
563 bool alldone
= false;
564 int64_t eofPos
= (int64_t) file
.tellg();
566 // '\n' + <envvar string> + '\n' after the 64bit version data
567 char ch
= file
.get();
569 bool invalidchar
= false;
570 while (file
.get(ch
)) {
575 // must be printable ascii
581 eofPos
= (int64_t) file
.tellg();
582 file
.seekg(0, ios::end
);
583 eofPos
= eofPos
- (int64_t) file
.tellg();
591 os
<< (-eofPos
) << " unprocessed bytes at end";
592 throw invalid_argument(os
.str());
595 ZONEINFO
[id
] = info64
;
599 * Recursively scan the given directory, calling handleFile() for each
600 * file in the tree. The user should call with the root directory and
601 * a prefix of "". The function will call itself with non-empty
606 void scandir(string dirname
, string prefix
="") {
608 WIN32_FIND_DATA FileData
;
610 // Get the first file
611 hList
= FindFirstFile((dirname
+ "\\*").c_str(), &FileData
);
612 if (hList
== INVALID_HANDLE_VALUE
) {
613 cerr
<< "Error: Invalid directory: " << dirname
<< endl
;
617 string
name(FileData
.cFileName
);
618 string
path(dirname
+ "\\" + name
);
619 if (FileData
.dwFileAttributes
& FILE_ATTRIBUTE_DIRECTORY
) {
620 if (name
!= "." && name
!= "..") {
621 scandir(path
, prefix
+ name
+ "/");
625 string id
= prefix
+ name
;
626 handleFile(path
, id
);
627 } catch (const exception
& e
) {
628 cerr
<< "Error: While processing \"" << path
<< "\", "
634 if (!FindNextFile(hList
, &FileData
)) {
635 if (GetLastError() == ERROR_NO_MORE_FILES
) {
645 void scandir(string dir
, string prefix
="") {
647 struct dirent
*dir_entry
;
648 struct stat stat_info
;
650 vector
<string
> subdirs
;
651 vector
<string
> subfiles
;
653 if ((dp
= opendir(dir
.c_str())) == NULL
) {
654 cerr
<< "Error: Invalid directory: " << dir
<< endl
;
657 if (!getcwd(pwd
, sizeof(pwd
))) {
658 cerr
<< "Error: Directory name too long" << endl
;
662 while ((dir_entry
= readdir(dp
)) != NULL
) {
663 string name
= dir_entry
->d_name
;
664 string path
= dir
+ "/" + name
;
665 lstat(dir_entry
->d_name
,&stat_info
);
666 if (S_ISDIR(stat_info
.st_mode
)) {
667 if (name
!= "." && name
!= "..") {
668 subdirs
.push_back(path
);
669 subdirs
.push_back(prefix
+ name
+ "/");
670 // scandir(path, prefix + name + "/");
674 string id
= prefix
+ name
;
675 subfiles
.push_back(path
);
676 subfiles
.push_back(id
);
677 // handleFile(path, id);
678 } catch (const exception
& e
) {
679 cerr
<< "Error: While processing \"" << path
<< "\", "
688 for(int32_t i
=0;i
<(int32_t)subfiles
.size();i
+=2) {
690 handleFile(subfiles
[i
], subfiles
[i
+1]);
691 } catch (const exception
& e
) {
692 cerr
<< "Error: While processing \"" << subfiles
[i
] << "\", "
697 for(int32_t i
=0;i
<(int32_t)subdirs
.size();i
+=2) {
698 scandir(subdirs
[i
], subdirs
[i
+1]);
704 //--------------------------------------------------------------------
705 // Final zone and rule info
706 //--------------------------------------------------------------------
709 * Read and discard the current line.
711 void consumeLine(istream
& in
) {
715 } while (c
!= EOF
&& c
!= '\n');
724 const char* TIME_MODE
[] = {"w", "s", "u"};
726 // Allow 29 days in February because zic outputs February 29
727 // for rules like "last Sunday in February".
728 const int32_t MONTH_LEN
[] = {31,29,31,30,31,30,31,31,30,31,30,31};
730 const int32_t HOUR
= 3600;
733 int32_t offset
; // raw offset
734 int32_t year
; // takes effect for y >= year
737 FinalZone(int32_t _offset
, int32_t _year
, const string
& _ruleid
) :
738 offset(_offset
), year(_year
), ruleid(_ruleid
) {
739 if (offset
<= -16*HOUR
|| offset
>= 16*HOUR
) {
741 os
<< "Invalid input offset " << offset
742 << " for year " << year
743 << " and rule ID " << ruleid
;
744 throw invalid_argument(os
.str());
748 os
<< "Invalid input year " << year
749 << " with offset " << offset
750 << " and rule ID " << ruleid
;
751 throw invalid_argument(os
.str());
754 FinalZone() : offset(-1), year(-1) {}
755 void addLink(const string
& alias
) {
756 if (aliases
.find(alias
) != aliases
.end()) {
758 os
<< "Duplicate alias " << alias
;
759 throw invalid_argument(os
.str());
761 aliases
.insert(alias
);
765 struct FinalRulePart
{
771 int32_t offset
; // dst offset, usually either 0 or 1:00
773 // Isstd and isgmt only have 3 valid states, corresponding to local
774 // wall time, local standard time, and GMT standard time.
775 // Here is how the isstd & isgmt flags are set by zic:
776 //| case 's': /* Standard */
777 //| rp->r_todisstd = TRUE;
778 //| rp->r_todisgmt = FALSE;
779 //| case 'w': /* Wall */
780 //| rp->r_todisstd = FALSE;
781 //| rp->r_todisgmt = FALSE;
782 //| case 'g': /* Greenwich */
783 //| case 'u': /* Universal */
784 //| case 'z': /* Zulu */
785 //| rp->r_todisstd = TRUE;
786 //| rp->r_todisgmt = TRUE;
790 bool isset
; // used during building; later ignored
792 FinalRulePart() : isset(false) {}
793 void set(const string
& id
,
803 throw invalid_argument("FinalRulePart set twice");
806 if (_mode
== "DOWLEQ") {
808 } else if (_mode
== "DOWGEQ") {
810 } else if (_mode
== "DOM") {
813 throw invalid_argument("Unrecognized FinalRulePart mode");
824 if (month
< 0 || month
>= 12) {
825 os
<< "Invalid input month " << month
;
827 if (dom
< 1 || dom
> MONTH_LEN
[month
]) {
828 os
<< "Invalid input day of month " << dom
;
830 if (mode
!= DOM
&& (dow
< 0 || dow
>= 7)) {
831 os
<< "Invalid input day of week " << dow
;
833 if (offset
< (-1 * HOUR
) || offset
> (2 * HOUR
)) {
834 os
<< "Invalid input offset " << offset
;
836 if (isgmt
&& !isstd
) {
837 os
<< "Invalid input isgmt && !isstd";
839 if (!os
.str().empty()) {
843 << month
<< dom
<< dow
<< time
846 throw invalid_argument(os
.str());
851 * Return the time mode as an ICU SimpleTimeZone int from 0..2;
854 int32_t timemode() const {
857 return 2; // gmt standard
860 return 1; // local standard
862 return 0; // local wall
865 // The SimpleTimeZone encoding method for rules is as follows:
868 // DOWGEQ: dom -(dow+1)
869 // DOWLEQ: -dom -(dow+1)
870 // E.g., to encode Mon>=7, use stz_dowim=7, stz_dow=-2
871 // to encode Mon<=7, use stz_dowim=-7, stz_dow=-2
872 // to encode 7, use stz_dowim=7, stz_dow=0
873 // Note that for this program and for SimpleTimeZone, 0==Jan,
874 // but for this program 0==Sun while for SimpleTimeZone 1==Sun.
877 * Return a "dowim" param suitable for SimpleTimeZone.
879 int32_t stz_dowim() const {
880 return (mode
== DOWLEQ
) ? -dom
: dom
;
884 * Return a "dow" param suitable for SimpleTimeZone.
886 int32_t stz_dow() const {
887 return (mode
== DOM
) ? 0 : -(dow
+1);
892 FinalRulePart part
[2];
895 return part
[0].isset
&& part
[1].isset
;
898 void print(ostream
& os
) const;
901 map
<string
,FinalZone
> finalZones
;
902 map
<string
,FinalRule
> finalRules
;
904 map
<string
, set
<string
> > links
;
905 map
<string
, string
> reverseLinks
;
908 * Predicate used to find FinalRule objects that do not have both
909 * sub-parts set (indicating an error in the input file).
911 bool isNotSet(const pair
<const string
,FinalRule
>& p
) {
912 return !p
.second
.isset();
916 * Predicate used to find FinalZone objects that do not map to a known
917 * rule (indicating an error in the input file).
919 bool mapsToUnknownRule(const pair
<const string
,FinalZone
>& p
) {
920 return finalRules
.find(p
.second
.ruleid
) == finalRules
.end();
924 * This set is used to make sure each rule in finalRules is used at
925 * least once. First we populate it with all the rules from
926 * finalRules; then we remove all the rules referred to in
929 set
<string
> ruleIDset
;
931 void insertRuleID(const pair
<string
,FinalRule
>& p
) {
932 ruleIDset
.insert(p
.first
);
935 void eraseRuleID(const pair
<string
,FinalZone
>& p
) {
936 ruleIDset
.erase(p
.second
.ruleid
);
940 * Populate finalZones and finalRules from the given istream.
942 void readFinalZonesAndRules(istream
& in
) {
947 if (in
.eof() || !in
) {
949 } else if (token
== "zone") {
950 // zone Africa/Cairo 7200 1995 Egypt # zone Africa/Cairo, offset 7200, year >= 1995, rule Egypt (0)
952 int32_t offset
, year
;
953 in
>> id
>> offset
>> year
>> ruleid
;
955 finalZones
[id
] = FinalZone(offset
, year
, ruleid
);
956 } else if (token
== "rule") {
957 // rule US DOWGEQ 3 1 0 7200 0 0 3600 # 52: US, file data/northamerica, line 119, mode DOWGEQ, April, dom 1, Sunday, time 7200, isstd 0, isgmt 0, offset 3600
958 // rule US DOWLEQ 9 31 0 7200 0 0 0 # 53: US, file data/northamerica, line 114, mode DOWLEQ, October, dom 31, Sunday, time 7200, isstd 0, isgmt 0, offset 0
960 int32_t month
, dom
, dow
, time
, offset
;
962 in
>> id
>> mode
>> month
>> dom
>> dow
>> time
>> isstd
>> isgmt
>> offset
;
964 FinalRule
& fr
= finalRules
[id
];
965 int32_t p
= fr
.part
[0].isset
? 1 : 0;
966 fr
.part
[p
].set(id
, mode
, month
, dom
, dow
, time
, isstd
, isgmt
, offset
);
967 } else if (token
== "link") {
968 string fromid
, toid
; // fromid == "real" zone, toid == alias
969 in
>> fromid
>> toid
;
970 // DO NOT consumeLine(in);
971 if (finalZones
.find(toid
) != finalZones
.end()) {
972 throw invalid_argument("Bad link: `to' id is a \"real\" zone");
975 links
[fromid
].insert(toid
);
976 reverseLinks
[toid
] = fromid
;
977 } else if (token
.length() > 0 && token
[0] == '#') {
980 throw invalid_argument("Unrecognized keyword");
984 if (!in
.eof() && !in
) {
985 throw invalid_argument("Parse failure");
988 // Perform validity check: Each rule should have data for 2 parts.
989 if (count_if(finalRules
.begin(), finalRules
.end(), isNotSet
) != 0) {
990 throw invalid_argument("One or more incomplete rule pairs");
993 // Perform validity check: Each zone should map to a known rule.
994 if (count_if(finalZones
.begin(), finalZones
.end(), mapsToUnknownRule
) != 0) {
995 throw invalid_argument("One or more zones refers to an unknown rule");
998 // Perform validity check: Each rule should be referred to by a zone.
1000 for_each(finalRules
.begin(), finalRules
.end(), insertRuleID
);
1001 for_each(finalZones
.begin(), finalZones
.end(), eraseRuleID
);
1002 if (ruleIDset
.size() != 0) {
1003 throw invalid_argument("Unused rules");
1007 //--------------------------------------------------------------------
1008 // Resource bundle output
1009 //--------------------------------------------------------------------
1011 // SEE olsontz.h FOR RESOURCE BUNDLE DATA LAYOUT
1013 void ZoneInfo::print(ostream
& os
, const string
& id
) const {
1014 // Implement compressed format #2:
1015 os
<< " /* " << id
<< " */ ";
1018 assert(aliases
.size() == 0);
1019 os
<< ":int { " << aliasTo
<< " } "; // No endl - save room for comment.
1024 os
<< ":table {" << endl
;
1026 os
<< ":array {" << endl
;
1029 vector
<Transition
>::const_iterator trn
;
1030 vector
<ZoneType
>::const_iterator typ
;
1035 trn
= transitions
.begin();
1037 // pre 32bit transitions
1038 if (trn
!= transitions
.end() && trn
->time
< LOWEST_TIME32
) {
1039 os
<< " transPre32:intvector { ";
1040 for (first
= true; trn
!= transitions
.end() && trn
->time
< LOWEST_TIME32
; ++trn
) {
1045 os
<< (int32_t)(trn
->time
>> 32) << ", " << (int32_t)(trn
->time
& 0x00000000ffffffff);
1051 if (trn
!= transitions
.end() && trn
->time
< HIGHEST_TIME32
) {
1052 os
<< " trans:intvector { ";
1053 for (first
= true; trn
!= transitions
.end() && trn
->time
< HIGHEST_TIME32
; ++trn
) {
1063 // post 32bit transitions
1064 if (trn
!= transitions
.end()) {
1065 os
<< " transPost32:intvector { ";
1066 for (first
= true; trn
!= transitions
.end(); ++trn
) {
1071 os
<< (int32_t)(trn
->time
>> 32) << ", " << (int32_t)(trn
->time
& 0x00000000ffffffff);
1076 os
<< " :intvector { ";
1077 for (trn
= transitions
.begin(), first
= true; trn
!= transitions
.end(); ++trn
) {
1078 if (!first
) os
<< ", ";
1088 os
<< " typeOffsets:intvector { ";
1090 os
<< " :intvector { ";
1092 for (typ
= types
.begin(); typ
!= types
.end(); ++typ
) {
1093 if (!first
) os
<< ", ";
1095 os
<< typ
->rawoffset
<< ", " << typ
->dstoffset
;
1100 if (transitions
.size() != 0) {
1101 os
<< " typeMap:bin { \"" << hex
<< setfill('0');
1102 for (trn
= transitions
.begin(); trn
!= transitions
.end(); ++trn
) {
1103 os
<< setw(2) << trn
->type
;
1105 os
<< dec
<< "\" }" << endl
;
1108 os
<< " :bin { \"" << hex
<< setfill('0');
1109 for (trn
= transitions
.begin(); trn
!= transitions
.end(); ++trn
) {
1110 os
<< setw(2) << trn
->type
;
1112 os
<< dec
<< "\" }" << endl
;
1115 // Final zone info, if any
1116 if (finalYear
!= -1) {
1118 os
<< " finalRule { \"" << finalRuleID
<< "\" }" << endl
;
1119 os
<< " finalRaw:int { " << finalOffset
<< " }" << endl
;
1120 os
<< " finalYear:int { " << finalYear
<< " }" << endl
;
1122 os
<< " \"" << finalRuleID
<< "\"" << endl
;
1123 os
<< " :intvector { " << finalOffset
<< ", "
1124 << finalYear
<< " }" << endl
;
1128 // Alias list, if any
1129 if (aliases
.size() != 0) {
1132 os
<< " links:intvector { ";
1134 os
<< " :intvector { ";
1136 for (set
<int32_t>::const_iterator i
=aliases
.begin(); i
!=aliases
.end(); ++i
) {
1137 if (!first
) os
<< ", ";
1144 os
<< " } "; // no trailing 'endl', so comments can be placed.
1148 operator<<(ostream
& os
, const ZoneMap
& zoneinfo
) {
1150 for (ZoneMapIter it
= zoneinfo
.begin();
1151 it
!= zoneinfo
.end();
1153 if(c
&& !ICU44PLUS
) os
<< ",";
1154 it
->second
.print(os
, it
->first
);
1155 os
<< "//Z#" << c
++ << endl
;
1160 // print the string list
1161 ostream
& printStringList( ostream
& os
, const ZoneMap
& zoneinfo
) {
1162 int32_t n
= 0; // count
1163 int32_t col
= 0; // column
1164 os
<< " Names {" << endl
1166 for (ZoneMapIter it
= zoneinfo
.begin();
1167 it
!= zoneinfo
.end();
1173 const string
& id
= it
->first
;
1174 os
<< "\"" << id
<< "\"";
1175 col
+= id
.length() + 2;
1177 os
<< " // " << n
<< endl
1183 os
<< " // " << (n
-1) << endl
1189 //--------------------------------------------------------------------
1191 //--------------------------------------------------------------------
1193 // Predicate for finding transitions at or after a given time
1194 bool isAfter(const Transition t
, int64_t thresh
) {
1195 return t
.time
>= thresh
;
1199 * A zone type that contains only the raw and dst offset. Used by the
1200 * optimizeTypeList() method.
1202 struct SimplifiedZoneType
{
1205 SimplifiedZoneType() : rawoffset(-1), dstoffset(-1) {}
1206 SimplifiedZoneType(const ZoneType
& t
) : rawoffset(t
.rawoffset
),
1207 dstoffset(t
.dstoffset
) {}
1208 bool operator<(const SimplifiedZoneType
& t
) const {
1209 return rawoffset
< t
.rawoffset
||
1210 (rawoffset
== t
.rawoffset
&&
1211 dstoffset
< t
.dstoffset
);
1216 * Construct a ZoneType from a SimplifiedZoneType. Note that this
1217 * discards information; the new ZoneType will have meaningless
1218 * (empty) abbr, isdst, isstd, and isgmt flags; this is appropriate,
1219 * since ignoring these is how we do optimization (we have no use for
1220 * these in historical transitions).
1222 ZoneType::ZoneType(const SimplifiedZoneType
& t
) :
1223 rawoffset(t
.rawoffset
), dstoffset(t
.dstoffset
),
1224 abbr(-1), isdst(false), isstd(false), isgmt(false) {}
1227 * Optimize the type list to remove excess entries. The type list may
1228 * contain entries that are distinct only in terms of their dst, std,
1229 * or gmt flags. Since we don't care about those flags, we can reduce
1230 * the type list to a set of unique raw/dst offset pairs, and remap
1231 * the type indices in the transition list, which stores, for each
1232 * transition, a transition time and a type index.
1234 void ZoneInfo::optimizeTypeList() {
1235 // Assemble set of unique types; only those in the `transitions'
1236 // list, since there may be unused types in the `types' list
1237 // corresponding to transitions that have been trimmed (during
1238 // merging of final data).
1240 if (aliasTo
>= 0) return; // Nothing to do for aliases
1243 // This is the old logic which has a bug, which occasionally removes
1244 // the type before the first transition. The problem was fixed
1245 // by inserting the dummy transition indirectly.
1247 // If there are zero transitions and one type, then leave that as-is.
1248 if (transitions
.size() == 0) {
1249 if (types
.size() != 1) {
1250 cerr
<< "Error: transition count = 0, type count = " << types
.size() << endl
;
1255 set
<SimplifiedZoneType
> simpleset
;
1256 for (vector
<Transition
>::const_iterator i
=transitions
.begin();
1257 i
!=transitions
.end(); ++i
) {
1258 assert(i
->type
< (int32_t)types
.size());
1259 simpleset
.insert(types
[i
->type
]);
1262 // Map types to integer indices
1263 map
<SimplifiedZoneType
,int32_t> simplemap
;
1265 for (set
<SimplifiedZoneType
>::const_iterator i
=simpleset
.begin();
1266 i
!=simpleset
.end(); ++i
) {
1267 simplemap
[*i
] = n
++;
1270 // Remap transitions
1271 for (vector
<Transition
>::iterator i
=transitions
.begin();
1272 i
!=transitions
.end(); ++i
) {
1273 assert(i
->type
< (int32_t)types
.size());
1274 ZoneType oldtype
= types
[i
->type
];
1275 SimplifiedZoneType
newtype(oldtype
);
1276 assert(simplemap
.find(newtype
) != simplemap
.end());
1277 i
->type
= simplemap
[newtype
];
1280 // Replace type list
1282 copy(simpleset
.begin(), simpleset
.end(), back_inserter(types
));
1285 if (types
.size() > 1) {
1286 // Note: localtime uses the very first non-dst type as initial offsets.
1287 // If all types are DSTs, the very first type is treated as the initial offsets.
1289 // Decide a type used as the initial offsets. ICU put the type at index 0.
1290 ZoneType initialType
= types
[0];
1291 for (vector
<ZoneType
>::const_iterator i
=types
.begin(); i
!=types
.end(); ++i
) {
1292 if (i
->dstoffset
== 0) {
1298 SimplifiedZoneType
initialSimplifiedType(initialType
);
1300 // create a set of unique types, but ignoring fields which we're not interested in
1301 set
<SimplifiedZoneType
> simpleset
;
1302 simpleset
.insert(initialSimplifiedType
);
1303 for (vector
<Transition
>::const_iterator i
=transitions
.begin(); i
!=transitions
.end(); ++i
) {
1304 assert(i
->type
< (int32_t)types
.size());
1305 simpleset
.insert(types
[i
->type
]);
1308 // Map types to integer indices, however, keeping the first type at offset 0
1309 map
<SimplifiedZoneType
,int32_t> simplemap
;
1310 simplemap
[initialSimplifiedType
] = 0;
1312 for (set
<SimplifiedZoneType
>::const_iterator i
=simpleset
.begin(); i
!=simpleset
.end(); ++i
) {
1313 if (*i
< initialSimplifiedType
|| initialSimplifiedType
< *i
) {
1314 simplemap
[*i
] = n
++;
1318 // Remap transitions
1319 for (vector
<Transition
>::iterator i
=transitions
.begin();
1320 i
!=transitions
.end(); ++i
) {
1321 assert(i
->type
< (int32_t)types
.size());
1322 ZoneType oldtype
= types
[i
->type
];
1323 SimplifiedZoneType
newtype(oldtype
);
1324 assert(simplemap
.find(newtype
) != simplemap
.end());
1325 i
->type
= simplemap
[newtype
];
1328 // Replace type list
1330 types
.push_back(initialSimplifiedType
);
1331 for (set
<SimplifiedZoneType
>::const_iterator i
=simpleset
.begin(); i
!=simpleset
.end(); ++i
) {
1332 if (*i
< initialSimplifiedType
|| initialSimplifiedType
< *i
) {
1333 types
.push_back(*i
);
1337 // Reiterating transitions to remove any transitions which
1338 // do not actually change the raw/dst offsets
1339 int32_t prevTypeIdx
= 0;
1340 for (vector
<Transition
>::iterator i
=transitions
.begin(); i
!=transitions
.end();) {
1341 if (i
->type
== prevTypeIdx
) {
1342 // this is not a time transition, probably just name change
1343 // e.g. America/Resolute after 2006 in 2010b
1344 transitions
.erase(i
);
1346 prevTypeIdx
= i
->type
;
1356 * Merge final zone data into this zone.
1358 void ZoneInfo::mergeFinalData(const FinalZone
& fz
) {
1359 int32_t year
= fz
.year
;
1360 int64_t seconds
= yearToSeconds(year
);
1363 if (seconds
> HIGHEST_TIME32
) {
1364 // Avoid transitions beyond signed 32bit max second.
1365 // This may result in incorrect offset computation around
1366 // HIGHEST_TIME32. This is a limitation of ICU
1368 seconds
= HIGHEST_TIME32
;
1372 vector
<Transition
>::iterator it
=
1373 find_if(transitions
.begin(), transitions
.end(),
1374 bind2nd(ptr_fun(isAfter
), seconds
));
1375 transitions
.erase(it
, transitions
.end());
1377 if (finalYear
!= -1) {
1378 throw invalid_argument("Final zone already merged in");
1380 finalYear
= fz
.year
;
1381 finalOffset
= fz
.offset
;
1382 finalRuleID
= fz
.ruleid
;
1386 * Merge the data from the given final zone into the core zone data by
1387 * calling the ZoneInfo member function mergeFinalData.
1389 void mergeOne(const string
& zoneid
, const FinalZone
& fz
) {
1390 if (ZONEINFO
.find(zoneid
) == ZONEINFO
.end()) {
1391 throw invalid_argument("Unrecognized final zone ID");
1393 ZONEINFO
[zoneid
].mergeFinalData(fz
);
1397 * Visitor function that merges the final zone data into the main zone
1398 * data structures. It calls mergeOne for each final zone and its
1401 void mergeFinalZone(const pair
<string
,FinalZone
>& p
) {
1402 const string
& id
= p
.first
;
1403 const FinalZone
& fz
= p
.second
;
1409 * Print this rule in resource bundle format to os. ID and enclosing
1410 * braces handled elsewhere.
1412 void FinalRule::print(ostream
& os
) const {
1413 // First print the rule part that enters DST; then the rule part
1415 int32_t whichpart
= (part
[0].offset
!= 0) ? 0 : 1;
1416 assert(part
[whichpart
].offset
!= 0);
1417 assert(part
[1-whichpart
].offset
== 0);
1420 for (int32_t i
=0; i
<2; ++i
) {
1421 const FinalRulePart
& p
= part
[whichpart
];
1422 whichpart
= 1-whichpart
;
1423 os
<< p
.month
<< ", " << p
.stz_dowim() << ", " << p
.stz_dow() << ", "
1424 << p
.time
<< ", " << p
.timemode() << ", ";
1426 os
<< part
[whichpart
].offset
<< endl
;
// Suffix carried by zone IDs that hold an ICU custom definition overriding
// the TZ database zone of the same base name (e.g. "Europe/Dublin--ICU").
#define ICU_ZONE_OVERRIDE_SUFFIX "--ICU"
// Length of the suffix in characters (terminating NUL excluded). Derived from
// the literal itself so the two macros cannot drift apart; kept as an int to
// match the previous plain integer constant.
#define ICU_ZONE_OVERRIDE_SUFFIX_LEN (static_cast<int>(sizeof(ICU_ZONE_OVERRIDE_SUFFIX) - 1))
1432 int main(int argc
, char *argv
[]) {
1433 string rootpath
, zonetab
, version
;
1434 bool validArgs
= FALSE
;
1436 if (argc
== 4 || argc
== 5) {
1442 if (strcmp(argv
[4], "--old") == 0) {
1444 TZ_RESOURCE_NAME
= ICU_TZ_RESOURCE_OLD
;
1451 cout
<< "Usage: tz2icu <dir> <cmap> <tzver> [--old]" << endl
1452 << " <dir> path to zoneinfo file tree generated by" << endl
1453 << " ICU-patched version of zic" << endl
1454 << " <cmap> country map, from tzdata archive," << endl
1455 << " typically named \"zone.tab\"" << endl
1456 << " <tzver> version string, such as \"2003e\"" << endl
1457 << " --old generating resource format before ICU4.4" << endl
;
1461 cout
<< "Olson data version: " << version
<< endl
;
1462 cout
<< "ICU 4.4+ format: " << (ICU44PLUS
? "Yes" : "No") << endl
;
1465 ifstream
finals(ICU_ZONE_FILE
);
1467 readFinalZonesAndRules(finals
);
1469 cout
<< "Finished reading " << finalZones
.size()
1470 << " final zones and " << finalRules
.size()
1471 << " final rules from " ICU_ZONE_FILE
<< endl
;
1473 cerr
<< "Error: Unable to open " ICU_ZONE_FILE
<< endl
;
1476 } catch (const exception
& error
) {
1477 cerr
<< "Error: While reading " ICU_ZONE_FILE
": " << error
.what() << endl
;
1482 // Recursively scan all files below the given path, accumulating
1483 // their data into ZONEINFO. All files must be TZif files. Any
1484 // failure along the way will result in a call to exit(1).
1486 } catch (const exception
& error
) {
1487 cerr
<< "Error: While scanning " << rootpath
<< ": " << error
.what() << endl
;
1491 cout
<< "Finished reading " << ZONEINFO
.size() << " zoneinfo files ["
1492 << (ZONEINFO
.begin())->first
<< ".."
1493 << (--ZONEINFO
.end())->first
<< "]" << endl
;
1495 // Overrides TZ database zones with ICU custom zone definition.
1496 // These ICU zone overrides are defined in icuzones, with suffix --ICU.
1497 // If there is a matching TZ database zone, the zoneinfo is replaced
1498 // with the ICU definition. Then, the zone ID with --ICU suffix
1499 // will be deleted from the final list.
1500 // For example, zoneinfo for Europe/Dublin imported from the TZ database
1501 // will be replaced with the zone definition for Europe/Dublin--ICU
1504 // Collect zone IDs to be modified with ICU definition.
1505 vector
<string
> customZones
;
1506 for (ZoneMapIter i
= ZONEINFO
.begin(); i
!= ZONEINFO
.end(); ++i
) {
1507 const string
& id
= i
->first
;
1508 size_t idx
= id
.rfind(ICU_ZONE_OVERRIDE_SUFFIX
);
1509 if (idx
!= string::npos
&& idx
== id
.length() - ICU_ZONE_OVERRIDE_SUFFIX_LEN
) {
1510 cout
<< "ICU zone override: " << id
<< endl
;
1511 customZones
.push_back(id
.substr(0, idx
));
1516 // BEGIN ICU Custom ZoneInfo Override Handling
1519 // Replace zoneinfo with ICU definition, then remove ICU zone ID with
1520 // the special suffix.
1521 for (vector
<string
>::iterator i
= customZones
.begin(); i
!= customZones
.end(); i
++) {
1522 string
& origId
= *i
;
1523 string custId
= origId
+ ICU_ZONE_OVERRIDE_SUFFIX
;
1525 map
<string
,ZoneInfo
>::iterator origZi
= ZONEINFO
.find(origId
);
1526 map
<string
,ZoneInfo
>::iterator custZi
= ZONEINFO
.find(custId
);
1527 if (origZi
!= ZONEINFO
.end() && custZi
!= ZONEINFO
.end()) {
1528 // replace original zone info with custom override,
1529 // then delete one custom ID
1530 cout
<< "Replacing ZoneInfo " << origId
<< " with " << custId
<< endl
;
1531 origZi
->second
= custZi
->second
;
1532 ZONEINFO
.erase(custZi
);
1535 // Also replace final rule
1536 map
<string
,FinalZone
>::iterator origFz
= finalZones
.find(origId
);
1537 map
<string
,FinalZone
>::iterator custFz
= finalZones
.find(custId
);
1538 if (origFz
!= finalZones
.end() && custFz
!= finalZones
.end()) {
1539 // replace original final zone with custom override,
1540 // then delete one for custom ID
1541 cout
<< "Replacing FinalZone for " << origId
<< " with " << custId
<< endl
;
1542 origFz
->second
= custFz
->second
;
1543 finalZones
.erase(custFz
);
1547 // Also remove aliases for ICU custom zoneinfo overrides.
1548 for (map
<string
,set
<string
>>::const_iterator i
= links
.begin(); i
!= links
.end(); ) {
1549 const string
& id
= i
->first
;
1550 size_t idx
= id
.rfind(ICU_ZONE_OVERRIDE_SUFFIX
);
1551 if (idx
!= string::npos
&& idx
== id
.length() - ICU_ZONE_OVERRIDE_SUFFIX_LEN
) {
1552 const set
<string
>& aliases
= i
->second
;
1553 // Also remove all reverse links
1554 for (set
<string
>::const_iterator j
= aliases
.begin(); j
!= aliases
.end(); j
++) {
1555 const string
& alias
= *j
;
1556 cout
<< "Removing alias " << alias
<< endl
;
1557 reverseLinks
.erase(alias
);
1568 // END ICU Custom ZoneInfo Override Handling
1572 for_each(finalZones
.begin(), finalZones
.end(), mergeFinalZone
);
1573 } catch (const exception
& error
) {
1574 cerr
<< "Error: While merging final zone data: " << error
.what() << endl
;
1578 // Process links (including ICU aliases). For each link set we have
1579 // a canonical ID (e.g., America/Los_Angeles) and a set of one or more
1580 // aliases (e.g., PST, PST8PDT, ...).
1582 // 1. Add all aliases as zone objects in ZONEINFO
1583 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1584 i
!=links
.end(); ++i
) {
1585 const string
& olson
= i
->first
;
1586 const set
<string
>& aliases
= i
->second
;
1587 if (ZONEINFO
.find(olson
) == ZONEINFO
.end()) {
1588 cerr
<< "Error: Invalid 'Link' to non-existent \""
1589 << olson
<< "\"" << endl
;
1592 for (set
<string
>::const_iterator j
=aliases
.begin();
1593 j
!=aliases
.end(); ++j
) {
1594 ZONEINFO
[*j
] = ZoneInfo();
1598 // 2. Create a mapping from zones to index numbers 0..n-1.
1599 map
<string
,int32_t> zoneIDs
;
1600 vector
<string
> zoneIDlist
;
1602 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1603 zoneIDs
[i
->first
] = z
++;
1604 zoneIDlist
.push_back(i
->first
);
1606 assert(z
== (int32_t) ZONEINFO
.size());
1608 // 3. Merge aliases. Sometimes aliases link to other aliases; we
1609 // resolve these into simplest possible sets.
1610 map
<string
,set
<string
> > links2
;
1611 map
<string
,string
> reverse2
;
1612 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1613 i
!=links
.end(); ++i
) {
1614 string olson
= i
->first
;
1615 while (reverseLinks
.find(olson
) != reverseLinks
.end()) {
1616 olson
= reverseLinks
[olson
];
1618 for (set
<string
>::const_iterator j
=i
->second
.begin(); j
!=i
->second
.end(); ++j
) {
1619 links2
[olson
].insert(*j
);
1620 reverse2
[*j
] = olson
;
1624 reverseLinks
= reverse2
;
1626 if (false) { // Debugging: Emit link map
1627 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1628 i
!=links
.end(); ++i
) {
1629 cout
<< i
->first
<< ": ";
1630 for (set
<string
>::const_iterator j
=i
->second
.begin(); j
!=i
->second
.end(); ++j
) {
1637 // 4. Update aliases
1638 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1639 i
!=links
.end(); ++i
) {
1640 const string
& olson
= i
->first
;
1641 const set
<string
>& aliases
= i
->second
;
1642 ZONEINFO
[olson
].clearAliases();
1643 ZONEINFO
[olson
].addAlias(zoneIDs
[olson
]);
1644 for (set
<string
>::const_iterator j
=aliases
.begin();
1645 j
!=aliases
.end(); ++j
) {
1646 assert(zoneIDs
.find(olson
) != zoneIDs
.end());
1647 assert(zoneIDs
.find(*j
) != zoneIDs
.end());
1648 assert(ZONEINFO
.find(*j
) != ZONEINFO
.end());
1649 ZONEINFO
[*j
].setAliasTo(zoneIDs
[olson
]);
1650 ZONEINFO
[olson
].addAlias(zoneIDs
[*j
]);
1654 // Once merging of final data is complete, we can optimize the type list
1655 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1656 i
->second
.optimizeTypeList();
1659 // Create the country map
1660 map
<string
, string
> icuRegions
; // ICU's custom zone -> country override
1661 map
<string
, set
<string
> > countryMap
; // country -> set of zones
1662 map
<string
, string
> reverseCountryMap
; // zone -> country
1665 // Read icuregions file to collect ICU's own zone-region mapping data.
1666 ifstream
frg(ICU_REGIONS
);
1669 while (getline(frg
, line
)) {
1670 if (line
[0] == '#') continue;
1672 string zone
, country
;
1673 istringstream
is(line
);
1674 is
>> zone
>> country
;
1675 if (zone
.size() == 0) continue;
1676 if (country
.size() < 2) {
1677 cerr
<< "Error: Can't parse " << line
<< " in " << ICU_REGIONS
<< endl
;
1680 icuRegions
[zone
] = country
;
1683 cout
<< "No custom region map [icuregions]" << endl
;
1685 } catch (const exception
& error
) {
1686 cerr
<< "Error: While reading " << ICU_REGIONS
<< ": " << error
.what() << endl
;
1691 ifstream
f(zonetab
.c_str());
1693 cerr
<< "Error: Unable to open " << zonetab
<< endl
;
1698 while (getline(f
, line
)) {
1699 string::size_type lb
= line
.find('#');
1700 if (lb
!= string::npos
) {
1701 line
.resize(lb
); // trim comments
1703 string country
, coord
, zone
;
1704 istringstream
is(line
);
1705 is
>> country
>> coord
>> zone
;
1706 if (country
.size() == 0) continue;
1707 if (country
.size() != 2 || zone
.size() < 1) {
1708 cerr
<< "Error: Can't parse " << line
<< " in " << zonetab
<< endl
;
1711 if (ZONEINFO
.find(zone
) == ZONEINFO
.end()) {
1712 cerr
<< "Error: Country maps to invalid zone " << zone
1713 << " in " << zonetab
<< endl
;
1716 if (icuRegions
.find(zone
) != icuRegions
.end()) {
1718 string customCountry
= icuRegions
[zone
];
1719 cout
<< "Region Mapping: custom override for " << zone
1720 << " " << country
<< " -> " << customCountry
<< endl
;
1721 country
= customCountry
;
1723 countryMap
[country
].insert(zone
);
1724 reverseCountryMap
[zone
] = country
;
1725 //cerr << (n+1) << ": " << country << " <=> " << zone << endl;
1728 cout
<< "Finished reading " << n
1729 << " country entries from " << zonetab
<< endl
;
1730 } catch (const exception
& error
) {
1731 cerr
<< "Error: While reading " << zonetab
<< ": " << error
.what() << endl
;
1735 // Merge ICU's own zone-region mapping data
1736 for (map
<string
,string
>::const_iterator i
= icuRegions
.begin();
1737 i
!= icuRegions
.end(); ++i
) {
1738 const string
& zid(i
->first
);
1739 if (reverseCountryMap
.find(zid
) != reverseCountryMap
.end()) {
1742 cout
<< "Region Mapping: custom data zone=" << zid
1743 << ", region=" << i
->second
<< endl
;
1745 reverseCountryMap
[zid
] = i
->second
;
1746 countryMap
[i
->second
].insert(zid
);
1749 // Merge ICU aliases into country map. Don't merge any alias
1750 // that already has a country map, since that doesn't make sense.
1751 // E.g. "Link Europe/Oslo Arctic/Longyearbyen" doesn't mean we
1752 // should cross-map the countries between these two zones.
1753 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1754 i
!=links
.end(); ++i
) {
1755 const string
& olson(i
->first
);
1756 if (reverseCountryMap
.find(olson
) == reverseCountryMap
.end()) {
1759 string c
= reverseCountryMap
[olson
];
1760 const set
<string
>& aliases(i
->second
);
1761 for (set
<string
>::const_iterator j
=aliases
.begin();
1762 j
!= aliases
.end(); ++j
) {
1763 if (reverseCountryMap
.find(*j
) == reverseCountryMap
.end()) {
1764 countryMap
[c
].insert(*j
);
1765 reverseCountryMap
[*j
] = c
;
1766 //cerr << "Aliased country: " << c << " <=> " << *j << endl;
1771 // Create a pseudo-country containing all zones belonging to no country
1772 set
<string
> nocountry
;
1773 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1774 if (reverseCountryMap
.find(i
->first
) == reverseCountryMap
.end()) {
1775 nocountry
.insert(i
->first
);
1778 countryMap
[""] = nocountry
;
1780 // Get local time & year for below
1783 struct tm
* now
= localtime(&sec
);
1785 string filename
= TZ_RESOURCE_NAME
+ ".txt";
1786 // Write out a resource-bundle source file containing data for
1788 ofstream
file(filename
.c_str());
1790 file
<< "//---------------------------------------------------------" << endl
1791 << "// Copyright (C) 2016 and later: Unicode, Inc. and others." << endl
1792 << "// License & terms of use: http://www.unicode.org/copyright.html#License" << endl
1793 << "//---------------------------------------------------------" << endl
1794 << "// Build tool: tz2icu" << endl
1795 << "// Build date: " << asctime(now
) /* << endl -- asctime emits CR */
1796 << "// tz database: ftp://ftp.iana.org/tz/" << endl
1797 << "// tz version: " << version
<< endl
1798 << "// ICU version: " << U_ICU_VERSION
<< endl
1799 << "//---------------------------------------------------------" << endl
1800 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl
1801 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl
1802 << "//---------------------------------------------------------" << endl
1804 << TZ_RESOURCE_NAME
<< ":table(nofallback) {" << endl
1805 << " TZVersion { \"" << version
<< "\" }" << endl
1806 << " Zones:array { " << endl
1807 << ZONEINFO
// Zones (the actual data)
1810 // Names correspond to the Zones list, used for binary searching.
1811 printStringList ( file
, ZONEINFO
); // print the Names list
1813 // Final Rules are used if requested by the zone
1814 file
<< " Rules { " << endl
;
1817 for(map
<string
,FinalRule
>::iterator i
=finalRules
.begin();
1818 i
!=finalRules
.end(); ++i
) {
1819 const string
& id
= i
->first
;
1820 const FinalRule
& r
= i
->second
;
1821 file
<< " " << id
<< ":intvector {" << endl
;
1823 file
<< " } //_#" << frc
++ << endl
;
1825 file
<< " }" << endl
;
1827 // Emit country (region) map.
1829 file
<< " Regions:array {" << endl
;
1831 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1832 map
<string
, string
>::iterator cit
= reverseCountryMap
.find(i
->first
);
1833 if (cit
== reverseCountryMap
.end()) {
1834 file
<< " \"001\",";
1836 file
<< " \"" << cit
->second
<< "\", ";
1838 file
<< "//Z#" << zn
++ << " " << i
->first
<< endl
;
1840 file
<< " }" << endl
;
1842 file
<< " Regions { " << endl
;
1844 for (map
<string
, set
<string
> >::const_iterator i
=countryMap
.begin();
1845 i
!= countryMap
.end(); ++i
) {
1846 string country
= i
->first
;
1847 const set
<string
>& zones(i
->second
);
1852 file
<< country
<< ":intvector { ";
1854 for (set
<string
>::const_iterator j
=zones
.begin();
1855 j
!= zones
.end(); ++j
) {
1856 if (!first
) file
<< ", ";
1858 if (zoneIDs
.find(*j
) == zoneIDs
.end()) {
1859 cerr
<< "Error: Nonexistent zone in country map: " << *j
<< endl
;
1862 file
<< zoneIDs
[*j
]; // emit the zone's index number
1864 file
<< " } //R#" << rc
++ << endl
;
1866 file
<< " }" << endl
;
1869 file
<< "}" << endl
;
1874 if (file
) { // recheck error bit
1875 cout
<< "Finished writing " << TZ_RESOURCE_NAME
<< ".txt" << endl
;
1877 cerr
<< "Error: Unable to open/write to " << TZ_RESOURCE_NAME
<< ".txt" << endl
;