3 **********************************************************************
4 * Copyright (c) 2003-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 **********************************************************************
8 * Created: July 10 2003
10 **********************************************************************
12 #include "tzfile.h" // from Olson tzcode archive, copied to this dir
17 #undef min // windows.h/STL conflict
18 #undef max // windows.h/STL conflict
19 // "identifier was truncated to 'number' characters" warning
20 #pragma warning(disable: 4786)
49 #include "unicode/uversion.h"
53 bool ICU44PLUS
= TRUE
;
54 string TZ_RESOURCE_NAME
= ICU_TZ_RESOURCE
;
56 //--------------------------------------------------------------------
58 //--------------------------------------------------------------------
// Length, in seconds, of a common (365-day) and a leap (366-day) year.
const int64_t SECS_PER_YEAR      = 365 * 86400LL; // 365 days
const int64_t SECS_PER_LEAP_YEAR = 366 * 86400LL; // 366 days

// Smallest and largest epoch-second values that fit in a signed
// 32-bit integer, widened to 64 bits.
const int64_t LOWEST_TIME32  = -0x80000000LL;
const int64_t HIGHEST_TIME32 = 0x7fffffffLL;
// Return true if y is a leap year under the Gregorian rule: divisible
// by 4, except century years, which must also be divisible by 400.
bool isLeap(int32_t y) {
    if (y % 4 != 0) {
        return false;
    }
    if (y % 100 != 0) {
        return true;
    }
    return y % 400 == 0; // Gregorian
}
69 int64_t secsPerYear(int32_t y
) {
70 return isLeap(y
) ? SECS_PER_LEAP_YEAR
: SECS_PER_YEAR
;
74 * Given a calendar year, return the GMT epoch seconds for midnight
75 * GMT of January 1 of that year. yearToSeconds(1970) == 0.
77 int64_t yearToSeconds(int32_t year
) {
78 // inefficient but foolproof
82 s
+= secsPerYear(y
++);
85 s
-= secsPerYear(--y
);
91 * Given 1970 GMT epoch seconds, return the calendar year containing
92 * that time. secondsToYear(0) == 1970.
94 int32_t secondsToYear(int64_t seconds
) {
95 // inefficient but foolproof
100 s
+= secsPerYear(y
++);
101 if (s
> seconds
) break;
106 s
-= secsPerYear(--y
);
107 if (s
<= seconds
) break;
113 //--------------------------------------------------------------------
115 //--------------------------------------------------------------------
119 struct SimplifiedZoneType
;
121 // A transition from one ZoneType to another
122 // Minimal size = 5 bytes (4+1)
124 int64_t time
; // seconds, 1970 epoch
125 int32_t type
; // index into 'ZoneInfo.types' 0..255
126 Transition(int64_t _time
, int32_t _type
) {
132 // A behavior mode (what zic calls a 'type') of a time zone.
133 // Minimal size = 6 bytes (4+1+3bits)
134 // SEE: SimplifiedZoneType
136 int64_t rawoffset
; // raw seconds offset from GMT
137 int64_t dstoffset
; // dst seconds offset from GMT
139 // We don't really need any of the following, but they are
140 // retained for possible future use. See SimplifiedZoneType.
141 int32_t abbr
; // index into ZoneInfo.abbrs 0..n-1
146 ZoneType(const SimplifiedZoneType
&); // used by optimizeTypeList
148 ZoneType() : rawoffset(-1), dstoffset(-1), abbr(-1) {}
150 // A restricted equality, of just the raw and dst offset
151 bool matches(const ZoneType
& other
) {
152 return rawoffset
== other
.rawoffset
&&
153 dstoffset
== other
.dstoffset
;
157 // A collection of transitions from one ZoneType to another, together
158 // with a list of the ZoneTypes. A ZoneInfo object may have a long
159 // list of transitions between a smaller list of ZoneTypes.
161 // This object represents the contents of a single zic-created
164 vector
<Transition
> transitions
;
165 vector
<ZoneType
> types
;
166 vector
<string
> abbrs
;
170 int32_t finalYear
; // -1 if none
172 // If this is an alias, then all other fields are meaningless, and
173 // this field will point to the "real" zone 0..n-1.
174 int32_t aliasTo
; // -1 if this is a "real" zone
176 // If there are aliases TO this zone, then the following set will
177 // contain their index numbers (each index >= 0).
178 set
<int32_t> aliases
;
180 ZoneInfo() : finalYear(-1), aliasTo(-1) {}
182 void mergeFinalData(const FinalZone
& fz
);
184 void optimizeTypeList();
186 // Set this zone to be an alias TO another zone.
187 void setAliasTo(int32_t index
);
189 // Clear the list of aliases OF this zone.
192 // Add an alias to the list of aliases OF this zone.
193 void addAlias(int32_t index
);
195 // Is this an alias to another zone?
196 bool isAlias() const {
200 // Retrieve alias list
201 const set
<int32_t>& getAliases() const {
205 void print(ostream
& os
, const string
& id
) const;
208 void ZoneInfo::clearAliases() {
213 void ZoneInfo::addAlias(int32_t index
) {
214 assert(aliasTo
< 0 && index
>= 0 && aliases
.find(index
) == aliases
.end());
215 aliases
.insert(index
);
218 void ZoneInfo::setAliasTo(int32_t index
) {
220 assert(aliases
.size() == 0);
224 typedef map
<string
, ZoneInfo
> ZoneMap
;
226 typedef ZoneMap::const_iterator ZoneMapIter
;
228 //--------------------------------------------------------------------
230 //--------------------------------------------------------------------
232 // Global map holding all our ZoneInfo objects, indexed by id.
235 //--------------------------------------------------------------------
236 // zoneinfo file parsing
237 //--------------------------------------------------------------------
// Read zic-coded 32-bit integer from file.
//
// The value is taken from the next four bytes of the stream, most
// significant byte first, interpreted as a two's-complement 32-bit
// integer and returned sign-extended to 64 bits.
//
// @param file an already-open stream positioned at the value
// @param minv smallest acceptable value (inclusive)
// @param maxv largest acceptable value (inclusive)
// @throws out_of_range if the decoded value is outside [minv, maxv]
int64_t readcoded(std::ifstream& file, int64_t minv=std::numeric_limits<int64_t>::min(),
                  int64_t maxv=std::numeric_limits<int64_t>::max()) {
    // Zero-filled so that a short read decodes from zeros instead of
    // indeterminate bytes; the stream's fail state is left untouched
    // for the caller to inspect.
    unsigned char buf[4] = {0, 0, 0, 0}; // must be UNSIGNED
    file.read(reinterpret_cast<char*>(buf), 4);

    // Accumulate in 64 bits: four bytes can never reach the sign bit, so
    // no shift here overflows a signed type.
    int64_t val = 0;
    for (int32_t i=0; i<4; ++i) {
        val = (val << 8) | buf[i];
    }
    // Apply the 32-bit sign explicitly instead of relying on shifting a
    // promoted int into its sign bit.
    if (val >= 0x80000000LL) {
        val -= 0x100000000LL;
    }

    if (val < minv || val > maxv) {
        std::ostringstream os;
        os << "coded value out-of-range: " << val << ", expected ["
           << minv << ", " << maxv << "]";
        throw std::out_of_range(os.str());
    }
    return val;
}
// Read zic-coded 64-bit integer from file.
//
// The value is taken from the next eight bytes of the stream, most
// significant byte first, interpreted as a two's-complement 64-bit
// integer.
//
// @param file an already-open stream positioned at the value
// @param minv smallest acceptable value (inclusive)
// @param maxv largest acceptable value (inclusive)
// @throws out_of_range if the decoded value is outside [minv, maxv]
int64_t readcoded64(std::ifstream& file, int64_t minv=std::numeric_limits<int64_t>::min(),
                    int64_t maxv=std::numeric_limits<int64_t>::max()) {
    // Zero-filled so that a short read decodes from zeros instead of
    // indeterminate bytes; the stream's fail state is left untouched
    // for the caller to inspect.
    unsigned char buf[8] = {0, 0, 0, 0, 0, 0, 0, 0}; // must be UNSIGNED
    file.read(reinterpret_cast<char*>(buf), 8);

    // Build the low 63 bits as a non-negative number so that no left
    // shift ever moves a bit into the sign position of a signed type.
    int64_t val = buf[0] & 0x7f;
    for (int32_t i=1; i<8; ++i) {
        val = (val << 8) | buf[i];
    }
    // The top bit of the first byte is the sign bit; it contributes -2^63.
    if (buf[0] & 0x80) {
        val += std::numeric_limits<int64_t>::min();
    }

    if (val < minv || val > maxv) {
        std::ostringstream os;
        os << "coded value out-of-range: " << val << ", expected ["
           << minv << ", " << maxv << "]";
        throw std::out_of_range(os.str());
    }
    return val;
}
275 // Read a boolean value
276 bool readbool(ifstream
& file
) {
281 os
<< "boolean value out-of-range: " << (int32_t)c
;
282 throw out_of_range(os
.str());
288 * Read the zoneinfo file structure (see tzfile.h) into a ZoneInfo
289 * @param file an already-open file stream
291 void readzoneinfo(ifstream
& file
, ZoneInfo
& info
, bool is64bitData
) {
294 // Check for TZ_ICU_MAGIC signature at file start. If we get a
295 // signature mismatch, it means we're trying to read a file which
296 // isn't an ICU-modified-zic-created zoneinfo file. Typically this
297 // means the user is passing in a "normal" zoneinfo directory, or
298 // a zoneinfo directory that is polluted with other files, or that
299 // the user passed in the wrong directory.
302 if (strncmp(buf
, TZ_ICU_MAGIC
, 4) != 0) {
303 throw invalid_argument("TZ_ICU_MAGIC signature missing");
305 // skip additional Olson byte version
307 // if '\0', we have just one copy of data, if '2' or '3', there is additional
308 // 64 bit version at the end.
309 if(buf
[0]!=0 && buf
[0]!='2' && buf
[0]!='3') {
310 throw invalid_argument("Bad Olson version info");
313 // Read reserved bytes. The first of these will be a version byte.
315 if (*(ICUZoneinfoVersion
*)&buf
!= TZ_ICU_VERSION
) {
316 throw invalid_argument("File version mismatch");
320 int64_t isgmtcnt
= readcoded(file
, 0);
321 int64_t isdstcnt
= readcoded(file
, 0);
322 int64_t leapcnt
= readcoded(file
, 0);
323 int64_t timecnt
= readcoded(file
, 0);
324 int64_t typecnt
= readcoded(file
, 0);
325 int64_t charcnt
= readcoded(file
, 0);
327 // Confirm sizes that we assume to be equal. These assumptions
328 // are drawn from a reading of the zic source (2003a), so they
329 // should hold unless the zic source changes.
330 if (isgmtcnt
!= typecnt
|| isdstcnt
!= typecnt
) {
331 throw invalid_argument("count mismatch between tzh_ttisgmtcnt, tzh_ttisdstcnt, tth_typecnt");
334 // Used temporarily to store transition times and types. We need
335 // to do this because the times and types are stored in two
337 vector
<int64_t> transitionTimes(timecnt
, -1); // temporary
338 vector
<int32_t> transitionTypes(timecnt
, -1); // temporary
340 // Read transition times
341 for (i
=0; i
<timecnt
; ++i
) {
343 transitionTimes
[i
] = readcoded64(file
);
345 transitionTimes
[i
] = readcoded(file
);
349 // Read transition types
350 for (i
=0; i
<timecnt
; ++i
) {
352 file
.read((char*) &c
, 1);
353 int32_t t
= (int32_t) c
;
354 if (t
< 0 || t
>= typecnt
) {
356 os
<< "illegal type: " << t
<< ", expected [0, " << (typecnt
-1) << "]";
357 throw out_of_range(os
.str());
359 transitionTypes
[i
] = t
;
362 // Build transitions vector out of corresponding times and types.
363 bool insertInitial
= false;
364 if (is64bitData
&& !ICU44PLUS
) {
367 for (i
=0; i
<timecnt
; ++i
) {
368 if (transitionTimes
[i
] < LOWEST_TIME32
) {
369 if (minidx
== -1 || transitionTimes
[i
] > transitionTimes
[minidx
]) {
370 // Preserve the latest transition before the 32bit minimum time
373 } else if (transitionTimes
[i
] > HIGHEST_TIME32
) {
374 // Skipping the rest of the transition data. We cannot put such
375 // transitions into zoneinfo.res, because data is limited to signed
376 // 32bit int by the ICU resource bundle.
379 info
.transitions
.push_back(Transition(transitionTimes
[i
], transitionTypes
[i
]));
384 // If there are any transitions before the 32bit minimum time,
385 // put the type information with the 32bit minimum time
386 vector
<Transition
>::iterator itr
= info
.transitions
.begin();
387 info
.transitions
.insert(itr
, Transition(LOWEST_TIME32
, transitionTypes
[minidx
]));
389 // Otherwise, we need to insert the initial type later
390 insertInitial
= true;
394 for (i
=0; i
<timecnt
; ++i
) {
395 info
.transitions
.push_back(Transition(transitionTimes
[i
], transitionTypes
[i
]));
399 // Read types (except for the isdst and isgmt flags, which come later (why??))
400 for (i
=0; i
<typecnt
; ++i
) {
403 type
.rawoffset
= readcoded(file
);
404 type
.dstoffset
= readcoded(file
);
405 type
.isdst
= readbool(file
);
408 file
.read((char*) &c
, 1);
409 type
.abbr
= (int32_t) c
;
411 if (type
.isdst
!= (type
.dstoffset
!= 0)) {
412 throw invalid_argument("isdst does not reflect dstoffset");
415 info
.types
.push_back(type
);
418 assert(info
.types
.size() == (unsigned) typecnt
);
424 int32_t initialTypeIdx
= -1;
426 // Check if the first type is not dst
427 if (info
.types
.at(0).dstoffset
!= 0) {
428 // Initial type's rawoffset is the same as the rawoffset after the
429 // first transition, but no DST is observed.
430 int64_t rawoffset0
= (info
.types
.at(info
.transitions
.at(0).type
)).rawoffset
;
431 // Look for matching type
432 for (i
=0; i
<(int32_t)info
.types
.size(); ++i
) {
433 if (info
.types
.at(i
).rawoffset
== rawoffset0
434 && info
.types
.at(i
).dstoffset
== 0) {
442 assert(initialTypeIdx
>= 0);
443 // Add the initial type associated with the lowest int32 time
444 vector
<Transition
>::iterator itr
= info
.transitions
.begin();
445 info
.transitions
.insert(itr
, Transition(LOWEST_TIME32
, initialTypeIdx
));
449 // Read the abbreviation string
451 // All abbreviations are concatenated together, with a 0 at
452 // the end of each abbr.
453 char* str
= new char[charcnt
+ 8];
454 file
.read(str
, charcnt
);
456 // Split abbreviations apart into individual strings. Record
457 // offset of each abbr in a vector.
458 vector
<int32_t> abbroffset
;
459 char *limit
=str
+charcnt
;
460 for (char* p
=str
; p
<limit
; ++p
) {
463 info
.abbrs
.push_back(string(start
, p
-start
));
464 abbroffset
.push_back(start
-str
);
467 // Remap all the abbrs. Old value is offset into concatenated
468 // raw abbr strings. New value is index into vector of
469 // strings. E.g., 0,5,10,14 => 0,1,2,3.
471 // Keep track of which abbreviations get used.
472 vector
<bool> abbrseen(abbroffset
.size(), false);
474 for (vector
<ZoneType
>::iterator it
=info
.types
.begin();
475 it
!=info
.types
.end();
477 vector
<int32_t>::const_iterator x
=
478 find(abbroffset
.begin(), abbroffset
.end(), it
->abbr
);
479 if (x
==abbroffset
.end()) {
480 // TODO: Modify code to add a new string to the end of
481 // the abbr list when a middle offset is given, e.g.,
482 // "abc*def*" where * == '\0', take offset of 1 and
483 // make the array "abc", "def", "bc", and translate 1
484 // => 2. NOT CRITICAL since we don't even use the
485 // abbr at this time.
487 // TODO: Re-enable this warning if we start using
488 // the Olson abbr data, or if the above TODO is completed.
490 os
<< "Warning: unusual abbr offset " << it
->abbr
491 << ", expected one of";
492 for (vector
<int32_t>::const_iterator y
=abbroffset
.begin();
493 y
!=abbroffset
.end(); ++y
) {
496 cerr
<< os
.str() << "; using 0" << endl
;
500 int32_t index
= x
- abbroffset
.begin();
502 abbrseen
[index
] = true;
506 for (int32_t ii
=0;ii
<(int32_t) abbrseen
.size();++ii
) {
508 cerr
<< "Warning: unused abbreviation: " << ii
<< endl
;
513 // Read leap second info, if any.
514 // *** We discard leap second data. ***
515 for (i
=0; i
<leapcnt
; ++i
) {
516 readcoded(file
); // transition time
517 readcoded(file
); // total correction after above
521 for (i
=0; i
<typecnt
; ++i
) info
.types
[i
].isstd
= readbool(file
);
524 for (i
=0; i
<typecnt
; ++i
) info
.types
[i
].isgmt
= readbool(file
);
527 //--------------------------------------------------------------------
528 // Directory and file reading
529 //--------------------------------------------------------------------
532 * Process a single zoneinfo file, adding the data to ZONEINFO
533 * @param path the full path to the file, e.g., ".\zoneinfo\America\Los_Angeles"
534 * @param id the zone ID, e.g., "America/Los_Angeles"
536 void handleFile(string path
, string id
) {
537 // Check for duplicate id
538 if (ZONEINFO
.find(id
) != ZONEINFO
.end()) {
540 os
<< "duplicate zone ID: " << id
;
541 throw invalid_argument(os
.str());
544 ifstream
file(path
.c_str(), ios::in
| ios::binary
);
546 throw invalid_argument("can't open file");
549 // eat 32bit data part
551 readzoneinfo(file
, info
, false);
555 throw invalid_argument("read error");
558 // we only use 64bit part
560 readzoneinfo(file
, info64
, true);
562 bool alldone
= false;
563 int64_t eofPos
= (int64_t) file
.tellg();
565 // '\n' + <envvar string> + '\n' after the 64bit version data
566 char ch
= file
.get();
568 bool invalidchar
= false;
569 while (file
.get(ch
)) {
574 // must be printable ascii
580 eofPos
= (int64_t) file
.tellg();
581 file
.seekg(0, ios::end
);
582 eofPos
= eofPos
- (int64_t) file
.tellg();
590 os
<< (-eofPos
) << " unprocessed bytes at end";
591 throw invalid_argument(os
.str());
594 ZONEINFO
[id
] = info64
;
598 * Recursively scan the given directory, calling handleFile() for each
599 * file in the tree. The user should call with the root directory and
600 * a prefix of "". The function will call itself with non-empty
605 void scandir(string dirname
, string prefix
="") {
607 WIN32_FIND_DATA FileData
;
609 // Get the first file
610 hList
= FindFirstFile((dirname
+ "\\*").c_str(), &FileData
);
611 if (hList
== INVALID_HANDLE_VALUE
) {
612 cerr
<< "Error: Invalid directory: " << dirname
<< endl
;
616 string
name(FileData
.cFileName
);
617 string
path(dirname
+ "\\" + name
);
618 if (FileData
.dwFileAttributes
& FILE_ATTRIBUTE_DIRECTORY
) {
619 if (name
!= "." && name
!= "..") {
620 scandir(path
, prefix
+ name
+ "/");
624 string id
= prefix
+ name
;
625 handleFile(path
, id
);
626 } catch (const exception
& e
) {
627 cerr
<< "Error: While processing \"" << path
<< "\", "
633 if (!FindNextFile(hList
, &FileData
)) {
634 if (GetLastError() == ERROR_NO_MORE_FILES
) {
644 void scandir(string dir
, string prefix
="") {
646 struct dirent
*dir_entry
;
647 struct stat stat_info
;
649 vector
<string
> subdirs
;
650 vector
<string
> subfiles
;
652 if ((dp
= opendir(dir
.c_str())) == NULL
) {
653 cerr
<< "Error: Invalid directory: " << dir
<< endl
;
656 if (!getcwd(pwd
, sizeof(pwd
))) {
657 cerr
<< "Error: Directory name too long" << endl
;
661 while ((dir_entry
= readdir(dp
)) != NULL
) {
662 string name
= dir_entry
->d_name
;
663 string path
= dir
+ "/" + name
;
664 lstat(dir_entry
->d_name
,&stat_info
);
665 if (S_ISDIR(stat_info
.st_mode
)) {
666 if (name
!= "." && name
!= "..") {
667 subdirs
.push_back(path
);
668 subdirs
.push_back(prefix
+ name
+ "/");
669 // scandir(path, prefix + name + "/");
673 string id
= prefix
+ name
;
674 subfiles
.push_back(path
);
675 subfiles
.push_back(id
);
676 // handleFile(path, id);
677 } catch (const exception
& e
) {
678 cerr
<< "Error: While processing \"" << path
<< "\", "
687 for(int32_t i
=0;i
<(int32_t)subfiles
.size();i
+=2) {
689 handleFile(subfiles
[i
], subfiles
[i
+1]);
690 } catch (const exception
& e
) {
691 cerr
<< "Error: While processing \"" << subfiles
[i
] << "\", "
696 for(int32_t i
=0;i
<(int32_t)subdirs
.size();i
+=2) {
697 scandir(subdirs
[i
], subdirs
[i
+1]);
703 //--------------------------------------------------------------------
704 // Final zone and rule info
705 //--------------------------------------------------------------------
708 * Read and discard the current line.
710 void consumeLine(istream
& in
) {
714 } while (c
!= EOF
&& c
!= '\n');
// One-letter labels for the three time modes.
// NOTE(review): presumably indexed by FinalRulePart::timemode() (0..2) - confirm.
const char* TIME_MODE[] = {"w", "s", "u"};

// Allow 29 days in February because zic outputs February 29
// for rules like "last Sunday in February".
const int32_t MONTH_LEN[] = {
    31, 29, 31, 30, 31, 30,
    31, 31, 30, 31, 30, 31
};

// Number of seconds in one hour.
const int32_t HOUR = 60 * 60;
732 int32_t offset
; // raw offset
733 int32_t year
; // takes effect for y >= year
736 FinalZone(int32_t _offset
, int32_t _year
, const string
& _ruleid
) :
737 offset(_offset
), year(_year
), ruleid(_ruleid
) {
738 if (offset
<= -16*HOUR
|| offset
>= 16*HOUR
) {
740 os
<< "Invalid input offset " << offset
741 << " for year " << year
742 << " and rule ID " << ruleid
;
743 throw invalid_argument(os
.str());
747 os
<< "Invalid input year " << year
748 << " with offset " << offset
749 << " and rule ID " << ruleid
;
750 throw invalid_argument(os
.str());
753 FinalZone() : offset(-1), year(-1) {}
754 void addLink(const string
& alias
) {
755 if (aliases
.find(alias
) != aliases
.end()) {
757 os
<< "Duplicate alias " << alias
;
758 throw invalid_argument(os
.str());
760 aliases
.insert(alias
);
764 struct FinalRulePart
{
770 int32_t offset
; // dst offset, usually either 0 or 1:00
772 // Isstd and isgmt only have 3 valid states, corresponding to local
773 // wall time, local standard time, and GMT standard time.
774 // Here is how the isstd & isgmt flags are set by zic:
775 //| case 's': /* Standard */
776 //| rp->r_todisstd = TRUE;
777 //| rp->r_todisgmt = FALSE;
778 //| case 'w': /* Wall */
779 //| rp->r_todisstd = FALSE;
780 //| rp->r_todisgmt = FALSE;
781 //| case 'g': /* Greenwich */
782 //| case 'u': /* Universal */
783 //| case 'z': /* Zulu */
784 //| rp->r_todisstd = TRUE;
785 //| rp->r_todisgmt = TRUE;
789 bool isset
; // used during building; later ignored
791 FinalRulePart() : isset(false) {}
792 void set(const string
& id
,
802 throw invalid_argument("FinalRulePart set twice");
805 if (_mode
== "DOWLEQ") {
807 } else if (_mode
== "DOWGEQ") {
809 } else if (_mode
== "DOM") {
812 throw invalid_argument("Unrecognized FinalRulePart mode");
823 if (month
< 0 || month
>= 12) {
824 os
<< "Invalid input month " << month
;
826 if (dom
< 1 || dom
> MONTH_LEN
[month
]) {
827 os
<< "Invalid input day of month " << dom
;
829 if (mode
!= DOM
&& (dow
< 0 || dow
>= 7)) {
830 os
<< "Invalid input day of week " << dow
;
832 if (offset
< 0 || offset
> (2 * HOUR
)) {
833 os
<< "Invalid input offset " << offset
;
835 if (isgmt
&& !isstd
) {
836 os
<< "Invalid input isgmt && !isstd";
838 if (!os
.str().empty()) {
842 << month
<< dom
<< dow
<< time
845 throw invalid_argument(os
.str());
850 * Return the time mode as an ICU SimpleTimeZone int from 0..2;
853 int32_t timemode() const {
856 return 2; // gmt standard
859 return 1; // local standard
861 return 0; // local wall
864 // The SimpleTimeZone encoding method for rules is as follows:
867 // DOWGEQ: dom -(dow+1)
868 // DOWLEQ: -dom -(dow+1)
869 // E.g., to encode Mon>=7, use stz_dowim=7, stz_dow=-2
870 // to encode Mon<=7, use stz_dowim=-7, stz_dow=-2
871 // to encode 7, use stz_dowim=7, stz_dow=0
872 // Note that for this program and for SimpleTimeZone, 0==Jan,
873 // but for this program 0==Sun while for SimpleTimeZone 1==Sun.
876 * Return a "dowim" param suitable for SimpleTimeZone.
878 int32_t stz_dowim() const {
879 return (mode
== DOWLEQ
) ? -dom
: dom
;
883 * Return a "dow" param suitable for SimpleTimeZone.
885 int32_t stz_dow() const {
886 return (mode
== DOM
) ? 0 : -(dow
+1);
891 FinalRulePart part
[2];
894 return part
[0].isset
&& part
[1].isset
;
897 void print(ostream
& os
) const;
900 map
<string
,FinalZone
> finalZones
;
901 map
<string
,FinalRule
> finalRules
;
903 map
<string
, set
<string
> > links
;
904 map
<string
, string
> reverseLinks
;
905 map
<string
, string
> linkSource
; // id => "Olson link" or "ICU alias"
908 * Predicate used to find FinalRule objects that do not have both
909 * sub-parts set (indicating an error in the input file).
911 bool isNotSet(const pair
<const string
,FinalRule
>& p
) {
912 return !p
.second
.isset();
916 * Predicate used to find FinalZone objects that do not map to a known
917 * rule (indicating an error in the input file).
919 bool mapsToUnknownRule(const pair
<const string
,FinalZone
>& p
) {
920 return finalRules
.find(p
.second
.ruleid
) == finalRules
.end();
924 * This set is used to make sure each rule in finalRules is used at
925 * least once. First we populate it with all the rules from
926 * finalRules; then we remove all the rules referred to in
929 set
<string
> ruleIDset
;
931 void insertRuleID(const pair
<string
,FinalRule
>& p
) {
932 ruleIDset
.insert(p
.first
);
935 void eraseRuleID(const pair
<string
,FinalZone
>& p
) {
936 ruleIDset
.erase(p
.second
.ruleid
);
940 * Populate finalZones and finalRules from the given istream.
942 void readFinalZonesAndRules(istream
& in
) {
947 if (in
.eof() || !in
) {
949 } else if (token
== "zone") {
950 // zone Africa/Cairo 7200 1995 Egypt # zone Africa/Cairo, offset 7200, year >= 1995, rule Egypt (0)
952 int32_t offset
, year
;
953 in
>> id
>> offset
>> year
>> ruleid
;
955 finalZones
[id
] = FinalZone(offset
, year
, ruleid
);
956 } else if (token
== "rule") {
957 // rule US DOWGEQ 3 1 0 7200 0 0 3600 # 52: US, file data/northamerica, line 119, mode DOWGEQ, April, dom 1, Sunday, time 7200, isstd 0, isgmt 0, offset 3600
958 // rule US DOWLEQ 9 31 0 7200 0 0 0 # 53: US, file data/northamerica, line 114, mode DOWLEQ, October, dom 31, Sunday, time 7200, isstd 0, isgmt 0, offset 0
960 int32_t month
, dom
, dow
, time
, offset
;
962 in
>> id
>> mode
>> month
>> dom
>> dow
>> time
>> isstd
>> isgmt
>> offset
;
964 FinalRule
& fr
= finalRules
[id
];
965 int32_t p
= fr
.part
[0].isset
? 1 : 0;
966 fr
.part
[p
].set(id
, mode
, month
, dom
, dow
, time
, isstd
, isgmt
, offset
);
967 } else if (token
== "link") {
968 string fromid
, toid
; // fromid == "real" zone, toid == alias
969 in
>> fromid
>> toid
;
970 // DO NOT consumeLine(in);
971 if (finalZones
.find(toid
) != finalZones
.end()) {
972 throw invalid_argument("Bad link: `to' id is a \"real\" zone");
975 links
[fromid
].insert(toid
);
976 reverseLinks
[toid
] = fromid
;
978 linkSource
[fromid
] = "Olson link";
979 linkSource
[toid
] = "Olson link";
980 } else if (token
.length() > 0 && token
[0] == '#') {
983 throw invalid_argument("Unrecognized keyword");
987 if (!in
.eof() && !in
) {
988 throw invalid_argument("Parse failure");
991 // Perform validity check: Each rule should have data for 2 parts.
992 if (count_if(finalRules
.begin(), finalRules
.end(), isNotSet
) != 0) {
993 throw invalid_argument("One or more incomplete rule pairs");
996 // Perform validity check: Each zone should map to a known rule.
997 if (count_if(finalZones
.begin(), finalZones
.end(), mapsToUnknownRule
) != 0) {
998 throw invalid_argument("One or more zones refers to an unknown rule");
1001 // Perform validity check: Each rule should be referred to by a zone.
1003 for_each(finalRules
.begin(), finalRules
.end(), insertRuleID
);
1004 for_each(finalZones
.begin(), finalZones
.end(), eraseRuleID
);
1005 if (ruleIDset
.size() != 0) {
1006 throw invalid_argument("Unused rules");
1010 //--------------------------------------------------------------------
1011 // Resource bundle output
1012 //--------------------------------------------------------------------
1014 // SEE olsontz.h FOR RESOURCE BUNDLE DATA LAYOUT
1016 void ZoneInfo::print(ostream
& os
, const string
& id
) const {
1017 // Implement compressed format #2:
1018 os
<< " /* " << id
<< " */ ";
1021 assert(aliases
.size() == 0);
1022 os
<< ":int { " << aliasTo
<< " } "; // No endl - save room for comment.
1027 os
<< ":table {" << endl
;
1029 os
<< ":array {" << endl
;
1032 vector
<Transition
>::const_iterator trn
;
1033 vector
<ZoneType
>::const_iterator typ
;
1038 trn
= transitions
.begin();
1040 // pre 32bit transitions
1041 if (trn
!= transitions
.end() && trn
->time
< LOWEST_TIME32
) {
1042 os
<< " transPre32:intvector { ";
1043 for (first
= true; trn
!= transitions
.end() && trn
->time
< LOWEST_TIME32
; ++trn
) {
1048 os
<< (int32_t)(trn
->time
>> 32) << ", " << (int32_t)(trn
->time
& 0x00000000ffffffff);
1054 if (trn
!= transitions
.end() && trn
->time
< HIGHEST_TIME32
) {
1055 os
<< " trans:intvector { ";
1056 for (first
= true; trn
!= transitions
.end() && trn
->time
< HIGHEST_TIME32
; ++trn
) {
1066 // post 32bit transitions
1067 if (trn
!= transitions
.end()) {
1068 os
<< " transPost32:intvector { ";
1069 for (first
= true; trn
!= transitions
.end(); ++trn
) {
1074 os
<< (int32_t)(trn
->time
>> 32) << ", " << (int32_t)(trn
->time
& 0x00000000ffffffff);
1079 os
<< " :intvector { ";
1080 for (trn
= transitions
.begin(), first
= true; trn
!= transitions
.end(); ++trn
) {
1081 if (!first
) os
<< ", ";
1091 os
<< " typeOffsets:intvector { ";
1093 os
<< " :intvector { ";
1095 for (typ
= types
.begin(); typ
!= types
.end(); ++typ
) {
1096 if (!first
) os
<< ", ";
1098 os
<< typ
->rawoffset
<< ", " << typ
->dstoffset
;
1103 if (transitions
.size() != 0) {
1104 os
<< " typeMap:bin { \"" << hex
<< setfill('0');
1105 for (trn
= transitions
.begin(); trn
!= transitions
.end(); ++trn
) {
1106 os
<< setw(2) << trn
->type
;
1108 os
<< dec
<< "\" }" << endl
;
1111 os
<< " :bin { \"" << hex
<< setfill('0');
1112 for (trn
= transitions
.begin(); trn
!= transitions
.end(); ++trn
) {
1113 os
<< setw(2) << trn
->type
;
1115 os
<< dec
<< "\" }" << endl
;
1118 // Final zone info, if any
1119 if (finalYear
!= -1) {
1121 os
<< " finalRule { \"" << finalRuleID
<< "\" }" << endl
;
1122 os
<< " finalRaw:int { " << finalOffset
<< " }" << endl
;
1123 os
<< " finalYear:int { " << finalYear
<< " }" << endl
;
1125 os
<< " \"" << finalRuleID
<< "\"" << endl
;
1126 os
<< " :intvector { " << finalOffset
<< ", "
1127 << finalYear
<< " }" << endl
;
1131 // Alias list, if any
1132 if (aliases
.size() != 0) {
1135 os
<< " links:intvector { ";
1137 os
<< " :intvector { ";
1139 for (set
<int32_t>::const_iterator i
=aliases
.begin(); i
!=aliases
.end(); ++i
) {
1140 if (!first
) os
<< ", ";
1147 os
<< " } "; // no trailing 'endl', so comments can be placed.
1151 operator<<(ostream
& os
, const ZoneMap
& zoneinfo
) {
1153 for (ZoneMapIter it
= zoneinfo
.begin();
1154 it
!= zoneinfo
.end();
1156 if(c
&& !ICU44PLUS
) os
<< ",";
1157 it
->second
.print(os
, it
->first
);
1158 os
<< "//Z#" << c
++ << endl
;
1163 // print the string list
1164 ostream
& printStringList( ostream
& os
, const ZoneMap
& zoneinfo
) {
1165 int32_t n
= 0; // count
1166 int32_t col
= 0; // column
1167 os
<< " Names {" << endl
1169 for (ZoneMapIter it
= zoneinfo
.begin();
1170 it
!= zoneinfo
.end();
1176 const string
& id
= it
->first
;
1177 os
<< "\"" << id
<< "\"";
1178 col
+= id
.length() + 2;
1180 os
<< " // " << n
<< endl
1186 os
<< " // " << (n
-1) << endl
1192 //--------------------------------------------------------------------
1194 //--------------------------------------------------------------------
1196 // Unary predicate for finding transitions after a given time
1197 bool isAfter(const Transition t
, int64_t thresh
) {
1198 return t
.time
>= thresh
;
1202 * A zone type that contains only the raw and dst offset. Used by the
1203 * optimizeTypeList() method.
1205 struct SimplifiedZoneType
{
1208 SimplifiedZoneType() : rawoffset(-1), dstoffset(-1) {}
1209 SimplifiedZoneType(const ZoneType
& t
) : rawoffset(t
.rawoffset
),
1210 dstoffset(t
.dstoffset
) {}
1211 bool operator<(const SimplifiedZoneType
& t
) const {
1212 return rawoffset
< t
.rawoffset
||
1213 (rawoffset
== t
.rawoffset
&&
1214 dstoffset
< t
.dstoffset
);
1219 * Construct a ZoneType from a SimplifiedZoneType. Note that this
1220 * discards information; the new ZoneType will have meaningless
1221 * (empty) abbr, isdst, isstd, and isgmt flags; this is appropriate,
1222 * since ignoring these is how we do optimization (we have no use for
1223 * these in historical transitions).
1225 ZoneType::ZoneType(const SimplifiedZoneType
& t
) :
1226 rawoffset(t
.rawoffset
), dstoffset(t
.dstoffset
),
1227 abbr(-1), isdst(false), isstd(false), isgmt(false) {}
1230 * Optimize the type list to remove excess entries. The type list may
1231 * contain entries that are distinct only in terms of their dst, std,
1232 * or gmt flags. Since we don't care about those flags, we can reduce
1233 * the type list to a set of unique raw/dst offset pairs, and remap
1234 * the type indices in the transition list, which stores, for each
1235 * transition, a transition time and a type index.
1237 void ZoneInfo::optimizeTypeList() {
1238 // Assemble set of unique types; only those in the `transitions'
1239 // list, since there may be unused types in the `types' list
1240 // corresponding to transitions that have been trimmed (during
1241 // merging of final data).
1243 if (aliasTo
>= 0) return; // Nothing to do for aliases
1246 // This is the old logic which has a bug, which occasionally removes
1247 // the type before the first transition. The problem was fixed
1248 // by inserting the dummy transition indirectly.
1250 // If there are zero transitions and one type, then leave that as-is.
1251 if (transitions
.size() == 0) {
1252 if (types
.size() != 1) {
1253 cerr
<< "Error: transition count = 0, type count = " << types
.size() << endl
;
1258 set
<SimplifiedZoneType
> simpleset
;
1259 for (vector
<Transition
>::const_iterator i
=transitions
.begin();
1260 i
!=transitions
.end(); ++i
) {
1261 assert(i
->type
< (int32_t)types
.size());
1262 simpleset
.insert(types
[i
->type
]);
1265 // Map types to integer indices
1266 map
<SimplifiedZoneType
,int32_t> simplemap
;
1268 for (set
<SimplifiedZoneType
>::const_iterator i
=simpleset
.begin();
1269 i
!=simpleset
.end(); ++i
) {
1270 simplemap
[*i
] = n
++;
1273 // Remap transitions
1274 for (vector
<Transition
>::iterator i
=transitions
.begin();
1275 i
!=transitions
.end(); ++i
) {
1276 assert(i
->type
< (int32_t)types
.size());
1277 ZoneType oldtype
= types
[i
->type
];
1278 SimplifiedZoneType
newtype(oldtype
);
1279 assert(simplemap
.find(newtype
) != simplemap
.end());
1280 i
->type
= simplemap
[newtype
];
1283 // Replace type list
1285 copy(simpleset
.begin(), simpleset
.end(), back_inserter(types
));
1288 if (types
.size() > 1) {
1289 // Note: localtime uses the very first non-dst type as initial offsets.
1290 // If all types are DSTs, the very first type is treated as the initial offsets.
1292 // Decide a type used as the initial offsets. ICU put the type at index 0.
1293 ZoneType initialType
= types
[0];
1294 for (vector
<ZoneType
>::const_iterator i
=types
.begin(); i
!=types
.end(); ++i
) {
1295 if (i
->dstoffset
== 0) {
1301 SimplifiedZoneType
initialSimplifiedType(initialType
);
1303 // create a set of unique types, but ignoring fields which we're not interested in
1304 set
<SimplifiedZoneType
> simpleset
;
1305 simpleset
.insert(initialSimplifiedType
);
1306 for (vector
<Transition
>::const_iterator i
=transitions
.begin(); i
!=transitions
.end(); ++i
) {
1307 assert(i
->type
< (int32_t)types
.size());
1308 simpleset
.insert(types
[i
->type
]);
1311 // Map types to integer indices, however, keeping the first type at offset 0
1312 map
<SimplifiedZoneType
,int32_t> simplemap
;
1313 simplemap
[initialSimplifiedType
] = 0;
1315 for (set
<SimplifiedZoneType
>::const_iterator i
=simpleset
.begin(); i
!=simpleset
.end(); ++i
) {
1316 if (*i
< initialSimplifiedType
|| initialSimplifiedType
< *i
) {
1317 simplemap
[*i
] = n
++;
1321 // Remap transitions
1322 for (vector
<Transition
>::iterator i
=transitions
.begin();
1323 i
!=transitions
.end(); ++i
) {
1324 assert(i
->type
< (int32_t)types
.size());
1325 ZoneType oldtype
= types
[i
->type
];
1326 SimplifiedZoneType
newtype(oldtype
);
1327 assert(simplemap
.find(newtype
) != simplemap
.end());
1328 i
->type
= simplemap
[newtype
];
1331 // Replace type list
1333 types
.push_back(initialSimplifiedType
);
1334 for (set
<SimplifiedZoneType
>::const_iterator i
=simpleset
.begin(); i
!=simpleset
.end(); ++i
) {
1335 if (*i
< initialSimplifiedType
|| initialSimplifiedType
< *i
) {
1336 types
.push_back(*i
);
1340 // Reiterating transitions to remove any transitions which
1341 // do not actually change the raw/dst offsets
1342 int32_t prevTypeIdx
= 0;
1343 for (vector
<Transition
>::iterator i
=transitions
.begin(); i
!=transitions
.end();) {
1344 if (i
->type
== prevTypeIdx
) {
1345 // this is not a time transition, probably just name change
1346 // e.g. America/Resolute after 2006 in 2010b
1347 transitions
.erase(i
);
1349 prevTypeIdx
= i
->type
;
1359 * Merge final zone data into this zone.
// Merge the final-zone record `fz` into this ZoneInfo.
//
// Steps visible in this listing:
//   1. Convert fz.year to GMT epoch seconds with yearToSeconds().
//   2. If that value exceeds HIGHEST_TIME32, clamp it to HIGHEST_TIME32.
//   3. Erase every entry of `transitions` from the first one for which
//      isAfter(transition, seconds) holds through the end of the vector.
//   4. Throw invalid_argument if finalYear is not -1; otherwise copy
//      fz.year / fz.offset / fz.ruleid into finalYear / finalOffset /
//      finalRuleID.
//
// Note that step 3 runs before the check in step 4, so `transitions` has
// already been trimmed when the exception is thrown.
//
// NOTE(review): the embedded listing numbers skip 1364-1365 and 1372-1374,
// so the clamp in step 2 may sit inside an enclosing condition that is not
// shown here -- confirm against the upstream file.
1361 void ZoneInfo::mergeFinalData(const FinalZone
& fz
) {
1362 int32_t year
= fz
.year
;
1363 int64_t seconds
= yearToSeconds(year
);
1366 if (seconds
> HIGHEST_TIME32
) {
1367 // Avoid transitions beyond signed 32bit max second.
1368 // This may result in incorrect offset computation around
1369 // HIGHEST_TIME32. This is a limitation of ICU
1371 seconds
= HIGHEST_TIME32
;
// Locate the first transition accepted by isAfter(transition, seconds);
// bind2nd fixes `seconds` as the predicate's second argument.
// NOTE(review): isAfter is defined outside this view -- presumably it
// compares the transition's time against `seconds`; confirm the exact
// boundary (strictly after vs. at-or-after).
1375 vector
<Transition
>::iterator it
=
1376 find_if(transitions
.begin(), transitions
.end(),
1377 bind2nd(ptr_fun(isAfter
), seconds
));
// Drop that transition and everything following it.
1378 transitions
.erase(it
, transitions
.end());
// finalYear == -1 is treated as "no final zone merged yet"; a second merge
// is rejected.
1380 if (finalYear
!= -1) {
1381 throw invalid_argument("Final zone already merged in");
// Record the final zone's year, offset and rule ID on this zone.
1383 finalYear
= fz
.year
;
1384 finalOffset
= fz
.offset
;
1385 finalRuleID
= fz
.ruleid
;
1389 * Merge the data from the given final zone into the core zone data by
1390 * calling the ZoneInfo member function mergeFinalData.
1392 void mergeOne(const string
& zoneid
, const FinalZone
& fz
) {
1393 if (ZONEINFO
.find(zoneid
) == ZONEINFO
.end()) {
1394 throw invalid_argument("Unrecognized final zone ID");
1396 ZONEINFO
[zoneid
].mergeFinalData(fz
);
1400 * Visitor function that merges the final zone data into the main zone
1401 * data structures. It calls mergeOne for each final zone and its
// Visitor for one (zone ID, FinalZone) entry; main() passes this function
// to for_each over finalZones.
//
// The lines visible here only bind the two halves of the pair to the local
// references `id` and `fz`.
// NOTE(review): the statement that consumes `id` and `fz` is not among the
// lines shown in this listing (embedded numbers 1407-1409 are absent). The
// preceding doc comment says this function calls mergeOne for each final
// zone -- confirm against the upstream file.
1404 void mergeFinalZone(const pair
<string
,FinalZone
>& p
) {
// p.first is the zone ID.
1405 const string
& id
= p
.first
;
// p.second is the final zone record for that ID.
1406 const FinalZone
& fz
= p
.second
;
1412 * Print this rule in resource bundle format to os. ID and enclosing
1413 * braces handled elsewhere.
// Write this rule's numeric fields to `os` as a comma-separated list.
//
// For each of the two entries in `part` -- the one with a non-zero offset
// first, then the one with a zero offset -- the loop emits
//     month, stz_dowim(), stz_dow(), time, timemode()
// with ", " after every value. The function then emits a single offset
// value followed by endl.
//
// The asserts require that exactly one of part[0] / part[1] has a non-zero
// offset.
//
// NOTE(review): embedded listing numbers 1421-1422 and 1428 are absent, so
// a statement between the asserts and the loop (or after the loop body)
// may be missing from this view -- confirm against the upstream file.
1415 void FinalRule::print(ostream
& os
) const {
1416 // First print the rule part that enters DST; then the rule part
// whose offset is 0 (the asserts below require exactly one of each).
1418 int32_t whichpart
= (part
[0].offset
!= 0) ? 0 : 1;
1419 assert(part
[whichpart
].offset
!= 0);
1420 assert(part
[1-whichpart
].offset
== 0);
// Two passes; `whichpart` is flipped on each pass so both parts are
// visited, non-zero-offset part first.
1423 for (int32_t i
=0; i
<2; ++i
) {
1424 const FinalRulePart
& p
= part
[whichpart
];
1425 whichpart
= 1-whichpart
;
1426 os
<< p
.month
<< ", " << p
.stz_dowim() << ", " << p
.stz_dow() << ", "
1427 << p
.time
<< ", " << p
.timemode() << ", ";
// After two flips `whichpart` is back at its initial value, so (given only
// the statements visible here) this prints the non-zero offset.
1429 os
<< part
[whichpart
].offset
<< endl
;
1432 int main(int argc
, char *argv
[]) {
1433 string rootpath
, zonetab
, version
;
1434 bool validArgs
= FALSE
;
1436 if (argc
== 4 || argc
== 5) {
1442 if (strcmp(argv
[4], "--old") == 0) {
1444 TZ_RESOURCE_NAME
= ICU_TZ_RESOURCE_OLD
;
1451 cout
<< "Usage: tz2icu <dir> <cmap> <tzver> [--old]" << endl
1452 << " <dir> path to zoneinfo file tree generated by" << endl
1453 << " ICU-patched version of zic" << endl
1454 << " <cmap> country map, from tzdata archive," << endl
1455 << " typically named \"zone.tab\"" << endl
1456 << " <tzver> version string, such as \"2003e\"" << endl
1457 << " --old generating resource format before ICU4.4" << endl
;
1461 cout
<< "Olson data version: " << version
<< endl
;
1462 cout
<< "ICU 4.4+ format: " << (ICU44PLUS
? "Yes" : "No") << endl
;
1465 ifstream
finals(ICU_ZONE_FILE
);
1467 readFinalZonesAndRules(finals
);
1469 cout
<< "Finished reading " << finalZones
.size()
1470 << " final zones and " << finalRules
.size()
1471 << " final rules from " ICU_ZONE_FILE
<< endl
;
1473 cerr
<< "Error: Unable to open " ICU_ZONE_FILE
<< endl
;
1476 } catch (const exception
& error
) {
1477 cerr
<< "Error: While reading " ICU_ZONE_FILE
": " << error
.what() << endl
;
1482 // Recursively scan all files below the given path, accumulating
1483 // their data into ZONEINFO. All files must be TZif files. Any
1484 // failure along the way will result in a call to exit(1).
1486 } catch (const exception
& error
) {
1487 cerr
<< "Error: While scanning " << rootpath
<< ": " << error
.what() << endl
;
1491 cout
<< "Finished reading " << ZONEINFO
.size() << " zoneinfo files ["
1492 << (ZONEINFO
.begin())->first
<< ".."
1493 << (--ZONEINFO
.end())->first
<< "]" << endl
;
1496 for_each(finalZones
.begin(), finalZones
.end(), mergeFinalZone
);
1497 } catch (const exception
& error
) {
1498 cerr
<< "Error: While merging final zone data: " << error
.what() << endl
;
1502 // Process links (including ICU aliases). For each link set we have
1503 // a canonical ID (e.g., America/Los_Angeles) and a set of one or more
1504 // aliases (e.g., PST, PST8PDT, ...).
1506 // 1. Add all aliases as zone objects in ZONEINFO
1507 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1508 i
!=links
.end(); ++i
) {
1509 const string
& olson
= i
->first
;
1510 const set
<string
>& aliases
= i
->second
;
1511 if (ZONEINFO
.find(olson
) == ZONEINFO
.end()) {
1512 cerr
<< "Error: Invalid " << linkSource
[olson
] << " to non-existent \""
1513 << olson
<< "\"" << endl
;
1516 for (set
<string
>::const_iterator j
=aliases
.begin();
1517 j
!=aliases
.end(); ++j
) {
1518 ZONEINFO
[*j
] = ZoneInfo();
1522 // 2. Create a mapping from zones to index numbers 0..n-1.
1523 map
<string
,int32_t> zoneIDs
;
1524 vector
<string
> zoneIDlist
;
1526 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1527 zoneIDs
[i
->first
] = z
++;
1528 zoneIDlist
.push_back(i
->first
);
1530 assert(z
== (int32_t) ZONEINFO
.size());
1532 // 3. Merge aliases. Sometimes aliases link to other aliases; we
1533 // resolve these into simplest possible sets.
1534 map
<string
,set
<string
> > links2
;
1535 map
<string
,string
> reverse2
;
1536 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1537 i
!=links
.end(); ++i
) {
1538 string olson
= i
->first
;
1539 while (reverseLinks
.find(olson
) != reverseLinks
.end()) {
1540 olson
= reverseLinks
[olson
];
1542 for (set
<string
>::const_iterator j
=i
->second
.begin(); j
!=i
->second
.end(); ++j
) {
1543 links2
[olson
].insert(*j
);
1544 reverse2
[*j
] = olson
;
1548 reverseLinks
= reverse2
;
1550 if (false) { // Debugging: Emit link map
1551 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1552 i
!=links
.end(); ++i
) {
1553 cout
<< i
->first
<< ": ";
1554 for (set
<string
>::const_iterator j
=i
->second
.begin(); j
!=i
->second
.end(); ++j
) {
1561 // 4. Update aliases
1562 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1563 i
!=links
.end(); ++i
) {
1564 const string
& olson
= i
->first
;
1565 const set
<string
>& aliases
= i
->second
;
1566 ZONEINFO
[olson
].clearAliases();
1567 ZONEINFO
[olson
].addAlias(zoneIDs
[olson
]);
1568 for (set
<string
>::const_iterator j
=aliases
.begin();
1569 j
!=aliases
.end(); ++j
) {
1570 assert(zoneIDs
.find(olson
) != zoneIDs
.end());
1571 assert(zoneIDs
.find(*j
) != zoneIDs
.end());
1572 assert(ZONEINFO
.find(*j
) != ZONEINFO
.end());
1573 ZONEINFO
[*j
].setAliasTo(zoneIDs
[olson
]);
1574 ZONEINFO
[olson
].addAlias(zoneIDs
[*j
]);
1578 // Once merging of final data is complete, we can optimize the type list
1579 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1580 i
->second
.optimizeTypeList();
1583 // Create the country map
1584 map
<string
, string
> icuRegions
; // ICU's custom zone -> country override
1585 map
<string
, set
<string
> > countryMap
; // country -> set of zones
1586 map
<string
, string
> reverseCountryMap
; // zone -> country
1589 // Read icuregions file to collect ICU's own zone-region mapping data.
1590 ifstream
frg(ICU_REGIONS
);
1593 while (getline(frg
, line
)) {
1594 if (line
[0] == '#') continue;
1596 string zone
, country
;
1597 istringstream
is(line
);
1598 is
>> zone
>> country
;
1599 if (zone
.size() == 0) continue;
1600 if (country
.size() < 2) {
1601 cerr
<< "Error: Can't parse " << line
<< " in " << ICU_REGIONS
<< endl
;
1604 icuRegions
[zone
] = country
;
1607 cout
<< "No custom region map [icuregions]" << endl
;
1609 } catch (const exception
& error
) {
1610 cerr
<< "Error: While reading " << ICU_REGIONS
<< ": " << error
.what() << endl
;
1615 ifstream
f(zonetab
.c_str());
1617 cerr
<< "Error: Unable to open " << zonetab
<< endl
;
1622 while (getline(f
, line
)) {
1623 string::size_type lb
= line
.find('#');
1624 if (lb
!= string::npos
) {
1625 line
.resize(lb
); // trim comments
1627 string country
, coord
, zone
;
1628 istringstream
is(line
);
1629 is
>> country
>> coord
>> zone
;
1630 if (country
.size() == 0) continue;
1631 if (country
.size() != 2 || zone
.size() < 1) {
1632 cerr
<< "Error: Can't parse " << line
<< " in " << zonetab
<< endl
;
1635 if (ZONEINFO
.find(zone
) == ZONEINFO
.end()) {
1636 cerr
<< "Error: Country maps to invalid zone " << zone
1637 << " in " << zonetab
<< endl
;
1640 if (icuRegions
.find(zone
) != icuRegions
.end()) {
1642 string customCountry
= icuRegions
[zone
];
1643 cout
<< "Region Mapping: custom override for " << zone
1644 << " " << country
<< " -> " << customCountry
<< endl
;
1645 country
= customCountry
;
1647 countryMap
[country
].insert(zone
);
1648 reverseCountryMap
[zone
] = country
;
1649 //cerr << (n+1) << ": " << country << " <=> " << zone << endl;
1652 cout
<< "Finished reading " << n
1653 << " country entries from " << zonetab
<< endl
;
1654 } catch (const exception
& error
) {
1655 cerr
<< "Error: While reading " << zonetab
<< ": " << error
.what() << endl
;
1659 // Merge ICU's own zone-region mapping data
1660 for (map
<string
,string
>::const_iterator i
= icuRegions
.begin();
1661 i
!= icuRegions
.end(); ++i
) {
1662 const string
& zid(i
->first
);
1663 if (reverseCountryMap
.find(zid
) != reverseCountryMap
.end()) {
1666 cout
<< "Region Mapping: custom data zone=" << zid
1667 << ", region=" << i
->second
<< endl
;
1669 reverseCountryMap
[zid
] = i
->second
;
1670 countryMap
[i
->second
].insert(zid
);
1673 // Merge ICU aliases into country map. Don't merge any alias
1674 // that already has a country map, since that doesn't make sense.
1675 // E.g. "Link Europe/Oslo Arctic/Longyearbyen" doesn't mean we
1676 // should cross-map the countries between these two zones.
1677 for (map
<string
,set
<string
> >::const_iterator i
= links
.begin();
1678 i
!=links
.end(); ++i
) {
1679 const string
& olson(i
->first
);
1680 if (reverseCountryMap
.find(olson
) == reverseCountryMap
.end()) {
1683 string c
= reverseCountryMap
[olson
];
1684 const set
<string
>& aliases(i
->second
);
1685 for (set
<string
>::const_iterator j
=aliases
.begin();
1686 j
!= aliases
.end(); ++j
) {
1687 if (reverseCountryMap
.find(*j
) == reverseCountryMap
.end()) {
1688 countryMap
[c
].insert(*j
);
1689 reverseCountryMap
[*j
] = c
;
1690 //cerr << "Aliased country: " << c << " <=> " << *j << endl;
1695 // Create a pseudo-country containing all zones belonging to no country
1696 set
<string
> nocountry
;
1697 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1698 if (reverseCountryMap
.find(i
->first
) == reverseCountryMap
.end()) {
1699 nocountry
.insert(i
->first
);
1702 countryMap
[""] = nocountry
;
1704 // Get local time & year for below
1707 struct tm
* now
= localtime(&sec
);
1708 int32_t thisYear
= now
->tm_year
+ 1900;
1710 string filename
= TZ_RESOURCE_NAME
+ ".txt";
1711 // Write out a resource-bundle source file containing data for
1713 ofstream
file(filename
.c_str());
1715 file
<< "//---------------------------------------------------------" << endl
1716 << "// Copyright (C) 2003";
1717 if (thisYear
> 2003) {
1718 file
<< "-" << thisYear
;
1720 file
<< ", International Business Machines" << endl
1721 << "// Corporation and others. All Rights Reserved." << endl
1722 << "//---------------------------------------------------------" << endl
1723 << "// Build tool: tz2icu" << endl
1724 << "// Build date: " << asctime(now
) /* << endl -- asctime emits CR */
1725 << "// tz database: ftp://ftp.iana.org/tz/" << endl
1726 << "// tz version: " << version
<< endl
1727 << "// ICU version: " << U_ICU_VERSION
<< endl
1728 << "//---------------------------------------------------------" << endl
1729 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl
1730 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl
1731 << "//---------------------------------------------------------" << endl
1733 << TZ_RESOURCE_NAME
<< ":table(nofallback) {" << endl
1734 << " TZVersion { \"" << version
<< "\" }" << endl
1735 << " Zones:array { " << endl
1736 << ZONEINFO
// Zones (the actual data)
1739 // Names correspond to the Zones list, used for binary searching.
1740 printStringList ( file
, ZONEINFO
); // print the Names list
1742 // Final Rules are used if requested by the zone
1743 file
<< " Rules { " << endl
;
1746 for(map
<string
,FinalRule
>::iterator i
=finalRules
.begin();
1747 i
!=finalRules
.end(); ++i
) {
1748 const string
& id
= i
->first
;
1749 const FinalRule
& r
= i
->second
;
1750 file
<< " " << id
<< ":intvector {" << endl
;
1752 file
<< " } //_#" << frc
++ << endl
;
1754 file
<< " }" << endl
;
1756 // Emit country (region) map.
1758 file
<< " Regions:array {" << endl
;
1760 for (ZoneMap::iterator i
=ZONEINFO
.begin(); i
!=ZONEINFO
.end(); ++i
) {
1761 map
<string
, string
>::iterator cit
= reverseCountryMap
.find(i
->first
);
1762 if (cit
== reverseCountryMap
.end()) {
1763 file
<< " \"001\",";
1765 file
<< " \"" << cit
->second
<< "\", ";
1767 file
<< "//Z#" << zn
++ << " " << i
->first
<< endl
;
1769 file
<< " }" << endl
;
1771 file
<< " Regions { " << endl
;
1773 for (map
<string
, set
<string
> >::const_iterator i
=countryMap
.begin();
1774 i
!= countryMap
.end(); ++i
) {
1775 string country
= i
->first
;
1776 const set
<string
>& zones(i
->second
);
1781 file
<< country
<< ":intvector { ";
1783 for (set
<string
>::const_iterator j
=zones
.begin();
1784 j
!= zones
.end(); ++j
) {
1785 if (!first
) file
<< ", ";
1787 if (zoneIDs
.find(*j
) == zoneIDs
.end()) {
1788 cerr
<< "Error: Nonexistent zone in country map: " << *j
<< endl
;
1791 file
<< zoneIDs
[*j
]; // emit the zone's index number
1793 file
<< " } //R#" << rc
++ << endl
;
1795 file
<< " }" << endl
;
1798 file
<< "}" << endl
;
1803 if (file
) { // recheck error bit
1804 cout
<< "Finished writing " << TZ_RESOURCE_NAME
<< ".txt" << endl
;
1806 cerr
<< "Error: Unable to open/write to " << TZ_RESOURCE_NAME
<< ".txt" << endl
;