From 4162bf987dca731f631c28df29ad400b78d50ea9 Mon Sep 17 00:00:00 2001 From: Apple Date: Fri, 10 Oct 2008 04:30:27 +0000 Subject: [PATCH] ICU-8.11.2.tar.gz --- icuSources/test/intltest/dtfmtrtts.cpp | 18 +- icuSources/test/intltest/incaltst.cpp | 22 +- icuSources/test/intltest/tzregts.cpp | 19 +- icuSources/test/intltest/tztest.cpp | 17 +- icuSources/tools/tzcode/Makefile.in | 102 ++- icuSources/tools/tzcode/icuzdump.cpp | 423 ++++++++++ icuSources/tools/tzcode/icuzdump.vcproj | 212 +++++ icuSources/tools/tzcode/icuzones | 73 ++ icuSources/tools/tzcode/readme.txt | 29 +- icuSources/tools/tzcode/tz.alias | 19 +- icuSources/tools/tzcode/tz2icu.cpp | 346 +++++--- icuSources/tools/tzcode/zdump.c | 1034 +++++++++++++++++++++++ icuSources/tools/tzcode/zic.c | 45 +- 13 files changed, 2199 insertions(+), 160 deletions(-) create mode 100644 icuSources/tools/tzcode/icuzdump.cpp create mode 100644 icuSources/tools/tzcode/icuzdump.vcproj create mode 100644 icuSources/tools/tzcode/icuzones create mode 100644 icuSources/tools/tzcode/zdump.c diff --git a/icuSources/test/intltest/dtfmtrtts.cpp b/icuSources/test/intltest/dtfmtrtts.cpp index 152ec5a5..69394973 100644 --- a/icuSources/test/intltest/dtfmtrtts.cpp +++ b/icuSources/test/intltest/dtfmtrtts.cpp @@ -98,7 +98,23 @@ void DateFormatRoundTripTest::TestCentury() fmt.format(date[1], result[1]); date[2] = fmt.parse(result[1], status); - if (date[1] != date[2] || result[0] != result[1]) { + /* This test case worked OK by accident before.Ê date[1] != date[0], + * because we use -80/+20 year window for 2-digit year parsing. + * (date[0] is in year 1926, date[1] is in year 2026.)Ê result[1] set + * by the first format call returns "07/13/26 07:48:28 p.m. PST", + * which is correct, because DST was not used in year 1926 in zone + * America/Los_Angeles.Ê When this is parsed, date[1] becomes a time + * in 2026, which is "07/13/26 08:48:28 p.m. 
PDT". There was a zone + * offset calculation bug that observed DST in 1926, which was resolved. + * Before the bug was resolved, result[0] == result[1] was true, + * but after the bug fix, the expected result is actually + * result[0] != result[1]. -Yoshito + */ + /* TODO: We need to review this code and clarify what we really + * want to test here. + */ + //if (date[1] != date[2] || result[0] != result[1]) { + if (date[1] != date[2]) { errln("Round trip failure: \"%S\" (%f), \"%S\" (%f)", result[0].getBuffer(), date[1], result[1].getBuffer(), date[2]); } } diff --git a/icuSources/test/intltest/incaltst.cpp b/icuSources/test/intltest/incaltst.cpp index a765e0c4..114a3c95 100644 --- a/icuSources/test/intltest/incaltst.cpp +++ b/icuSources/test/intltest/incaltst.cpp @@ -463,7 +463,21 @@ void IntlCalendarTest::TestJapaneseFormat() { // Test parse with incomplete information fmt = new SimpleDateFormat(UnicodeString("G y"), Locale("en_US@calendar=japanese"), status); - aDate = -3197120400000.; + /* The test data below should point to 1868-09-08T00:00:00 in America/Los_Angeles. + * The time calculated by original test code uses -7:00 UTC offset, because it assumes + * DST is observed (because of a timezone bug, DST is observed for early 20th century + * day to infinite past time). The bug was fixed and DST is no longer used for time before + * 1900 for any zones. However, ICU timezone transition data is represented by 32-bit integer + * (sec) and cannot represent transitions before 1901 defined in Olson tzdata. For example, + * based on Olson definition, offset -7:52:58 should be used for Nov 18, 1883 or older dates. + * If ICU properly captured the entire Olson zone definition, the start time of "Meiji 1" would be + * -3197117222000. -Yoshito + */ + /* TODO: When ICU supports the Olson LMT offset for America/Los_Angeles, we need to update + * the reference data. 
+ */ + //aDate = -3197120400000.; + aDate = -3197116800000.; CHECK(status, "creating date format instance"); if(!fmt) { errln("Coudln't create en_US instance"); @@ -515,7 +529,8 @@ void IntlCalendarTest::TestJapaneseFormat() { } { UnicodeString expect = CharsToUnicodeString("\\u5b89\\u6c385\\u5e747\\u67084\\u65e5\\u6728\\u66dc\\u65e5"); - UDate expectDate = -6106035600000.0; + //UDate expectDate = -6106035600000.0; + UDate expectDate = -6106032000000.0; // 1776-07-04T00:00:00Z-0800 Locale loc("ja_JP@calendar=japanese"); status = U_ZERO_ERROR; @@ -534,7 +549,8 @@ void IntlCalendarTest::TestJapaneseFormat() { { // This Feb 29th falls on a leap year by gregorian year, but not by Japanese year. UnicodeString expect = CharsToUnicodeString("\\u5EB7\\u6B632\\u5e742\\u670829\\u65e5\\u65e5\\u66dc\\u65e5"); // Add -1:00 to the following for historical TZ - aliu - UDate expectDate = -16214403600000.0; // courtesy of date format round trip test + //UDate expectDate = -16214403600000.0; // courtesy of date format round trip test + UDate expectDate = -16214400000000.0; // 1456-03-09T00:00:00Z-0800 Locale loc("ja_JP@calendar=japanese"); status = U_ZERO_ERROR; diff --git a/icuSources/test/intltest/tzregts.cpp b/icuSources/test/intltest/tzregts.cpp index 5ce419cf..b549b83c 100644 --- a/icuSources/test/intltest/tzregts.cpp +++ b/icuSources/test/intltest/tzregts.cpp @@ -1,7 +1,6 @@ /******************************************************************** - * COPYRIGHT: - * Copyright (c) 1997-2006, International Business Machines Corporation and - * others. All Rights Reserved. + * Copyright (c) 1997-2007, International Business Machines + * Corporation and others. All Rights Reserved. 
********************************************************************/ #include "unicode/utypes.h" @@ -482,8 +481,22 @@ void TimeZoneRegressionTest:: Test4126678() failure(status, "cal->get"); int32_t offset = tz->getOffset((uint8_t)era, year, month, day, (uint8_t)dayOfWeek, millis, status); int32_t raw_offset = tz->getRawOffset(); + /* Because of better historical timezone support based on Olson data, + * DST is not observed in year 98. Thus, the expected result is changed. + * As of Mar 2007, ICU timezone transition data is represented by 32-bit. + * When we support 64-bit Olson transition data, the actual offset in + * AD 98 for America/Los_Angeles will be changed again (-7:52:58). Until + * then, expected result is offset == raw_offset. -Yoshito + */ + /* if (offset == raw_offset) errln("Offsets should not match when in DST"); + */ + /* TODO: When ICU support the Olson LMT offset for America/Los_Angeles, we need to update + * the reference data. + */ + if (offset != raw_offset) + errln("Offsets should match"); delete cal; } diff --git a/icuSources/test/intltest/tztest.cpp b/icuSources/test/intltest/tztest.cpp index 0ad49421..c2060078 100644 --- a/icuSources/test/intltest/tztest.cpp +++ b/icuSources/test/intltest/tztest.cpp @@ -507,6 +507,9 @@ TimeZoneTest::TestGetAvailableIDs913() * definition. This test has been updated to reflect this. * 12/3/99 aliu * + * Added tests for additional zones and aliases from the icuzones file. + * Markus Scherer 2006-nov-06 + * * [srl - from java - 7/5/1998] * @bug 4130885 * Certain short zone IDs, used since 1.1.x, are incorrect. 
@@ -585,7 +588,7 @@ void TimeZoneTest::TestShortZoneIDs() {"EST", -300, FALSE}, // updated Aug 2003 aliu {"PRT", -240, FALSE}, {"CNT", -210, TRUE}, - {"AGT", -180, FALSE}, // updated 26 Sep 2000 aliu + {"AGT", -180, TRUE}, // updated by tzdata2007k {"BET", -180, TRUE}, // "CAT", -60, FALSE, // Wrong: // As of bug 4130885, fix CAT (Central Africa) @@ -598,7 +601,7 @@ void TimeZoneTest::TestShortZoneIDs() {"EAT", 180, FALSE}, {"MET", 60, TRUE}, // updated 12/3/99 aliu {"NET", 240, TRUE}, // updated 12/3/99 aliu - {"PLT", 300, FALSE}, // updated Aug 2003 aliu + {"PLT", 300, TRUE}, // updated by 2008c {"IST", 330, FALSE}, {"BST", 360, FALSE}, {"VST", 420, FALSE}, @@ -656,14 +659,14 @@ void TimeZoneTest::TestShortZoneIDs() const char* compatibilityMap[] = { // This list is copied from tz.alias. If tz.alias - // changes, this list must be updated. Current as of Aug 2003 + // changes, this list must be updated. Current as of Mar 2007 "ACT", "Australia/Darwin", "AET", "Australia/Sydney", "AGT", "America/Buenos_Aires", "ART", "Africa/Cairo", "AST", "America/Anchorage", "BET", "America/Sao_Paulo", - "BST", "Asia/Dhaka", // Spelling changed in 2000h + "BST", "Asia/Dhaka", // # spelling changed in 2000h; was Asia/Dacca "CAT", "Africa/Harare", "CNT", "America/St_Johns", "CST", "America/Chicago", @@ -671,14 +674,14 @@ void TimeZoneTest::TestShortZoneIDs() "EAT", "Africa/Addis_Ababa", "ECT", "Europe/Paris", // EET Europe/Istanbul # EET is a standard UNIX zone - // "EST", "America/New_York", # EST is an Olson alias now (2003) - "HST", "Pacific/Honolulu", + // "EST", "America/New_York", # Defined as -05:00 + // "HST", "Pacific/Honolulu", # Defined as -10:00 "IET", "America/Indianapolis", "IST", "Asia/Calcutta", "JST", "Asia/Tokyo", // MET Asia/Tehran # MET is a standard UNIX zone "MIT", "Pacific/Apia", - // "MST", "America/Denver", # MST is an Olson alias now (2003) + // "MST", "America/Denver", # Defined as -07:00 "NET", "Asia/Yerevan", "NST", "Pacific/Auckland", "PLT", 
"Asia/Karachi", diff --git a/icuSources/tools/tzcode/Makefile.in b/icuSources/tools/tzcode/Makefile.in index af9ff6e9..ce7c1a50 100644 --- a/icuSources/tools/tzcode/Makefile.in +++ b/icuSources/tools/tzcode/Makefile.in @@ -1,4 +1,4 @@ -# Some Portions Copyright (c) 2006 IBM and others. All Rights Reserved. +# Some Portions Copyright (c) 2006-2007 IBM and others. All Rights Reserved. srcdir = @srcdir@ top_srcdir = @top_srcdir@ @@ -11,40 +11,109 @@ include $(top_builddir)/icudefs.mk OBJECTS= zic.o localtime.o asctime.o scheck.o ialloc.o TZDATA = $(firstword $(wildcard ./tzdata*.tar.gz) $(wildcard $(srcdir)/tzdata*.tar.gz)) +TZCODE = $(firstword $(wildcard ./tzcode*.tar.gz) $(wildcard $(srcdir)/tzcode*.tar.gz)) + +TZORIG=./tzorig +TZORIG_TZDIR=./tzorig/tzdir +TZORIG_ABS := $(shell pwd)/tzorig +TZORIG_TZDIR_ABS := $(TZORIG_ABS)/tzdir +TZORIG_OPTS := CFLAGS="-D_POSIX_C_SOURCE $(TZORIG_EXTRA_CFLAGS)" TZDIR=$(TZORIG_TZDIR_ABS) + + +## Options for building zdumps +ZDUMPOUT=$(shell pwd)/zdumpout +ICUZDUMPOUT=$(shell pwd)/icuzdumpout + +ZDUMP_OPTS= -v -a -d $(ZDUMPOUT) -c 1902,2038 -i +ICUZDUMP_OPTS= -a -d $(ICUZDUMPOUT) ifeq ($(TZDATA),) all: - @echo ERROR tzdata*.tar.gz can\'t be found. + @echo ERROR "tzdata*.tar.gz" can\'t be found. @false else all: icu_data endif +TZCODE_TARGETS= tzorig check-dump + +ifeq ($(TZCODE),) +# we're broken. +$(TZCODE_TARGETS): + @echo ERROR "tzcode*.tar.gz" can\'t be found. + @false + +else +ifeq ($(TZDATA),) +# we're broken. +$(TZCODE_TARGETS): + @echo ERROR "tzdata*.tar.gz" can\'t be found. 
+ @false +else +tzorig: $(TZCODE) $(TZDATA) + -$(RMV) ./tzorig/ + mkdir $@ + mkdir $(TZORIG_TZDIR) + gunzip -d < $(TZDATA) | ( cd $@ ; tar xf - ) + gunzip -d < $(TZCODE) | ( cd $@ ; tar xf - ) + -mv $(TZORIG)/zdump.c $(TZORIG)/zdump.c.orig + cp $(srcdir)/zdump.c $(TZORIG)/zdump.c + -mv $(TZORIG)/factory $(TZORIG)/factory.orig + cat $(TZORIG)/factory.orig $(srcdir)/icuzones > $(TZORIG)/factory + $(MAKE) -C $@ $(TZORIG_OPTS) zdump zones + +$(ZDUMPOUT): tzorig + ( cd $(TZORIG) ; ./zdump$(EXEEXT) $(ZDUMP_OPTS) ) + + +dump-out: $(ZDUMPOUT) $(ICUZDUMPOUT) + +check-dump: dump-out + diff -r zdumpout icuzdumpout + +endif +endif + +$(ICUZDUMPOUT): icuzdump$(EXEEXT) + -$(RMV) $(ICUZDUMPOUT) + -mkdir $(ICUZDUMPOUT) + $(INVOKE) ./icuzdump $(ICUZDUMP_OPTS) + + +# +# old 'tz' rules start here +# + + PRIMARY_YDATA= africa antarctica asia australasia \ europe northamerica southamerica YDATA= $(PRIMARY_YDATA) pacificnew etcetera factory backward NDATA= systemv SDATA= solar87 solar88 solar89 -TDATA= $(YDATA) $(NDATA) $(SDATA) +TDATA= $(YDATA) $(NDATA) $(SDATA) YEARISTYPE= ./yearistype ZIC = ./zic TZDIR=zoneinfo CFLAGS+=-D_POSIX_C_SOURCE -CPPFLAGS+= -DTZDIR=\"$(TZDIR)\" +CPPFLAGS+= -DTZDIR=\"$(TZDIR)\" # more data XDATA=zone.tab yearistype.sh leapseconds iso3166.tab ICUDATA=ZoneMetaData.java icu_zone.txt tz2icu zoneinfo.txt - - zic: $(OBJECTS) yearistype $(srcdir)/tz2icu.h - $(CC) $(CFLAGS) $(LFLAGS) -I$(srcdir) $(OBJECTS) $(LDLIBS) -o $@ + $(CC) $(CFLAGS) $(TZORIG_EXTRA_CFLAGS) $(LFLAGS) -I$(srcdir) $(OBJECTS) $(LDLIBS) -o $@ tz2icu: $(srcdir)/tz2icu.cpp $(srcdir)/tz2icu.h - $(CXX) -W -Wall -I$(srcdir) -pedantic $(srcdir)/tz2icu.cpp -o $@ + $(CXX) -W -Wall -I$(srcdir) -I$(top_srcdir)/common -pedantic $(srcdir)/tz2icu.cpp -o $@ + +icuzdump${EXEEXT}: $(srcdir)/icuzdump.cpp + $(LINK.cc) -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/tools/toolutil -I$(top_srcdir)/io -pedantic $(srcdir)/icuzdump.cpp $(LIBICUUC) $(LIBICUDT) $(LIBICUI18N) $(LIBICUIO) 
$(LIBICUTOOLUTIL) -o $@ + + +# $(CXX) -W -Wall -I$(srcdir) -I$(top_srcdir)/common -pedantic $(srcdir)/icuzdump.cpp -o $@ yearistype.sh: $(TZDATA) gunzip -d < $(TZDATA) | tar xf - @@ -53,28 +122,25 @@ yearistype: yearistype.sh cp yearistype.sh yearistype chmod +x yearistype -tz.alias: $(srcdir)/tz.alias - cp $< . +posix_only: zic $(TDATA) $(srcdir)/icuzones + $(ZIC) -y $(YEARISTYPE) -d $(TZDIR) -L /dev/null $(TDATA) $(srcdir)/icuzones -posix_only: zic $(TDATA) - $(ZIC) -y $(YEARISTYPE) -d $(TZDIR) -L /dev/null $(TDATA) - -icu_data: tz2icu posix_only tz.alias +icu_data: tz2icu posix_only ./tz2icu $(TZDIR) zone.tab `echo $(TZDATA) | sed -e "s/.*\/tzdata//;s/\.tar\.gz$$//"` clean: - -rm -f core *.o *.out zdump zic yearistype date + -rm -f core *.o *.out zdump${EXEEXT} zic${EXEEXT} yearistype date tz2icu${EXEEXT} @echo ICU specific cleanup: -rm -f $(ICUDATA) -rm -rf $(TZDIR) -ifneq ($(srcdir),.) - -$(RMV) tz.alias -endif + -$(RMV) icuzdump${EXEEXT} tzorig ./zdumpout/ ./icuzdumpout/ ifneq ($(TZDATA),) -rm -rf `gunzip -d < $(TZDATA) | tar tf - | grep -o '[^ ]*$$' | tr '\n' ' '` endif +checkclean: + dataclean: clean -rm -f $(TDATA) $(XDATA) diff --git a/icuSources/tools/tzcode/icuzdump.cpp b/icuSources/tools/tzcode/icuzdump.cpp new file mode 100644 index 00000000..ffad9405 --- /dev/null +++ b/icuSources/tools/tzcode/icuzdump.cpp @@ -0,0 +1,423 @@ +/* +******************************************************************************* +* +* Copyright (C) 2007, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: icuzdump.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2007-04-02 +* created by: Yoshito Umaoka +* +* This tool write out timezone transitions for ICU timezone. 
This tool +* is used as a part of tzdata update process to check if ICU timezone +* code works as well as the corresponding Olson stock localtime/zdump. +*/ + +#include +#include +#include +#include +#include + +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "unicode/timezone.h" +#include "unicode/simpletz.h" +#include "unicode/smpdtfmt.h" +#include "unicode/decimfmt.h" +#include "unicode/gregocal.h" +#include "unicode/ustream.h" +#include "unicode/putil.h" + +#include "uoptions.h" + +using namespace std; + +class DumpFormatter { +public: + DumpFormatter() { + UErrorCode status = U_ZERO_ERROR; + stz = new SimpleTimeZone(0, ""); + sdf = new SimpleDateFormat((UnicodeString)"yyyy-MM-dd EEE HH:mm:ss", Locale::getEnglish(), status); + DecimalFormatSymbols *symbols = new DecimalFormatSymbols(Locale::getEnglish(), status); + decf = new DecimalFormat("00", symbols, status); + } + ~DumpFormatter() { + } + + UnicodeString& format(UDate time, int32_t offset, UBool isDst, UnicodeString& appendTo) { + stz->setRawOffset(offset); + sdf->setTimeZone(*stz); + UnicodeString str = sdf->format(time, appendTo); + if (offset < 0) { + appendTo += "-"; + offset = -offset; + } else { + appendTo += "+"; + } + + int32_t hour, min, sec; + + offset /= 1000; + sec = offset % 60; + offset = (offset - sec) / 60; + min = offset % 60; + hour = offset / 60; + + decf->format(hour, appendTo); + decf->format(min, appendTo); + decf->format(sec, appendTo); + appendTo += "[DST="; + if (isDst) { + appendTo += "1"; + } else { + appendTo += "0"; + } + appendTo += "]"; + return appendTo; + } +private: + SimpleTimeZone* stz; + SimpleDateFormat* sdf; + DecimalFormat* decf; +}; + +class ICUZDump { +public: + ICUZDump() { + formatter = new DumpFormatter(); + loyear = 1902; + hiyear = 2050; + tick = 1000; + linesep = NULL; + } + + ~ICUZDump() { + } + + void setLowYear(int32_t lo) { + loyear = lo; + } + + void setHighYear(int32_t hi) { + hiyear = hi; + } + + void setTick(int32_t t) { + tick = 
t; + } + + void setTimeZone(TimeZone* tz) { + timezone = tz; + } + + void setDumpFormatter(DumpFormatter* fmt) { + formatter = fmt; + } + + void setLineSeparator(const char* sep) { + linesep = sep; + } + + void dump(ostream& out) { + UErrorCode status = U_ZERO_ERROR; + UDate SEARCH_INCREMENT = 12 * 60 * 60 * 1000; // half day + UDate t, cutlo, cuthi; + int32_t rawOffset, dstOffset; + UnicodeString str; + + getCutOverTimes(cutlo, cuthi); + t = cutlo; + timezone->getOffset(t, FALSE, rawOffset, dstOffset, status); + while (t < cuthi) { + int32_t newRawOffset, newDstOffset; + UDate newt = t + SEARCH_INCREMENT; + + timezone->getOffset(newt, FALSE, newRawOffset, newDstOffset, status); + + UBool bSameOffset = (rawOffset + dstOffset) == (newRawOffset + newDstOffset); + UBool bSameDst = ((dstOffset != 0) && (newDstOffset != 0)) || ((dstOffset == 0) && (newDstOffset == 0)); + + if (!bSameOffset || !bSameDst) { + // find the boundary + UDate lot = t; + UDate hit = newt; + while (true) { + int32_t diff = (int32_t)(hit - lot); + if (diff <= tick) { + break; + } + UDate medt = lot + ((diff / 2) / tick) * tick; + int32_t medRawOffset, medDstOffset; + timezone->getOffset(medt, FALSE, medRawOffset, medDstOffset, status); + + bSameOffset = (rawOffset + dstOffset) == (medRawOffset + medDstOffset); + bSameDst = ((dstOffset != 0) && (medDstOffset != 0)) || ((dstOffset == 0) && (medDstOffset == 0)); + + if (!bSameOffset || !bSameDst) { + hit = medt; + } else { + lot = medt; + } + } + // write out the boundary + str.remove(); + formatter->format(lot, rawOffset + dstOffset, (dstOffset == 0 ? FALSE : TRUE), str); + out << str << " > "; + str.remove(); + formatter->format(hit, newRawOffset + newDstOffset, (newDstOffset == 0 ? 
FALSE : TRUE), str); + out << str; + if (linesep != NULL) { + out << linesep; + } else { + out << endl; + } + + rawOffset = newRawOffset; + dstOffset = newDstOffset; + } + t = newt; + } + } + +private: + void getCutOverTimes(UDate& lo, UDate& hi) { + UErrorCode status = U_ZERO_ERROR; + GregorianCalendar* gcal = new GregorianCalendar(timezone, Locale::getEnglish(), status); + gcal->clear(); + gcal->set(loyear, 0, 1, 0, 0, 0); + lo = gcal->getTime(status); + gcal->set(hiyear, 0, 1, 0, 0, 0); + hi = gcal->getTime(status); + } + + void dumpZone(ostream& out, const char* linesep, UnicodeString tzid, int32_t low, int32_t high) { + } + + TimeZone* timezone; + int32_t loyear; + int32_t hiyear; + int32_t tick; + + DumpFormatter* formatter; + const char* linesep; +}; + +class ZoneIterator { +public: + ZoneIterator(UBool bAll = FALSE) { + if (bAll) { + zenum = TimeZone::createEnumeration(); + } + else { + zenum = NULL; + zids = NULL; + idx = 0; + numids = 1; + } + } + + ZoneIterator(const char** ids, int32_t num) { + zenum = NULL; + zids = ids; + idx = 0; + numids = num; + } + + ~ZoneIterator() { + if (zenum != NULL) { + delete zenum; + } + } + + TimeZone* next() { + TimeZone* tz = NULL; + if (zenum != NULL) { + UErrorCode status = U_ZERO_ERROR; + const UnicodeString* zid = zenum->snext(status); + if (zid != NULL) { + tz = TimeZone::createTimeZone(*zid); + } + } + else { + if (idx < numids) { + if (zids != NULL) { + tz = TimeZone::createTimeZone((const UnicodeString&)zids[idx]); + } + else { + tz = TimeZone::createDefault(); + } + idx++; + } + } + return tz; + } + +private: + const char** zids; + StringEnumeration* zenum; + int32_t idx; + int32_t numids; +}; + +enum { + kOptHelpH = 0, + kOptHelpQuestionMark, + kOptAllZones, + kOptCutover, + kOptDestDir, + kOptLineSep +}; + +static UOption options[]={ + UOPTION_HELP_H, + UOPTION_HELP_QUESTION_MARK, + UOPTION_DEF("allzones", 'a', UOPT_NO_ARG), + UOPTION_DEF("cutover", 'c', UOPT_REQUIRES_ARG), + UOPTION_DEF("destdir", 'd', 
UOPT_REQUIRES_ARG), + UOPTION_DEF("linesep", 'l', UOPT_REQUIRES_ARG) +}; + +extern int +main(int argc, char *argv[]) { + int32_t low = 1902; + int32_t high = 2038; + UBool bAll = FALSE; + const char *dir = NULL; + const char *linesep = NULL; + + U_MAIN_INIT_ARGS(argc, argv); + argc = u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); + + if (argc < 0) { + cerr << "Illegal command line argument(s)" << endl << endl; + } + + if (argc < 0 || options[kOptHelpH].doesOccur || options[kOptHelpQuestionMark].doesOccur) { + cerr + << "Usage: icuzdump [-options] [zoneid1 zoneid2 ...]" << endl + << endl + << "\tDump all offset transitions for the specified zones." << endl + << endl + << "Options:" << endl + << "\t-a : Dump all available zones." << endl + << "\t-d : When specified, write transitions in a file under" << endl + << "\t the directory for each zone." << endl + << "\t-l : New line code type used in file outputs. CR or LF (default)" + << "\t or CRLF." << endl + << "\t-c [,]" << endl + << "\t : When specified, dump transitions starting " << endl + << "\t (inclusive) up to (exclusive). The default" << endl + << "\t values are 1902(low) and 2038(high)." << endl; + return argc < 0 ? 
U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; + } + + bAll = options[kOptAllZones].doesOccur; + + if (options[kOptDestDir].doesOccur) { + dir = options[kOptDestDir].value; + } + + if (options[kOptLineSep].doesOccur) { + if (strcmp(options[kOptLineSep].value, "CR") == 0) { + linesep = "\r"; + } else if (strcmp(options[kOptLineSep].value, "CRLF") == 0) { + linesep = "\r\n"; + } else if (strcmp(options[kOptLineSep].value, "LF") == 0) { + linesep = "\n"; + } + } + + if (options[kOptCutover].doesOccur) { + char* comma = (char*)strchr(options[kOptCutover].value, ','); + if (comma == NULL) { + high = atoi(options[kOptCutover].value); + } else { + *comma = 0; + low = atoi(options[kOptCutover].value); + high = atoi(comma + 1); + } + } + + ICUZDump dumper; + dumper.setLowYear(low); + dumper.setHighYear(high); + if (dir != NULL && linesep != NULL) { + // use the specified line separator only for file output + dumper.setLineSeparator((const char*)linesep); + } + + ZoneIterator* zit; + if (bAll) { + zit = new ZoneIterator(TRUE); + } else { + if (argc <= 1) { + zit = new ZoneIterator(); + } else { + zit = new ZoneIterator((const char**)&argv[1], argc - 1); + } + } + + UnicodeString id; + if (dir != NULL) { + // file output + ostringstream path; + ios::openmode mode = ios::out; + if (linesep != NULL) { + mode |= ios::binary; + } + for (;;) { + TimeZone* tz = zit->next(); + if (tz == NULL) { + break; + } + dumper.setTimeZone(tz); + tz->getID(id); + + // target file path + path.str(""); + path << dir << U_FILE_SEP_CHAR; + id = id.findAndReplace("/", "-"); + path << id; + + ofstream* fout = new ofstream(path.str().c_str(), mode); + if (fout->fail()) { + cerr << "Cannot open file " << path << endl; + delete fout; + delete tz; + break; + } + + dumper.dump(*fout); + fout->close(); + delete fout; + delete tz; + } + + } else { + // stdout + UBool bFirst = TRUE; + for (;;) { + TimeZone* tz = zit->next(); + if (tz == NULL) { + break; + } + dumper.setTimeZone(tz); + tz->getID(id); + if (bFirst) 
{ + bFirst = FALSE; + } else { + cout << endl; + } + cout << "ZONE: " << id << endl; + dumper.dump(cout); + delete tz; + } + } + delete zit; +} diff --git a/icuSources/tools/tzcode/icuzdump.vcproj b/icuSources/tools/tzcode/icuzdump.vcproj new file mode 100644 index 00000000..87242b5f --- /dev/null +++ b/icuSources/tools/tzcode/icuzdump.vcproj @@ -0,0 +1,212 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/icuSources/tools/tzcode/icuzones b/icuSources/tools/tzcode/icuzones new file mode 100644 index 00000000..24a2223e --- /dev/null +++ b/icuSources/tools/tzcode/icuzones @@ -0,0 +1,73 @@ +###################################################################### +# Copyright (C) 2007-2008, International Business Machines +# Corporation and others. All Rights Reserved. +###################################################################### +# This is an ICU-specific file with the same format as regular +# tzdata time zone files, for consistent parsing by the tools that +# turn "Olson" tzdata into ICU's zoneinfo.txt. +# The purpose of this file is to give ICU a superset of the time zones +# that are in CLDR and also include legacy ICU time zones originally +# in tz.alias for rataining backward compatibility. + +# Add Etc/Unknown, defined by CLDR. Give it Etc/GMT behavior. + +# Zone NAME GMTOFF RULES FORMAT +Zone Etc/Unknown 0 - Unknown + +# Add SystemV/XXX time zones as aliases according to CLDR. +# The Olson systemv file has these commented out. +# An alternative to making these aliases according to CLDR would be +# to copy and un-comment the data from the systemv file. 
+ +# Link canonical alias +Link America/Halifax SystemV/AST4ADT +Link America/New_York SystemV/EST5EDT +Link America/Chicago SystemV/CST6CDT +Link America/Denver SystemV/MST7MDT +Link America/Los_Angeles SystemV/PST8PDT +Link America/Anchorage SystemV/YST9YDT +Link Etc/GMT+4 SystemV/AST4 +Link Etc/GMT+5 SystemV/EST5 +Link Etc/GMT+6 SystemV/CST6 +Link Etc/GMT+7 SystemV/MST7 +Link Etc/GMT+8 SystemV/PST8 +Link Etc/GMT+9 SystemV/YST9 +Link Etc/GMT+10 SystemV/HST10 + + +# The list below is for supporting legacy ICU zone aliases. +# These definitions were originally defined in tz.alias. + +#### Aliases that conflict with Olson compatibility Zone definition + +Link Australia/Darwin ACT +Link Australia/Sydney AET +Link America/Argentina/Buenos_Aires AGT +Link Africa/Cairo ART +Link America/Anchorage AST +Link America/Sao_Paulo BET +Link Asia/Dhaka BST +Link Africa/Harare CAT +Link America/St_Johns CNT +Link America/Chicago CST +Link Asia/Shanghai CTT +Link Africa/Addis_Ababa EAT +Link Europe/Paris ECT +#Link Europe/Istanbul EET # EET is a standard UNIX zone +####Link EST America/New_York EST # Defined as -05:00 +####Link Pacific/Honolulu HST # Defined as -10:00 +Link America/Indiana/Indianapolis IET +Link Asia/Kolkata IST +Link Asia/Tokyo JST +#Link Asia/Tehran MET # MET is a standard UNIX zone +Link Pacific/Apia MIT +####Link America/Denver MST # Defined as -07:00 +Link Asia/Yerevan NET +Link Pacific/Auckland NST +Link Asia/Karachi PLT +Link America/Phoenix PNT +Link America/Puerto_Rico PRT +Link America/Los_Angeles PST +Link Pacific/Guadalcanal SST +#Link Etc/UTC UTC # Olson LINK +Link Asia/Ho_Chi_Minh VST diff --git a/icuSources/tools/tzcode/readme.txt b/icuSources/tools/tzcode/readme.txt index 656a6488..7ad2cce0 100644 --- a/icuSources/tools/tzcode/readme.txt +++ b/icuSources/tools/tzcode/readme.txt @@ -1,5 +1,5 @@ ********************************************************************** -* Copyright (c) 2003-2006, International Business Machines +* Copyright (c) 
2003-2007, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -23,7 +23,7 @@ behavior, as well as the full set of Olson compatibility IDs. References: -ICU4C: http://icu.sourceforge.net/ +ICU4C: http://www.icu-project.org/ Olson: ftp://elsie.nci.nih.gov/pub/ ---------------------------------------------------------------------- @@ -70,19 +70,32 @@ HOWTO 0. Note, these instructions will only work on POSIX type systems. -1. Obtain the current versions of tzdataYYYYV.tar.gz (aka `tzdata') from the FTP site given - above. Either manually download or use wget: +1. Obtain the current versions of tzdataYYYYV.tar.gz (aka `tzdata') from + the FTP site given above. Either manually download or use wget: $ cd {path_to}/icu/source/tools/tzcode $ wget "ftp://elsie.nci.nih.gov/pub/tzdata*.tar.gz" -2. copy only one tzdata*.tar.gz file into the icu/source/data/in/ directory (you may have to create this directory) +2. Copy only one tzdata*.tar.gz file into the icu/source/tools/tzcode/ + directory (this directory). *** Make sure you only have ONE FILE named tzdata*.tar.gz in the directory. 3. Build ICU normally. You will see a notice "updating zoneinfo.txt..." -4. For ICU maintainers, don't forget to check in the new - zoneinfo.txt (from its location at - {path_to}/icu/source/data/misc/zoneinfo.txt) into CVS. +### The following instructions are for ICU maintainers only ### + +4. Obtain the current version of tzcodeYYYY.tar.gz from the FTP site to + this directory. + +5. Run make target "check-dump". This target extracts and builds the original + tzcode and compiles the original tzdata with the ICU supplemental data + (icuzones). Then it builds zdump / icuzdump and dumps all time + transitions for all ICU time zones to files under the zdumpout / icuzdumpout + directories. When they produce different results, the target returns + an error. + +6. 
Don't forget to check in the new zoneinfo.txt (from its location at + {path_to}/icu/source/data/misc/zoneinfo.txt) into SVN. + diff --git a/icuSources/tools/tzcode/tz.alias b/icuSources/tools/tzcode/tz.alias index 4494b293..cf984ba0 100644 --- a/icuSources/tools/tzcode/tz.alias +++ b/icuSources/tools/tzcode/tz.alias @@ -1,5 +1,14 @@ +# +# !!!! WARNING !!!!! +# +# This file is OBSOLETE and no longer used by the ICU tzdata build tool. +# Use the file 'icuzones' to specify backward compatibility aliases. +# +# 2007-04-03 Yoshito Umaoka +# + ###################################################################### -# Copyright (C) 1999-2006, International Business Machines +# Copyright (C) 1999-2007, International Business Machines # Corporation and others. All Rights Reserved. ###################################################################### # A simple alias list. We use this to retain backward compatibility. @@ -12,7 +21,7 @@ # # Format: alias_name unix_name # optional comment -#### Aliases that conflict with Olson compatibility links +#### Aliases that conflict with Olson compatibility Zone definition ACT Australia/Darwin AET Australia/Sydney @@ -28,14 +37,14 @@ CTT Asia/Shanghai EAT Africa/Addis_Ababa ECT Europe/Paris # EET Europe/Istanbul # EET is a standard UNIX zone -#### EST America/New_York # Linked to America/Indianapolis in Olson -HST Pacific/Honolulu # Olson LINK - was removed 2005-Nov-21, confirmed gone in 2006a. 
+#### EST America/New_York # Defined as -05:00 +#### HST Pacific/Honolulu # Defined as -10:00 IET America/Indianapolis IST Asia/Calcutta JST Asia/Tokyo # MET Asia/Tehran # MET is a standard UNIX zone MIT Pacific/Apia -#### MST America/Denver # Linked to America/Phoenix in Olson +#### MST America/Denver # Defined as -07:00 NET Asia/Yerevan NST Pacific/Auckland PLT Asia/Karachi diff --git a/icuSources/tools/tzcode/tz2icu.cpp b/icuSources/tools/tzcode/tz2icu.cpp index eba0d3aa..9cb9f699 100644 --- a/icuSources/tools/tzcode/tz2icu.cpp +++ b/icuSources/tools/tzcode/tz2icu.cpp @@ -1,7 +1,7 @@ /* ********************************************************************** -* Copyright (c) 2003-2006, International Business Machines +* Copyright (c) 2003-2008, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -48,20 +48,24 @@ #include "tz2icu.h" #include "unicode/uversion.h" +#define USE64BITDATA + using namespace std; //-------------------------------------------------------------------- // Time utilities //-------------------------------------------------------------------- -const long SECS_PER_YEAR = 31536000; // 365 days -const long SECS_PER_LEAP_YEAR = 31622400; // 366 days +const int64_t SECS_PER_YEAR = 31536000; // 365 days +const int64_t SECS_PER_LEAP_YEAR = 31622400; // 366 days +const int64_t LOWEST_TIME32 = (int64_t)((int32_t)0x80000000); +const int64_t HIGHEST_TIME32 = (int64_t)((int32_t)0x7fffffff); -bool isLeap(int y) { +bool isLeap(int32_t y) { return (y%4 == 0) && ((y%100 != 0) || (y%400 == 0)); // Gregorian } -long secsPerYear(int y) { +int64_t secsPerYear(int32_t y) { return isLeap(y) ? SECS_PER_LEAP_YEAR : SECS_PER_YEAR; } @@ -69,10 +73,10 @@ long secsPerYear(int y) { * Given a calendar year, return the GMT epoch seconds for midnight * GMT of January 1 of that year. yearToSeconds(1970) == 0. 
*/ -long yearToSeconds(int year) { +int64_t yearToSeconds(int32_t year) { // inefficient but foolproof - long s = 0; - int y = 1970; + int64_t s = 0; + int32_t y = 1970; while (y < year) { s += secsPerYear(y++); } @@ -86,10 +90,10 @@ long yearToSeconds(int year) { * Given 1970 GMT epoch seconds, return the calendar year containing * that time. secondsToYear(0) == 1970. */ -int secondsToYear(long seconds) { +int32_t secondsToYear(int64_t seconds) { // inefficient but foolproof - int y = 1970; - long s = 0; + int32_t y = 1970; + int64_t s = 0; if (seconds >= 0) { for (;;) { s += secsPerYear(y++); @@ -116,9 +120,9 @@ struct SimplifiedZoneType; // A transition from one ZoneType to another // Minimal size = 5 bytes (4+1) struct Transition { - long time; // seconds, 1970 epoch - int type; // index into 'ZoneInfo.types' 0..255 - Transition(long _time, int _type) { + int64_t time; // seconds, 1970 epoch + int32_t type; // index into 'ZoneInfo.types' 0..255 + Transition(int64_t _time, int32_t _type) { time = _time; type = _type; } @@ -128,12 +132,12 @@ struct Transition { // Minimal size = 6 bytes (4+1+3bits) // SEE: SimplifiedZoneType struct ZoneType { - long rawoffset; // raw seconds offset from GMT - long dstoffset; // dst seconds offset from GMT + int64_t rawoffset; // raw seconds offset from GMT + int64_t dstoffset; // dst seconds offset from GMT // We don't really need any of the following, but they are // retained for possible future use. See SimplifiedZoneType. - int abbr; // index into ZoneInfo.abbrs 0..n-1 + int32_t abbr; // index into ZoneInfo.abbrs 0..n-1 bool isdst; bool isstd; bool isgmt; @@ -161,16 +165,16 @@ struct ZoneInfo { vector abbrs; string finalRuleID; - int finalOffset; - int finalYear; // -1 if none + int32_t finalOffset; + int32_t finalYear; // -1 if none // If this is an alias, then all other fields are meaningless, and // this field will point to the "real" zone 0..n-1. 
- int aliasTo; // -1 if this is a "real" zone + int32_t aliasTo; // -1 if this is a "real" zone // If there are aliases TO this zone, then the following set will // contain their index numbers (each index >= 0). - set aliases; + set aliases; ZoneInfo() : finalYear(-1), aliasTo(-1) {} @@ -179,13 +183,13 @@ struct ZoneInfo { void optimizeTypeList(); // Set this zone to be an alias TO another zone. - void setAliasTo(int index); + void setAliasTo(int32_t index); // Clear the list of aliases OF this zone. void clearAliases(); // Add an alias to the list of aliases OF this zone. - void addAlias(int index); + void addAlias(int32_t index); // Is this an alias to another zone? bool isAlias() const { @@ -193,7 +197,7 @@ struct ZoneInfo { } // Retrieve alias list - const set& getAliases() const { + const set& getAliases() const { return aliases; } @@ -205,12 +209,12 @@ void ZoneInfo::clearAliases() { aliases.clear(); } -void ZoneInfo::addAlias(int index) { +void ZoneInfo::addAlias(int32_t index) { assert(aliasTo < 0 && index >= 0 && aliases.find(index) == aliases.end()); aliases.insert(index); } -void ZoneInfo::setAliasTo(int index) { +void ZoneInfo::setAliasTo(int32_t index) { assert(index >= 0); assert(aliases.size() == 0); aliasTo = index; @@ -232,12 +236,12 @@ ZoneMap ZONEINFO; //-------------------------------------------------------------------- // Read zic-coded 32-bit integer from file -long readcoded(ifstream& file, long minv=numeric_limits::min(), - long maxv=numeric_limits::max()) { +int64_t readcoded(ifstream& file, int64_t minv=numeric_limits::min(), + int64_t maxv=numeric_limits::max()) { unsigned char buf[4]; // must be UNSIGNED - long val=0; + int64_t val=0; file.read((char*)buf, 4); - for(int i=0,shift=24;i<4;++i,shift-=8) { + for(int32_t i=0,shift=24;i<4;++i,shift-=8) { val |= buf[i] << shift; } if (val < minv || val > maxv) { @@ -249,13 +253,31 @@ long readcoded(ifstream& file, long minv=numeric_limits::min(), return val; } +// Read zic-coded 64-bit integer 
from file +int64_t readcoded64(ifstream& file, int64_t minv=numeric_limits::min(), + int64_t maxv=numeric_limits::max()) { + unsigned char buf[8]; // must be UNSIGNED + int64_t val=0; + file.read((char*)buf, 8); + for(int32_t i=0,shift=56;i<8;++i,shift-=8) { + val |= (int64_t)buf[i] << shift; + } + if (val < minv || val > maxv) { + ostringstream os; + os << "coded value out-of-range: " << val << ", expected [" + << minv << ", " << maxv << "]"; + throw out_of_range(os.str()); + } + return val; +} + // Read a boolean value bool readbool(ifstream& file) { char c; file.read(&c, 1); if (c!=0 && c!=1) { ostringstream os; - os << "boolean value out-of-range: " << (int)c; + os << "boolean value out-of-range: " << (int32_t)c; throw out_of_range(os.str()); } return (c!=0); @@ -265,8 +287,8 @@ bool readbool(ifstream& file) { * Read the zoneinfo file structure (see tzfile.h) into a ZoneInfo * @param file an already-open file stream */ -void readzoneinfo(ifstream& file, ZoneInfo& info) { - int i; +void readzoneinfo(ifstream& file, ZoneInfo& info, bool is64bitData=false) { + int32_t i; // Check for TZ_ICU_MAGIC signature at file start. If we get a // signature mismatch, it means we're trying to read a file which @@ -294,12 +316,12 @@ void readzoneinfo(ifstream& file, ZoneInfo& info) { } // Read array sizes - long isgmtcnt = readcoded(file, 0); - long isdstcnt = readcoded(file, 0); - long leapcnt = readcoded(file, 0); - long timecnt = readcoded(file, 0); - long typecnt = readcoded(file, 0); - long charcnt = readcoded(file, 0); + int64_t isgmtcnt = readcoded(file, 0); + int64_t isdstcnt = readcoded(file, 0); + int64_t leapcnt = readcoded(file, 0); + int64_t timecnt = readcoded(file, 0); + int64_t typecnt = readcoded(file, 0); + int64_t charcnt = readcoded(file, 0); // Confirm sizes that we assume to be equal. 
These assumptions // are drawn from a reading of the zic source (2003a), so they @@ -311,19 +333,23 @@ void readzoneinfo(ifstream& file, ZoneInfo& info) { // Used temporarily to store transition times and types. We need // to do this because the times and types are stored in two // separate arrays. - vector transitionTimes(timecnt, -1); // temporary - vector transitionTypes(timecnt, -1); // temporary + vector transitionTimes(timecnt, -1); // temporary + vector transitionTypes(timecnt, -1); // temporary // Read transition times for (i=0; i= typecnt) { ostringstream os; os << "illegal type: " << t << ", expected [0, " << (typecnt-1) << "]"; @@ -333,8 +359,40 @@ void readzoneinfo(ifstream& file, ZoneInfo& info) { } // Build transitions vector out of corresponding times and types. - for (i=0; i 0) { + int32_t minidx = -1; + for (i=0; i transitionTimes[minidx]) { + // Preserve the latest transition before the 32bit minimum time + minidx = i; + } + } else if (transitionTimes[i] > HIGHEST_TIME32) { + // Skipping the rest of the transition data. We cannot put such + // transitions into zoneinfo.res, because data is limited to signed + // 32bit int by the ICU resource bundle. + break; + } else { + info.transitions.push_back(Transition(transitionTimes[i], transitionTypes[i])); + } + } + + if (minidx != -1) { + // If there are any transitions before the 32bit minimum time, + // put the type information with the 32bit minimum time + vector::iterator itr = info.transitions.begin(); + info.transitions.insert(itr, Transition(LOWEST_TIME32, transitionTypes[minidx])); + } else { + // Otherwise, we need to insert the initial type later + insertInitial = true; + } + } + } else { + for (i=0; i 0); + assert(typecnt > 0); + + int32_t initialTypeIdx = -1; + + // Check if the first type is not dst + if (info.types.at(0).dstoffset != 0) { + // Initial type's rawoffset is the same as the rawoffset after the + // first transition, but no DST is observed.
+ int64_t rawoffset0 = (info.types.at(info.transitions.at(0).type)).rawoffset; + // Look for matching type + for (i=0; i<(int32_t)info.types.size(); ++i) { + if (info.types.at(i).rawoffset == rawoffset0 + && info.types.at(i).dstoffset == 0) { + initialTypeIdx = i; + break; + } + } + } else { + initialTypeIdx = 0; + } + assert(initialTypeIdx >= 0); + // Add the initial type associated with the lowest int32 time + vector::iterator itr = info.transitions.begin(); + info.transitions.insert(itr, Transition(LOWEST_TIME32, initialTypeIdx)); + } + + // Read the abbreviation string if (charcnt) { // All abbreviations are concatenated together, with a 0 at @@ -366,7 +454,7 @@ void readzoneinfo(ifstream& file, ZoneInfo& info) { // Split abbreviations apart into individual strings. Record // offset of each abbr in a vector. - vector abbroffset; + vector abbroffset; char *limit=str+charcnt; for (char* p=str; p::iterator it=info.types.begin(); it!=info.types.end(); ++it) { - vector::const_iterator x= + vector::const_iterator x= find(abbroffset.begin(), abbroffset.end(), it->abbr); if (x==abbroffset.end()) { // TODO: Modify code to add a new string to the end of @@ -400,7 +488,7 @@ void readzoneinfo(ifstream& file, ZoneInfo& info) { ostringstream os; os << "Warning: unusual abbr offset " << it->abbr << ", expected one of"; - for (vector::const_iterator y=abbroffset.begin(); + for (vector::const_iterator y=abbroffset.begin(); y!=abbroffset.end(); ++y) { os << ' ' << *y; } @@ -408,13 +496,13 @@ void readzoneinfo(ifstream& file, ZoneInfo& info) { #endif it->abbr = 0; } else { - int index = x - abbroffset.begin(); + int32_t index = x - abbroffset.begin(); it->abbr = index; abbrseen[index] = true; } } - for (int ii=0;ii<(int) abbrseen.size();++ii) { + for (int32_t ii=0;ii<(int32_t) abbrseen.size();++ii) { if (!abbrseen[ii]) { cerr << "Warning: unused abbreviation: " << ii << endl; } @@ -456,6 +544,7 @@ void handleFile(string path, string id) { if (!file) { throw 
invalid_argument("can't open file"); } + ZoneInfo info; readzoneinfo(file, info); @@ -464,12 +553,51 @@ void handleFile(string path, string id) { throw invalid_argument("read error"); } +#ifdef USE64BITDATA + ZoneInfo info64; + readzoneinfo(file, info64, true); + + bool alldone = false; + int64_t eofPos = (int64_t) file.tellg(); + + // '\n' + + '\n' after the 64bit version data + char ch = file.get(); + if (ch == 0x0a) { + bool invalidchar = false; + while (file.get(ch)) { + if (ch == 0x0a) { + break; + } + if (ch < 0x20) { + // must be printable ascii + invalidchar = true; + break; + } + } + if (!invalidchar) { + eofPos = (int64_t) file.tellg(); + file.seekg(0, ios::end); + eofPos = eofPos - (int64_t) file.tellg(); + if (eofPos == 0) { + alldone = true; + } + } + } + if (!alldone) { + ostringstream os; + os << (-eofPos) << " unprocessed bytes at end"; + throw invalid_argument(os.str()); + } + + ZONEINFO[id] = info64; + +#else // Check eof-relative pos (there may be a cleaner way to do this) - long eofPos = (long) file.tellg(); + int64_t eofPos = (int64_t) file.tellg(); char buf[32]; file.read(buf, 4); file.seekg(0, ios::end); - eofPos = eofPos - (long) file.tellg(); + eofPos = eofPos - (int64_t) file.tellg(); if (eofPos) { // 2006c merged 32 and 64 bit versions in a fat binary // 64 version starts at the end of 32 bit version. 
@@ -481,8 +609,8 @@ void handleFile(string path, string id) { throw invalid_argument(os.str()); } } - ZONEINFO[id] = info; +#endif } /** @@ -575,7 +703,7 @@ void scandir(string dir, string prefix="") { closedir(dp); chdir(pwd); - for(int i=0;i<(int)subfiles.size();i+=2) { + for(int32_t i=0;i<(int32_t)subfiles.size();i+=2) { try { handleFile(subfiles[i], subfiles[i+1]); } catch (const exception& e) { @@ -584,7 +712,7 @@ void scandir(string dir, string prefix="") { exit(1); } } - for(int i=0;i<(int)subdirs.size();i+=2) { + for(int32_t i=0;i<(int32_t)subdirs.size();i+=2) { scandir(subdirs[i], subdirs[i+1]); } } @@ -599,7 +727,7 @@ void scandir(string dir, string prefix="") { * Read and discard the current line. */ void consumeLine(istream& in) { - int c; + int32_t c; do { c = in.get(); } while (c != EOF && c != '\n'); @@ -615,16 +743,16 @@ const char* TIME_MODE[] = {"w", "s", "u"}; // Allow 29 days in February because zic outputs February 29 // for rules like "last Sunday in February". -const int MONTH_LEN[] = {31,29,31,30,31,30,31,31,30,31,30,31}; +const int32_t MONTH_LEN[] = {31,29,31,30,31,30,31,31,30,31,30,31}; -const int HOUR = 3600; +const int32_t HOUR = 3600; struct FinalZone { - int offset; // raw offset - int year; // takes effect for y >= year + int32_t offset; // raw offset + int32_t year; // takes effect for y >= year string ruleid; set aliases; - FinalZone(int _offset, int _year, const string& _ruleid) : + FinalZone(int32_t _offset, int32_t _year, const string& _ruleid) : offset(_offset), year(_year), ruleid(_ruleid) { if (offset <= -16*HOUR || offset >= 16*HOUR) { ostringstream os; @@ -653,12 +781,12 @@ struct FinalZone { }; struct FinalRulePart { - int mode; - int month; - int dom; - int dow; - int time; - int offset; // dst offset, usually either 0 or 1:00 + int32_t mode; + int32_t month; + int32_t dom; + int32_t dow; + int32_t time; + int32_t offset; // dst offset, usually either 0 or 1:00 // Isstd and isgmt only have 3 valid states, corresponding to 
local // wall time, local standard time, and GMT standard time. @@ -682,13 +810,13 @@ struct FinalRulePart { FinalRulePart() : isset(false) {} void set(const string& id, const string& _mode, - int _month, - int _dom, - int _dow, - int _time, + int32_t _month, + int32_t _dom, + int32_t _dow, + int32_t _time, bool _isstd, bool _isgmt, - int _offset) { + int32_t _offset) { if (isset) { throw invalid_argument("FinalRulePart set twice"); } @@ -741,7 +869,7 @@ struct FinalRulePart { * Return the time mode as an ICU SimpleTimeZone int from 0..2; * see simpletz.h. */ - int timemode() const { + int32_t timemode() const { if (isgmt) { assert(isstd); return 2; // gmt standard @@ -766,14 +894,14 @@ struct FinalRulePart { /** * Return a "dowim" param suitable for SimpleTimeZone. */ - int stz_dowim() const { + int32_t stz_dowim() const { return (mode == DOWLEQ) ? -dom : dom; } /** * Return a "dow" param suitable for SimpleTimeZone. */ - int stz_dow() const { + int32_t stz_dow() const { return (mode == DOM) ? 
0 : -(dow+1); } }; @@ -840,7 +968,7 @@ void readFinalZonesAndRules(istream& in) { } else if (token == "zone") { // zone Africa/Cairo 7200 1995 Egypt # zone Africa/Cairo, offset 7200, year >= 1995, rule Egypt (0) string id, ruleid; - int offset, year; + int32_t offset, year; in >> id >> offset >> year >> ruleid; consumeLine(in); finalZones[id] = FinalZone(offset, year, ruleid); @@ -848,12 +976,12 @@ void readFinalZonesAndRules(istream& in) { // rule US DOWGEQ 3 1 0 7200 0 0 3600 # 52: US, file data/northamerica, line 119, mode DOWGEQ, April, dom 1, Sunday, time 7200, isstd 0, isgmt 0, offset 3600 // rule US DOWLEQ 9 31 0 7200 0 0 0 # 53: US, file data/northamerica, line 114, mode DOWLEQ, October, dom 31, Sunday, time 7200, isstd 0, isgmt 0, offset 0 string id, mode; - int month, dom, dow, time, offset; + int32_t month, dom, dow, time, offset; bool isstd, isgmt; in >> id >> mode >> month >> dom >> dow >> time >> isstd >> isgmt >> offset; consumeLine(in); FinalRule& fr = finalRules[id]; - int p = fr.part[0].isset ? 1 : 0; + int32_t p = fr.part[0].isset ? 
1 : 0; fr.part[p].set(id, mode, month, dom, dow, time, isstd, isgmt, offset); } else if (token == "link") { string fromid, toid; // fromid == "real" zone, toid == alias @@ -955,7 +1083,7 @@ void ZoneInfo::print(ostream& os, const string& id) const { if (aliases.size() != 0) { first = true; os << " :intvector { "; - for (set::const_iterator i=aliases.begin(); i!=aliases.end(); ++i) { + for (set::const_iterator i=aliases.begin(); i!=aliases.end(); ++i) { if (!first) os << ", "; first = false; os << *i; @@ -968,7 +1096,7 @@ void ZoneInfo::print(ostream& os, const string& id) const { inline ostream& operator<<(ostream& os, const ZoneMap& zoneinfo) { - int c = 0; + int32_t c = 0; for (ZoneMapIter it = zoneinfo.begin(); it != zoneinfo.end(); ++it) { @@ -981,8 +1109,8 @@ operator<<(ostream& os, const ZoneMap& zoneinfo) { // print the string list ostream& printStringList( ostream& os, const ZoneMap& zoneinfo) { - int n = 0; // count - int col = 0; // column + int32_t n = 0; // count + int32_t col = 0; // column os << " Names {" << endl << " "; for (ZoneMapIter it = zoneinfo.begin(); @@ -1013,7 +1141,7 @@ ostream& printStringList( ostream& os, const ZoneMap& zoneinfo) { //-------------------------------------------------------------------- // Unary predicate for finding transitions after a given time -bool isAfter(const Transition t, long thresh) { +bool isAfter(const Transition t, int64_t thresh) { return t.time >= thresh; } @@ -1022,8 +1150,8 @@ bool isAfter(const Transition t, long thresh) { * optimizeTypeList() method. 
*/ struct SimplifiedZoneType { - long rawoffset; - long dstoffset; + int64_t rawoffset; + int64_t dstoffset; SimplifiedZoneType() : rawoffset(-1), dstoffset(-1) {} SimplifiedZoneType(const ZoneType& t) : rawoffset(t.rawoffset), dstoffset(t.dstoffset) {} @@ -1072,13 +1200,13 @@ void ZoneInfo::optimizeTypeList() { set simpleset; for (vector::const_iterator i=transitions.begin(); i!=transitions.end(); ++i) { - assert(i->type < (int)types.size()); + assert(i->type < (int32_t)types.size()); simpleset.insert(types[i->type]); } // Map types to integer indices - map simplemap; - int n=0; + map simplemap; + int32_t n=0; for (set::const_iterator i=simpleset.begin(); i!=simpleset.end(); ++i) { simplemap[*i] = n++; @@ -1087,7 +1215,7 @@ void ZoneInfo::optimizeTypeList() { // Remap transitions for (vector::iterator i=transitions.begin(); i!=transitions.end(); ++i) { - assert(i->type < (int)types.size()); + assert(i->type < (int32_t)types.size()); ZoneType oldtype = types[i->type]; SimplifiedZoneType newtype(oldtype); assert(simplemap.find(newtype) != simplemap.end()); @@ -1103,8 +1231,8 @@ void ZoneInfo::optimizeTypeList() { * Merge final zone data into this zone. */ void ZoneInfo::mergeFinalData(const FinalZone& fz) { - int year = fz.year; - long seconds = yearToSeconds(year); + int32_t year = fz.year; + int64_t seconds = yearToSeconds(year); vector::iterator it = find_if(transitions.begin(), transitions.end(), bind2nd(ptr_fun(isAfter), seconds)); @@ -1148,12 +1276,12 @@ void mergeFinalZone(const pair& p) { void FinalRule::print(ostream& os) const { // First print the rule part that enters DST; then the rule part // that exits it. - int whichpart = (part[0].offset != 0) ? 0 : 1; + int32_t whichpart = (part[0].offset != 0) ? 
0 : 1; assert(part[whichpart].offset != 0); assert(part[1-whichpart].offset == 0); os << " "; - for (int i=0; i<2; ++i) { + for (int32_t i=0; i<2; ++i) { const FinalRulePart& p = part[whichpart]; whichpart = 1-whichpart; os << p.month << ", " << p.stz_dowim() << ", " << p.stz_dow() << ", " @@ -1198,6 +1326,12 @@ int main(int argc, char *argv[]) { return 1; } +//############################################################################ +//# Note: We no longer use tz.alias to define alias for legacy ICU time zones. +//# The contents of tz.alias were migrated into zic source format and +//# processed by zic as 'Link'. +//############################################################################ +#if 0 // Read the legacy alias list and process it. Treat the legacy mappings // like links, but also record them in the "legacy" hash. try { @@ -1206,7 +1340,7 @@ int main(int argc, char *argv[]) { cerr << "Error: Unable to open " ICU_TZ_ALIAS << endl; return 1; } - int n = 0; + int32_t n = 0; string line; while (getline(aliases, line)) { string::size_type lb = line.find('#'); @@ -1255,7 +1389,7 @@ int main(int argc, char *argv[]) { cerr << "Error: While reading " ICU_TZ_ALIAS ": " << error.what() << endl; return 1; } - +#endif try { // Recursively scan all files below the given path, accumulating // their data into ZONEINFO. All files must be TZif files. Any @@ -1298,14 +1432,14 @@ int main(int argc, char *argv[]) { } // 2. Create a mapping from zones to index numbers 0..n-1. - map zoneIDs; + map zoneIDs; vector zoneIDlist; - int z=0; + int32_t z=0; for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { zoneIDs[i->first] = z++; zoneIDlist.push_back(i->first); } - assert(z == (int) ZONEINFO.size()); + assert(z == (int32_t) ZONEINFO.size()); // 3. Merge aliases. Sometimes aliases link to other aliases; we // resolve these into simplest possible sets. 
@@ -1367,7 +1501,7 @@ int main(int argc, char *argv[]) { cerr << "Error: Unable to open " << zonetab << endl; return 1; } - int n = 0; + int32_t n = 0; string line; while (getline(f, line)) { string::size_type lb = line.find('#'); @@ -1434,7 +1568,7 @@ int main(int argc, char *argv[]) { time_t sec; time(&sec); struct tm* now = localtime(&sec); - int thisYear = now->tm_year + 1900; + int32_t thisYear = now->tm_year + 1900; // Write out a resource-bundle source file containing data for // all zones. @@ -1470,7 +1604,7 @@ int main(int argc, char *argv[]) { // Final Rules are used if requested by the zone file << " Rules { " << endl; // Emit final rules - int frc = 0; + int32_t frc = 0; for(map::iterator i=finalRules.begin(); i!=finalRules.end(); ++i) { const string& id = i->first; @@ -1486,7 +1620,7 @@ int main(int argc, char *argv[]) { // trims this to 171 kb. More work for the runtime code, but // a smaller data footprint. file << " Regions { " << endl; - int rc = 0; + int32_t rc = 0; for (map >::const_iterator i=countryMap.begin(); i != countryMap.end(); ++i) { string country = i->first; @@ -1554,7 +1688,7 @@ int main(int argc, char *argv[]) { // Emit equivalency lists bool first1 = true; - java << " public static final String VERSION = \"" + version + "\";" << endl; + java << " public static final String VERSION = \"" + version + "\";" << endl; java << " public static final String[][] EQUIV = {" << endl; for (ZoneMap::const_iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { if (i->second.isAlias() || i->second.getAliases().size() == 0) { @@ -1568,8 +1702,8 @@ int main(int argc, char *argv[]) { // the canonical zone, we should move it to position 0. 
java << " { "; bool first2 = true; - const set& s = i->second.getAliases(); - for (set::const_iterator j=s.begin(); j!=s.end(); ++j) { + const set& s = i->second.getAliases(); + for (set::const_iterator j=s.begin(); j!=s.end(); ++j) { if (!first2) java << ", "; java << '"' << zoneIDlist[*j] << '"'; first2 = false; diff --git a/icuSources/tools/tzcode/zdump.c b/icuSources/tools/tzcode/zdump.c new file mode 100644 index 00000000..c7199acb --- /dev/null +++ b/icuSources/tools/tzcode/zdump.c @@ -0,0 +1,1034 @@ +static char elsieid[] = "@(#)zdump.c 8.3"; + +/* +** This code has been made independent of the rest of the time +** conversion package to increase confidence in the verification it provides. +** You can use this code to help in verifying other implementations. +*/ + +/* + * ICU note: Mr. Arthur David Olson (olsona@dc37a.nci.nih.gov) stated that + * "zdump.c is indeed in the public domain" in e-mail on Feb 22, 2007. + * This version of zdump.c is modified by ICU team to change output format + * and some additional options. + */ + + +#include "stdio.h" /* for stdout, stderr, perror */ +#include "string.h" /* for strcpy */ +#include "sys/types.h" /* for time_t */ +#include "time.h" /* for struct tm */ +#include "stdlib.h" /* for exit, malloc, atoi */ +#include "float.h" /* for FLT_MAX and DBL_MAX */ +#include "ctype.h" /* for isalpha et al. */ + +/* Enable extensions and modifications for ICU. 
*/ +#define ICU + +#ifdef ICU +#include "dirent.h" +#endif + +#ifndef isascii +#define isascii(x) 1 +#endif /* !defined isascii */ + +#ifndef ZDUMP_LO_YEAR +#define ZDUMP_LO_YEAR (-500) +#endif /* !defined ZDUMP_LO_YEAR */ + +#ifndef ZDUMP_HI_YEAR +#define ZDUMP_HI_YEAR 2500 +#endif /* !defined ZDUMP_HI_YEAR */ + +#ifndef MAX_STRING_LENGTH +#define MAX_STRING_LENGTH 1024 +#endif /* !defined MAX_STRING_LENGTH */ + +#ifndef TRUE +#define TRUE 1 +#endif /* !defined TRUE */ + +#ifndef FALSE +#define FALSE 0 +#endif /* !defined FALSE */ + +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif /* !defined EXIT_SUCCESS */ + +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif /* !defined EXIT_FAILURE */ + +#ifndef SECSPERMIN +#define SECSPERMIN 60 +#endif /* !defined SECSPERMIN */ + +#ifndef MINSPERHOUR +#define MINSPERHOUR 60 +#endif /* !defined MINSPERHOUR */ + +#ifndef SECSPERHOUR +#define SECSPERHOUR (SECSPERMIN * MINSPERHOUR) +#endif /* !defined SECSPERHOUR */ + +#ifndef HOURSPERDAY +#define HOURSPERDAY 24 +#endif /* !defined HOURSPERDAY */ + +#ifndef EPOCH_YEAR +#define EPOCH_YEAR 1970 +#endif /* !defined EPOCH_YEAR */ + +#ifndef TM_YEAR_BASE +#define TM_YEAR_BASE 1900 +#endif /* !defined TM_YEAR_BASE */ + +#ifndef DAYSPERNYEAR +#define DAYSPERNYEAR 365 +#endif /* !defined DAYSPERNYEAR */ + +#ifndef isleap +#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) +#endif /* !defined isleap */ + +#ifndef isleap_sum +/* +** See tzfile.h for details on isleap_sum. 
+*/ +#define isleap_sum(a, b) isleap((a) % 400 + (b) % 400) +#endif /* !defined isleap_sum */ + +#define SECSPERDAY ((long) SECSPERHOUR * HOURSPERDAY) +#define SECSPERNYEAR (SECSPERDAY * DAYSPERNYEAR) +#define SECSPERLYEAR (SECSPERNYEAR + SECSPERDAY) + +#if HAVE_GETTEXT +#include "locale.h" /* for setlocale */ +#include "libintl.h" +#endif /* HAVE_GETTEXT */ + +#ifndef GNUC_or_lint +#ifdef lint +#define GNUC_or_lint +#else /* !defined lint */ +#ifdef __GNUC__ +#define GNUC_or_lint +#endif /* defined __GNUC__ */ +#endif /* !defined lint */ +#endif /* !defined GNUC_or_lint */ + +#ifndef INITIALIZE +#ifdef GNUC_or_lint +#define INITIALIZE(x) ((x) = 0) +#else /* !defined GNUC_or_lint */ +#define INITIALIZE(x) +#endif /* !defined GNUC_or_lint */ +#endif /* !defined INITIALIZE */ + +/* +** For the benefit of GNU folk... +** `_(MSGID)' uses the current locale's message library string for MSGID. +** The default is to use gettext if available, and use MSGID otherwise. +*/ + +#ifndef _ +#if HAVE_GETTEXT +#define _(msgid) gettext(msgid) +#else /* !HAVE_GETTEXT */ +#define _(msgid) msgid +#endif /* !HAVE_GETTEXT */ +#endif /* !defined _ */ + +#ifndef TZ_DOMAIN +#define TZ_DOMAIN "tz" +#endif /* !defined TZ_DOMAIN */ + +#ifndef P +#define P(x) x +#endif /* !defined P */ + +extern char ** environ; +extern int getopt P((int argc, char * const argv[], + const char * options)); +extern char * optarg; +extern int optind; +extern char * tzname[2]; + +static time_t absolute_min_time; +static time_t absolute_max_time; +static size_t longest; +static char * progname; +static int warned; + +static char * abbr P((struct tm * tmp)); +static void abbrok P((const char * abbrp, const char * zone)); +static long delta P((struct tm * newp, struct tm * oldp)); +static void dumptime P((const struct tm * tmp)); +static time_t hunt P((char * name, time_t lot, time_t hit)); +static void setabsolutes P((void)); +static void show P((char * zone, time_t t, int v)); +static const char * tformat 
P((void)); +static time_t yeartot P((long y)); +#ifdef ICU +typedef struct listentry { + char * name; + struct listentry * next; +} listentry; + +static time_t huntICU P((char * name, time_t lot, time_t hit, FILE *fp)); +static void dumptimeICU P((FILE * fp, time_t t)); +static void showICU P((FILE * fp, char * zone, time_t t1, time_t t2)); +static int getall P((struct listentry ** namelist)); +static void getzones P((char * basedir, char * subdir, struct listentry ** last, int * count)); +#endif + +#ifndef TYPECHECK +#define my_localtime localtime +#else /* !defined TYPECHECK */ +static struct tm * +my_localtime(tp) +time_t * tp; +{ + register struct tm * tmp; + + tmp = localtime(tp); + if (tp != NULL && tmp != NULL) { + struct tm tm; + register time_t t; + + tm = *tmp; + t = mktime(&tm); + if (t - *tp >= 1 || *tp - t >= 1) { + (void) fflush(stdout); + (void) fprintf(stderr, "\n%s: ", progname); + (void) fprintf(stderr, tformat(), *tp); + (void) fprintf(stderr, " ->"); + (void) fprintf(stderr, " year=%d", tmp->tm_year); + (void) fprintf(stderr, " mon=%d", tmp->tm_mon); + (void) fprintf(stderr, " mday=%d", tmp->tm_mday); + (void) fprintf(stderr, " hour=%d", tmp->tm_hour); + (void) fprintf(stderr, " min=%d", tmp->tm_min); + (void) fprintf(stderr, " sec=%d", tmp->tm_sec); + (void) fprintf(stderr, " isdst=%d", tmp->tm_isdst); + (void) fprintf(stderr, " -> "); + (void) fprintf(stderr, tformat(), t); + (void) fprintf(stderr, "\n"); + } + } + return tmp; +} +#endif /* !defined TYPECHECK */ + +static void +abbrok(abbrp, zone) +const char * const abbrp; +const char * const zone; +{ + register const char * cp; + register char * wp; + + if (warned) + return; + cp = abbrp; + wp = NULL; + while (isascii((unsigned char) *cp) && isalpha((unsigned char) *cp)) + ++cp; + if (cp - abbrp == 0) + wp = _("lacks alphabetic at start"); + else if (cp - abbrp < 3) + wp = _("has fewer than 3 alphabetics"); + else if (cp - abbrp > 6) + wp = _("has more than 6 alphabetics"); + if (wp == NULL 
&& (*cp == '+' || *cp == '-')) { + ++cp; + if (isascii((unsigned char) *cp) && + isdigit((unsigned char) *cp)) + if (*cp++ == '1' && *cp >= '0' && *cp <= '4') + ++cp; + if (*cp != '\0') + wp = _("differs from POSIX standard"); + } + if (wp == NULL) + return; + (void) fflush(stdout); + (void) fprintf(stderr, + _("%s: warning: zone \"%s\" abbreviation \"%s\" %s\n"), + progname, zone, abbrp, wp); + warned = TRUE; +} + +int +main(argc, argv) +int argc; +char * argv[]; +{ + register int i; + register int c; + register int vflag; + register char * cutarg; + register long cutloyear = ZDUMP_LO_YEAR; + register long cuthiyear = ZDUMP_HI_YEAR; + register time_t cutlotime; + register time_t cuthitime; + register char ** fakeenv; + time_t now; + time_t t; + time_t newt; + struct tm tm; + struct tm newtm; + register struct tm * tmp; + register struct tm * newtmp; +#ifdef ICU + int nextopt; + char * dirarg; + int aflag; + int iflag; + listentry * namelist = NULL; + FILE * fp = stdout; +#endif + + INITIALIZE(cutlotime); + INITIALIZE(cuthitime); +#if HAVE_GETTEXT + (void) setlocale(LC_ALL, ""); +#ifdef TZ_DOMAINDIR + (void) bindtextdomain(TZ_DOMAIN, TZ_DOMAINDIR); +#endif /* defined TEXTDOMAINDIR */ + (void) textdomain(TZ_DOMAIN); +#endif /* HAVE_GETTEXT */ + progname = argv[0]; + for (i = 1; i < argc; ++i) + if (strcmp(argv[i], "--version") == 0) { + (void) printf("%s\n", elsieid); + exit(EXIT_SUCCESS); + } + vflag = 0; + cutarg = NULL; +#ifdef ICU + aflag = 0; + iflag = 0; + dirarg = NULL; + nextopt = 1; + while(nextopt) { + c = getopt(argc, argv, "ac:d:iv"); + switch(c) { + case 'a': + aflag = 1; + break; + case 'c': + cutarg = optarg; + break; + case 'd': + dirarg = optarg; + break; + case 'i': + iflag = 1; + break; + case 'v': + vflag = 1; + break; + default: + nextopt = 0; + break; + } + } + if ((c != EOF && c != -1) || + (optind == argc - 1 && strcmp(argv[optind], "=") == 0)) { + (void) fprintf(stderr, + _("%s: usage is %s [ --version ] [ -a ] [ -v ] [ -i ] [ -c 
[loyear,]hiyear ] [ -d dir ] [ zonename ... ]\n"), + progname, progname); + exit(EXIT_FAILURE); + } + + if (dirarg != NULL) { + DIR * dp; + /* create the output directory */ + mkdir(dirarg, 0777); + if ((dp = opendir(dirarg)) == NULL) { + fprintf(stderr, "cannot create the target directory"); + exit(EXIT_FAILURE); + } + closedir(dp); + } +#else + while ((c = getopt(argc, argv, "c:v")) == 'c' || c == 'v') + if (c == 'v') + vflag = 1; + else cutarg = optarg; + if ((c != EOF && c != -1) || + (optind == argc - 1 && strcmp(argv[optind], "=") == 0)) { + (void) fprintf(stderr, +_("%s: usage is %s [ --version ] [ -v ] [ -c [loyear,]hiyear ] zonename ...\n"), + progname, progname); + exit(EXIT_FAILURE); + } +#endif + if (vflag) { + if (cutarg != NULL) { + long lo; + long hi; + char dummy; + + if (sscanf(cutarg, "%ld%c", &hi, &dummy) == 1) { + cuthiyear = hi; + } else if (sscanf(cutarg, "%ld,%ld%c", + &lo, &hi, &dummy) == 2) { + cutloyear = lo; + cuthiyear = hi; + } else { +(void) fprintf(stderr, _("%s: wild -c argument %s\n"), + progname, cutarg); + exit(EXIT_FAILURE); + } + } + setabsolutes(); + cutlotime = yeartot(cutloyear); + cuthitime = yeartot(cuthiyear); + } + +#ifdef ICU + if (aflag) { + /* get all available zones */ + char ** fakeargv; + int i; + int count; + + count = getall(&namelist); + + fakeargv = (char **) malloc((size_t) (argc + count) * sizeof *argv); + /* + if ((fakeargv = (char **) malloc((size_t) (argc + count) * sizeof *argv)) == NULL) { + exit(EXIT_FAILURE); + } + */ + for (i = 0; i < argc; i++) { + fakeargv[i] = argv[i]; + } + for (i = 0; i < count; i++) { + fakeargv[i + argc] = namelist->name; + namelist = namelist->next; + } + argv = fakeargv; + argc += count; + } +#endif + (void) time(&now); + longest = 0; + for (i = optind; i < argc; ++i) + if (strlen(argv[i]) > longest) + longest = strlen(argv[i]); + { + register int from; + register int to; + + for (i = 0; environ[i] != NULL; ++i) + continue; + fakeenv = (char **) malloc((size_t) ((i + 2) * + 
sizeof *fakeenv)); + if (fakeenv == NULL || + (fakeenv[0] = (char *) malloc(longest + 4)) == NULL) { + (void) perror(progname); + exit(EXIT_FAILURE); + } + to = 0; + (void) strcpy(fakeenv[to++], "TZ="); + for (from = 0; environ[from] != NULL; ++from) + if (strncmp(environ[from], "TZ=", 3) != 0) + fakeenv[to++] = environ[from]; + fakeenv[to] = NULL; + environ = fakeenv; + } + for (i = optind; i < argc; ++i) { + static char buf[MAX_STRING_LENGTH]; + + (void) strcpy(&fakeenv[0][3], argv[i]); + if (!vflag) { + show(argv[i], now, FALSE); + continue; + } +#ifdef ICU + fp = NULL; + if (iflag) { + if (dirarg == NULL) { + /* we want to display a zone name here */ + if (i != optind) { + printf("\n"); + } + printf("ZONE: %s\n", argv[i]); + } else { + int zstart; + char path[FILENAME_MAX + 1]; + strcpy(path, dirarg); + strcat(path, "/"); + zstart = strlen(path); + strcat(path, argv[i]); + /* replace '/' with '-' */ + while(path[++zstart] != 0) { + if (path[zstart] == '/') { + path[zstart] = '-'; + } + } + if ((fp = fopen(path, "w")) == NULL) { + fprintf(stderr, "cannot create output file %s\n", path); + exit(EXIT_FAILURE); + } + } + } +#endif + warned = FALSE; + t = absolute_min_time; +#ifdef ICU + /* skip displaying info for the lowest time, which is actually not + * a transition when -i option is set */ + if (!iflag) { +#endif + show(argv[i], t, TRUE); + t += SECSPERHOUR * HOURSPERDAY; + show(argv[i], t, TRUE); +#ifdef ICU + } +#endif + if (t < cutlotime) + t = cutlotime; + tmp = my_localtime(&t); + if (tmp != NULL) { + tm = *tmp; + (void) strncpy(buf, abbr(&tm), (sizeof buf) - 1); + } + for ( ; ; ) { + if (t >= cuthitime) + break; + newt = t + SECSPERHOUR * 12; + if (newt >= cuthitime) + break; + if (newt <= t) + break; + newtmp = localtime(&newt); + if (newtmp != NULL) + newtm = *newtmp; +#ifdef ICU + if (iflag) { + /* We do not want to capture transitions just for + * abbreviated zone name changes */ + if ((tmp == NULL || newtmp == NULL) ? 
(tmp != newtmp) : + (delta(&newtm, &tm) != (newt - t) || + newtm.tm_isdst != tm.tm_isdst)) { + newt = huntICU(argv[i], t, newt, fp); + newtmp = localtime(&newt); + if (newtmp != NULL) { + newtm = *newtmp; + (void) strncpy(buf, + abbr(&newtm), + (sizeof buf) - 1); + } + } + } else { +#endif + if ((tmp == NULL || newtmp == NULL) ? (tmp != newtmp) : + (delta(&newtm, &tm) != (newt - t) || + newtm.tm_isdst != tm.tm_isdst || + strcmp(abbr(&newtm), buf) != 0)) { + newt = hunt(argv[i], t, newt); + newtmp = localtime(&newt); + if (newtmp != NULL) { + newtm = *newtmp; + (void) strncpy(buf, + abbr(&newtm), + (sizeof buf) - 1); + } + } +#ifdef ICU + } +#endif + t = newt; + tm = newtm; + tmp = newtmp; + } +#ifdef ICU + if (!iflag) { + /* skip displaying info for the highest time, which is actually not + * a transition when -i option is used*/ +#endif + t = absolute_max_time; + t -= SECSPERHOUR * HOURSPERDAY; + show(argv[i], t, TRUE); + t += SECSPERHOUR * HOURSPERDAY; + show(argv[i], t, TRUE); + +#ifdef ICU + } + /* close file */ + if (fp != NULL) { + fclose(fp); + } +#endif + } + if (fflush(stdout) || ferror(stdout)) { + (void) fprintf(stderr, "%s: ", progname); + (void) perror(_("Error writing to standard output")); + exit(EXIT_FAILURE); + } +#ifdef ICU + if (aflag) { + struct listentry * entry = namelist; + struct listentry * next; + while (entry != NULL) { + free(entry->name); + next = entry->next; + free(entry); + entry = next; + } + } +#endif + exit(EXIT_SUCCESS); + /* If exit fails to exit... */ + return EXIT_FAILURE; +} + +static void +setabsolutes() +{ + if (0.5 == (time_t) 0.5) { + /* + ** time_t is floating. 
+ */ + if (sizeof (time_t) == sizeof (float)) { + absolute_min_time = (time_t) -FLT_MAX; + absolute_max_time = (time_t) FLT_MAX; + } else if (sizeof (time_t) == sizeof (double)) { + absolute_min_time = (time_t) -DBL_MAX; + absolute_max_time = (time_t) DBL_MAX; + } else { + (void) fprintf(stderr, +_("%s: use of -v on system with floating time_t other than float or double\n"), + progname); + exit(EXIT_FAILURE); + } + } else if (0 > (time_t) -1) { + /* + ** time_t is signed. Assume overflow wraps around. + */ + time_t t = 0; + time_t t1 = 1; + + while (t < t1) { + t = t1; + t1 = 2 * t1 + 1; + } + + absolute_max_time = t; + t = -t; + absolute_min_time = t - 1; + if (t < absolute_min_time) + absolute_min_time = t; + } else { + /* + ** time_t is unsigned. + */ + absolute_min_time = 0; + absolute_max_time = absolute_min_time - 1; + } +} + +static time_t +yeartot(y) +const long y; +{ + register long myy; + register long seconds; + register time_t t; + + myy = EPOCH_YEAR; + t = 0; + while (myy != y) { + if (myy < y) { + seconds = isleap(myy) ? SECSPERLYEAR : SECSPERNYEAR; + ++myy; + if (t > absolute_max_time - seconds) { + t = absolute_max_time; + break; + } + t += seconds; + } else { + --myy; + seconds = isleap(myy) ? SECSPERLYEAR : SECSPERNYEAR; + if (t < absolute_min_time + seconds) { + t = absolute_min_time; + break; + } + t -= seconds; + } + } + return t; +} + +static time_t +hunt(char *name, time_t lot, time_t hit) +{ + time_t t; + long diff; + struct tm lotm; + register struct tm * lotmp; + struct tm tm; + register struct tm * tmp; + char loab[MAX_STRING_LENGTH]; + + lotmp = my_localtime(&lot); + if (lotmp != NULL) { + lotm = *lotmp; + (void) strncpy(loab, abbr(&lotm), (sizeof loab) - 1); + } + for ( ; ; ) { + diff = (long) (hit - lot); + if (diff < 2) + break; + t = lot; + t += diff / 2; + if (t <= lot) + ++t; + else if (t >= hit) + --t; + tmp = my_localtime(&t); + if (tmp != NULL) + tm = *tmp; + if ((lotmp == NULL || tmp == NULL) ? 
(lotmp == tmp) : + (delta(&tm, &lotm) == (t - lot) && + tm.tm_isdst == lotm.tm_isdst && + strcmp(abbr(&tm), loab) == 0)) { + lot = t; + lotm = tm; + lotmp = tmp; + } else hit = t; + } + show(name, lot, TRUE); + show(name, hit, TRUE); + return hit; +} + +/* +** Thanks to Paul Eggert for logic used in delta. +*/ + +static long +delta(newp, oldp) +struct tm * newp; +struct tm * oldp; +{ + register long result; + register int tmy; + + if (newp->tm_year < oldp->tm_year) + return -delta(oldp, newp); + result = 0; + for (tmy = oldp->tm_year; tmy < newp->tm_year; ++tmy) + result += DAYSPERNYEAR + isleap_sum(tmy, TM_YEAR_BASE); + result += newp->tm_yday - oldp->tm_yday; + result *= HOURSPERDAY; + result += newp->tm_hour - oldp->tm_hour; + result *= MINSPERHOUR; + result += newp->tm_min - oldp->tm_min; + result *= SECSPERMIN; + result += newp->tm_sec - oldp->tm_sec; + return result; +} + +static void +show(char *zone, time_t t, int v) +{ + register struct tm * tmp; + + (void) printf("%-*s ", (int) longest, zone); + if (v) { + tmp = gmtime(&t); + if (tmp == NULL) { + (void) printf(tformat(), t); + } else { + dumptime(tmp); + (void) printf(" UTC"); + } + (void) printf(" = "); + } + tmp = my_localtime(&t); + dumptime(tmp); + if (tmp != NULL) { + if (*abbr(tmp) != '\0') + (void) printf(" %s", abbr(tmp)); + if (v) { + (void) printf(" isdst=%d", tmp->tm_isdst); +#ifdef TM_GMTOFF + (void) printf(" gmtoff=%ld", tmp->TM_GMTOFF); +#endif /* defined TM_GMTOFF */ + } + } + (void) printf("\n"); + if (tmp != NULL && *abbr(tmp) != '\0') + abbrok(abbr(tmp), zone); +} + +static char * +abbr(tmp) +struct tm * tmp; +{ + register char * result; + static char nada; + + if (tmp->tm_isdst != 0 && tmp->tm_isdst != 1) + return &nada; + result = tzname[tmp->tm_isdst]; + return (result == NULL) ? &nada : result; +} + +/* +** The code below can fail on certain theoretical systems; +** it works on all known real-world systems as of 2004-12-30. 
+*/ + +static const char * +tformat() +{ + if (0.5 == (time_t) 0.5) { /* floating */ + if (sizeof (time_t) > sizeof (double)) + return "%Lg"; + return "%g"; + } + if (0 > (time_t) -1) { /* signed */ + if (sizeof (time_t) > sizeof (long)) + return "%lld"; + if (sizeof (time_t) > sizeof (int)) + return "%ld"; + return "%d"; + } + if (sizeof (time_t) > sizeof (unsigned long)) + return "%llu"; + if (sizeof (time_t) > sizeof (unsigned int)) + return "%lu"; + return "%u"; +} + +static void +dumptime(timeptr) +register const struct tm * timeptr; +{ + static const char wday_name[][3] = { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" + }; + static const char mon_name[][3] = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" + }; + register const char * wn; + register const char * mn; + register int lead; + register int trail; + + if (timeptr == NULL) { + (void) printf("NULL"); + return; + } + /* + ** The packaged versions of localtime and gmtime never put out-of-range + ** values in tm_wday or tm_mon, but since this code might be compiled + ** with other (perhaps experimental) versions, paranoia is in order. 
+ */ + if (timeptr->tm_wday < 0 || timeptr->tm_wday >= + (int) (sizeof wday_name / sizeof wday_name[0])) + wn = "???"; + else wn = wday_name[timeptr->tm_wday]; + if (timeptr->tm_mon < 0 || timeptr->tm_mon >= + (int) (sizeof mon_name / sizeof mon_name[0])) + mn = "???"; + else mn = mon_name[timeptr->tm_mon]; + (void) printf("%.3s %.3s%3d %.2d:%.2d:%.2d ", + wn, mn, + timeptr->tm_mday, timeptr->tm_hour, + timeptr->tm_min, timeptr->tm_sec); +#define DIVISOR 10 + trail = timeptr->tm_year % DIVISOR + TM_YEAR_BASE % DIVISOR; + lead = timeptr->tm_year / DIVISOR + TM_YEAR_BASE / DIVISOR + + trail / DIVISOR; + trail %= DIVISOR; + if (trail < 0 && lead > 0) { + trail += DIVISOR; + --lead; + } else if (lead < 0 && trail > 0) { + trail -= DIVISOR; + ++lead; + } + if (lead == 0) + (void) printf("%d", trail); + else (void) printf("%d%d", lead, ((trail < 0) ? -trail : trail)); +} + +#ifdef ICU +static time_t +huntICU(char *name, time_t lot, time_t hit, FILE * fp) +{ + time_t t; + long diff; + struct tm lotm; + register struct tm * lotmp; + struct tm tm; + register struct tm * tmp; + char loab[MAX_STRING_LENGTH]; + + lotmp = my_localtime(&lot); + if (lotmp != NULL) { + lotm = *lotmp; + (void) strncpy(loab, abbr(&lotm), (sizeof loab) - 1); + } + for ( ; ; ) { + diff = (long) (hit - lot); + if (diff < 2) + break; + t = lot; + t += diff / 2; + if (t <= lot) + ++t; + else if (t >= hit) + --t; + tmp = my_localtime(&t); + if (tmp != NULL) + tm = *tmp; + /* We do not want to capture transitions just for + * abbreviated zone name changes */ + if ((lotmp == NULL || tmp == NULL) ? 
(lotmp == tmp) : + (delta(&tm, &lotm) == (t - lot) && + tm.tm_isdst == lotm.tm_isdst)) { + lot = t; + lotm = tm; + lotmp = tmp; + } else hit = t; + } + showICU(fp, name, lot, hit); + return hit; +} + +static void showICU(FILE * fp, char *zone, time_t t1, time_t t2) +{ + if (fp == NULL) { + fp = stdout; + } + dumptimeICU(fp, t1); + fprintf(fp, " > "); + dumptimeICU(fp, t2); + fprintf(fp, "\n"); +} + +static void dumptimeICU(FILE * fp, time_t t) +{ + static const char wday_name[][3] = { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" + }; + struct tm gmt; + struct tm loc; + register int lead; + register int trail; + long offset; + long hour, min, sec; + + loc = *my_localtime(&t); + + trail = loc.tm_year % DIVISOR + TM_YEAR_BASE % DIVISOR; + lead = loc.tm_year / DIVISOR + TM_YEAR_BASE / DIVISOR + trail / DIVISOR; + trail %= DIVISOR; + if (trail < 0 && lead > 0) { + trail += DIVISOR; + --lead; + } else if (lead < 0 && trail > 0) { + trail -= DIVISOR; + ++lead; + } + + fprintf(fp, "%04d-%02d-%02d", lead * DIVISOR + trail, loc.tm_mon + 1, loc.tm_mday); + fprintf(fp, " %.3s ", wday_name[loc.tm_wday]); + fprintf(fp, "%02d:%02d:%02d", loc.tm_hour, loc.tm_min, loc.tm_sec); + + gmt = *gmtime(&t); + offset = delta(&loc, &gmt); + if (offset < 0) { + offset = -offset; + fprintf(fp, "-"); + } else { + fprintf(fp, "+"); + } + + sec = offset % 60; + offset = (offset - sec) / 60; + min = offset % 60; + hour = offset / 60; + + fprintf(fp, "%02d", hour); + fprintf(fp, "%02d", min); + fprintf(fp, "%02d", sec); + fprintf(fp, "[DST=%d]", loc.tm_isdst); +} + +static int getall(struct listentry ** namelist) { + int count = 0; + struct listentry dummyentry; + struct listentry * last = &dummyentry; + + getzones(TZDIR, NULL, &last, &count); + if (count > 0) { + *namelist = dummyentry.next; + } + + return count; +} + +static void getzones(char * basedir, char * relpath, struct listentry ** last, int * count) { + char path[FILENAME_MAX + 1]; + struct dirent * dir; + DIR * dp; + + strcpy(path, 
basedir); + if (relpath != NULL) { + strcat(path, "/"); + strcat(path, relpath); + } + + if ((dp = opendir(path)) == NULL) { + /* file */ + if (strstr(relpath, ".tab") == NULL) { + char * pzonename; + listentry * pentry; + + if ((pzonename = malloc(strlen(relpath) + 1)) == NULL) { + exit(EXIT_FAILURE); + } + strcpy(pzonename, relpath); + + if ((pentry = malloc(sizeof(listentry))) == NULL) { + exit(EXIT_FAILURE); + } + + pentry->name = pzonename; + pentry->next = NULL; + (*last)->next = pentry; + *last = pentry; + (*count)++; + } + } else { + /* directory */ + while ((dir = readdir(dp)) != NULL) { + char subpath[FILENAME_MAX + 1]; + + if (strcmp(dir->d_name, ".") == 0 + || strcmp(dir->d_name, "..") == 0) { + continue; + } + if (relpath != NULL) { + strcpy(subpath, relpath); + strcat(subpath, "/"); + strcat(subpath, dir->d_name); + } else { + strcpy(subpath, dir->d_name); + } + getzones(basedir, subpath, last, count); + } + closedir(dp); + } +} +#endif diff --git a/icuSources/tools/tzcode/zic.c b/icuSources/tools/tzcode/zic.c index 47f92d22..a7535155 100644 --- a/icuSources/tools/tzcode/zic.c +++ b/icuSources/tools/tzcode/zic.c @@ -2321,15 +2321,6 @@ wp = ecpyalloc(_("no POSIX environment variable for zone")); break; /* go on to next year */ rp = &zp->z_rules[k]; rp->r_todo = FALSE; -#ifdef ICU - if (year >= finalRuleYear && rp == finalRule1) { - emit_icu_zone(icuFile, - zpfirst->z_name, zp->z_gmtoff, - rp, finalRuleIndex, year); - /* only emit this for the first year */ - finalRule1 = NULL; - } -#endif if (useuntil && ktime >= untiltime) break; stdoff = rp->r_stdoff; @@ -2356,6 +2347,42 @@ wp = ecpyalloc(_("no POSIX environment variable for zone")); FALSE); } } +#ifdef ICU + if (year >= finalRuleYear && rp == finalRule1) { + /* We want to shift final year 1 year after + * the actual final rule takes effect (year + 1), + * because the previous type is valid until the first + * transition defined by the final rule. 
Otherwise + * we may see unexpected offset shift at the + * begining of the year when the final rule takes + * effect. */ + + /* ICU currently can support signed int32 transition + * times. Thus, the transitions in year 2038 may be + * truncated. At this moment (tzdata2008g), only + * Rule Brazil is impacted by this limitation, because + * the final set of rules are starting in 2038. Although + * this code put the first couple of transitions populated + * by the final rules, they will be dropped off when + * collecting transition times. So, we need to keep + * the start year of the final rule in 2038, not 2039. + * Fortunately, the Brazil rules in 2038 and beyond use + * the same base offset/dst saving amount. Thus, even + * we skip the first couple of transitions, the final + * rule set for 2038 works properly. So for now, + * we do not increment the final rule start year only when + * it falls into year 2038. We need to revisit this code + * in future to fix the root cause of this problem (ICU + * resource type limitation - signed int32). + * Oct 7, 2008 - Yoshito */ + int finalStartYear = (year == 2038) ? year : year + 1; + emit_icu_zone(icuFile, + zpfirst->z_name, zp->z_gmtoff, + rp, finalRuleIndex, finalStartYear); + /* only emit this for the first year */ + finalRule1 = NULL; + } +#endif eats(zp->z_filename, zp->z_linenum, rp->r_filename, rp->r_linenum); doabbr(ab, zp->z_format, rp->r_abbrvar, -- 2.45.2