]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/gentz/tzparse.pm
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / tools / gentz / tzparse.pm
CommitLineData
b75a7d8f
A
1######################################################################
2# Copyright (C) 1999-2001, International Business Machines
3# Corporation and others. All Rights Reserved.
4######################################################################
5# See: ftp://elsie.nci.nih.gov/pub/tzdata<year>
6# where <year> is "1999b" or a similar string.
7######################################################################
8# This package handles the parsing of time zone files.
9# Author: Alan Liu
10######################################################################
11# Usage:
12# Call ParseFile for each file to be imported. Then call ParseZoneTab
13# to add country data. Then call Postprocess to remove unused rules.
14
15package TZ;
16use strict;
17use Carp;
18use vars qw(@ISA @EXPORT $VERSION $YEAR $STANDARD);
19require 'dumpvar.pl';
20
21@ISA = qw(Exporter);
22@EXPORT = qw(ParseFile
23 Postprocess
24 ParseZoneTab
25 );
26$VERSION = '0.2';
27
28$STANDARD = '-'; # Name of the Standard Time rule
29
30######################################################################
31# Read the tzdata zone.tab file and add a {country} field to zones
32# in the given hash.
33# Param: File name (<dir>/zone.tab)
34# Param: Ref to hash of zones
35# Param: Ref to hash of links
36sub ParseZoneTab {
37 my ($FILE, $ZONES, $LINKS) = @_;
38
39 my %linkEntries;
40
41 local(*FILE);
42 open(FILE,"<$FILE") or confess "Can't open $FILE: $!";
43 while (<FILE>) {
44 # Handle comments
45 s/\#.*//;
46 next if (!/\S/);
47
48 if (/^\s*([A-Z]{2})\s+[-+0-9]+\s+(\S+)/) {
49 my ($country, $zone) = ($1, $2);
50 if (exists $ZONES->{$zone}) {
51 $ZONES->{$zone}->{country} = $country;
52 } elsif (exists $LINKS->{$zone}) {
53 # We have a country mapping for a zone that isn't in
54 # our hash. This means it is a link entry. Save this
55 # then handle it below.
56 $linkEntries{$zone} = $country;
57 } else {
58 print STDERR "Nonexistent zone $zone in $FILE\n";
59 }
60 } else {
61 confess "Can't parse line \"$_\" of $FILE";
62 }
63 }
64 close(FILE);
65
66 # Now that we have mapped all of the zones in %$ZONES (except
67 # those without country affiliations), process the link entries.
68 # For those zones in the table that differ by country from their
69 # source zone, instantiate a new zone in the new country. An
70 # example is Europe/Vatican, which is linked to Europe/Rome. If
71 # we don't instantiate it, we have nothing for Vatican City.
72 # Another example is America/Shiprock, which links to
73 # America/Denver. These are identical and both in the US, so we
74 # don't instantiate America/Shiprock.
75 foreach my $zone (keys %linkEntries) {
76 my $country = $linkEntries{$zone};
77 my $linkZone = $LINKS->{$zone};
78 my $linkCountry = $ZONES->{$linkZone}->{country};
79 if ($linkCountry ne $country) {
80 # print "Cloning $zone ($country) from $linkZone ($linkCountry)\n";
81 _CloneZone($ZONES, $LINKS->{$zone}, $zone);
82 $ZONES->{$zone}->{country} = $country;
83 }
84 }
85}
86
87######################################################################
88# Param: File name
89# Param: Ref to hash of zones
90# Param: Ref to hash of rules
91# Parma: Ref to hash of links
92# Param: Current year
93sub ParseFile {
94 my ($FILE, $ZONES, $RULES, $LINKS, $YEAR) = @_;
95
96 local(*FILE);
97 open(FILE,"<$FILE") or confess "Can't open $FILE: $!";
98 my $zone; # Current zone
99 my $badLineCount = 0;
100 while (<FILE>) {
101 # Handle comments and blanks
102 s/\#.*//;
103 next if (!/\S/);
104
105 #|# Zone NAME GMTOFF RULES FORMAT [UNTIL]
106 #|Zone America/Montreal -4:54:16 - LMT 1884
107 #| -5:00 Mont E%sT
108 #|Zone America/Thunder_Bay -5:57:00 - LMT 1895
109 #| -5:00 Canada E%sT 1970
110 #| -5:00 Mont E%sT 1973
111 #| -5:00 - EST 1974
112 #| -5:00 Canada E%sT
113 my ($zoneGmtoff, $zoneRule, $zoneFormat, $zoneUntil);
114 if (/^zone/i) {
115 # Zone block start
116 if (/^zone\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/i
117 || /^zone\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)()/i) {
118 $zone = $1;
119 ($zoneGmtoff, $zoneRule, $zoneFormat, $zoneUntil) =
120 ($2, $3, $4, $5);
121 } else {
122 print STDERR "Can't parse in $FILE: $_";
123 ++$badLineCount;
124 }
125 } elsif (/^\s/ && $zone) {
126 # Zone continuation
127 if (/^\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/
128 || /^\s+(\S+)\s+(\S+)\s+(\S+)()/) {
129 ($zoneGmtoff, $zoneRule, $zoneFormat, $zoneUntil) =
130 ($1, $2, $3, $4);
131 } else {
132 print STDERR "Can't parse in $FILE: $_";
133 ++$badLineCount;
134 }
135 } elsif (/^rule/i) {
136 # Here is where we parse a single line of the rule table.
137 # Our goal is to accept only rules applying to the current
138 # year. This is normally a matter of accepting rules
139 # that match the current year. However, in some cases this
140 # is more complicated. For example:
141 #|# Tonga
142 #|# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
143 #|Rule Tonga 1999 max - Oct Sat>=1 2:00s 1:00 S
144 #|Rule Tonga 2000 max - Apr Sun>=16 2:00s 0 -
145 # To handle this properly, we save every rule we encounter
146 # (thus overwriting older ones with newer ones, since rules
147 # are listed in order), and also use slot [2] to mark when
148 # we see a current year rule. When that happens, we stop
149 # saving rules. Thus we match the latest rule we see, or
150 # a matching rule if we find one. The format of slot [2]
151 # is just a 2 bit flag ([2]&1 means slot [0] matched,
152 # [2]&2 means slot [1] matched).
153
154 # Note that later, when the rules are post processed
155 # (see Postprocess), the slot [2] will be overwritten
156 # with the compressed rule string used to implement
157 # equality testing.
158
159 $zone = undef;
160 # Rule
161 #|# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
162 #|Rule US 1918 1919 - Mar lastSun 2:00 1:00 W # War
163 #|Rule US 1918 1919 - Oct lastSun 2:00 0 S
164 #|Rule US 1942 only - Feb 9 2:00 1:00 W # War
165 #|Rule US 1945 only - Sep 30 2:00 0 S
166 #|Rule US 1967 max - Oct lastSun 2:00 0 S
167 #|Rule US 1967 1973 - Apr lastSun 2:00 1:00 D
168 #|Rule US 1974 only - Jan 6 2:00 1:00 D
169 #|Rule US 1975 only - Feb 23 2:00 1:00 D
170 #|Rule US 1976 1986 - Apr lastSun 2:00 1:00 D
171 #|Rule US 1987 max - Apr Sun>=1 2:00 1:00 D
172 if (/^rule\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+
173 (\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/xi) {
174 my ($name, $from, $to, $type, $in, $on, $at, $save, $letter) =
175 ($1, $2, $3, $4, $5, $6, $7, $8, $9);
176 my $i = $save ? 0:1;
177
178 if (!exists $RULES->{$name}) {
179 $RULES->{$name} = [];
180 }
181 my $ruleArray = $RULES->{$name};
182
183 # Check our bit mask to see if we've already matched
184 # a current rule. If so, do nothing. If not, then
185 # save this rule line as the best one so far.
186 if (@{$ruleArray} < 3 ||
187 !($ruleArray->[2] & 1 << $i)) {
188 my $h = $ruleArray->[$i];
189 $ruleArray->[$i]->{from} = $from;
190 $ruleArray->[$i]->{to} = $to;
191 $ruleArray->[$i]->{type} = $type;
192 $ruleArray->[$i]->{in} = $in;
193 $ruleArray->[$i]->{on} = $on;
194 $ruleArray->[$i]->{at} = $at;
195 $ruleArray->[$i]->{save} = $save;
196 $ruleArray->[$i]->{letter} = $letter;
197
198 # Does this rule match the current year? If so,
199 # set the bit mask so we don't overwrite this rule.
200 # This makes us ingore rules for subsequent years
201 # that are already listed in the database -- as long
202 # as we have an overriding rule for the current year.
203 if (($from == $YEAR && $to =~ /only/i) ||
204 ($from <= $YEAR &&
205 (($to =~ /^\d/ && $YEAR <= $to) || $to =~ /max/i))) {
206 $ruleArray->[2] |= 1 << $i;
207 $ruleArray->[3] |= 1 << $i;
208 }
209 }
210 } else {
211 print STDERR "Can't parse in $FILE: $_";
212 ++$badLineCount;
213 }
214 } elsif (/^link/i) {
215 #|# Old names, for S5 users
216 #|
217 #|# Link LINK-FROM LINK-TO
218 #|Link America/New_York EST5EDT
219 #|Link America/Chicago CST6CDT
220 #|Link America/Denver MST7MDT
221 #|Link America/Los_Angeles PST8PDT
222 #|Link America/Indianapolis EST
223 #|Link America/Phoenix MST
224 #|Link Pacific/Honolulu HST
225 #
226 # There are also links for country-specific zones.
227 # These are zones the differ only in that they belong
228 # to a different country. E.g.,
229 #|Link Europe/Rome Europe/Vatican
230 #|Link Europe/Rome Europe/San_Marino
231 if (/^link\s+(\S+)\s+(\S+)/i) {
232 my ($from, $to) = ($1, $2);
233 # Record all links in $%LINKS
234 $LINKS->{$to} = $from;
235 } else {
236 print STDERR "Can't parse in $FILE: $_";
237 ++$badLineCount;
238 }
239 } else {
240 # Unexpected line
241 print STDERR "Ignoring in $FILE: $_";
242 ++$badLineCount;
243 }
244 if ($zoneRule &&
245 ($zoneUntil !~ /\S/ || ($zoneUntil =~ /^\d/ &&
246 $zoneUntil >= $YEAR))) {
247 $ZONES->{$zone}->{gmtoff} = $zoneGmtoff;
248 $ZONES->{$zone}->{rule} = $zoneRule;
249 $ZONES->{$zone}->{format} = $zoneFormat;
250 $ZONES->{$zone}->{until} = $zoneUntil;
251 }
252 }
253 close(FILE);
254}
255
256######################################################################
257# Param: Ref to hash of zones
258# Param: Ref to hash of rules
259sub Postprocess {
260 my ($ZONES, $RULES) = @_;
261 my %ruleInUse;
262
263# We no longer store links in the zone hash, so we don't need to do this.
264# # Eliminate zone links that have no corresponding zone
265# foreach (keys %$ZONES) {
266# if (exists $ZONES->{$_}->{link} && !exists $ZONES->{$_}->{rule}) {
267# if (0) {
268# print STDERR
269# "Deleting link from historical/nonexistent zone: ",
270# $_, " -> ", $ZONES->{$_}->{link}, "\n";
271# }
272# delete $ZONES->{$_};
273# }
274# }
275
276 # Check that each zone has a corresponding rule. At the same
277 # time, build up a hash that marks each rule that is in use.
278 foreach (sort keys %$ZONES) {
279 my $ruleName = $ZONES->{$_}->{rule};
280 next if ($ruleName eq $STANDARD);
281 if (exists $RULES->{$ruleName}) {
282 $ruleInUse{$ruleName} = 1;
283 } else {
284 # This means the zone is using the standard rule now
285 $ZONES->{$_}->{rule} = $STANDARD;
286 }
287 }
288
289 # Check that both parts are there for rules
290 # Check for unused rules
291 # Make coded string for comparisons
292 foreach (keys %$RULES) {
293 if (!exists $ruleInUse{$_}) {
294 if (0) {
295 print STDERR "Deleting historical/unused rule: $_\n";
296 }
297 delete $RULES->{$_};
298 } elsif (!$RULES->{$_}->[0] || !$RULES->{$_}->[1]) {
299 print STDERR "Rule doesn't have both parts: $_\n";
300 } else {
301 # Generate coded string
302 # This has all the data about a rule; it can be used
303 # to see if two rules behave identically
304 $RULES->{$_}->[2] =
305 lc($RULES->{$_}->[0]->{in} . "," .
306 $RULES->{$_}->[0]->{on} . "," .
307 $RULES->{$_}->[0]->{at} . "," .
308 $RULES->{$_}->[0]->{save} . ";" .
309 $RULES->{$_}->[1]->{in} . "," .
310 $RULES->{$_}->[1]->{on} . "," .
311 $RULES->{$_}->[1]->{at}); # [1]->{save} is always zero
312 }
313 }
314}
315
316######################################################################
317# Create a clone of the zone $oldID named $newID in the hash $ZONES.
318# Param: ref to hash of zones
319# Param: ID of zone to clone
320# Param: ID of new zone
321sub _CloneZone {
322 my $ZONES = shift;
323 my $oldID = shift;
324 my $newID = shift;
325 for my $field (keys %{$ZONES->{$oldID}}) {
326 $ZONES->{$newID}->{$field} = $ZONES->{$oldID}->{$field};
327 }
328}