]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | ###################################################################### |
2 | # Copyright (C) 1999-2001, International Business Machines | |
3 | # Corporation and others. All Rights Reserved. | |
4 | ###################################################################### | |
5 | # See: ftp://elsie.nci.nih.gov/pub/tzdata<year> | |
6 | # where <year> is "1999b" or a similar string. | |
7 | ###################################################################### | |
8 | # This package handles the parsing of time zone files. | |
9 | # Author: Alan Liu | |
10 | ###################################################################### | |
11 | # Usage: | |
12 | # Call ParseFile for each file to be imported. Then call ParseZoneTab | |
13 | # to add country data. Then call Postprocess to remove unused rules. | |
14 | ||
15 | package TZ; | |
16 | use strict; | |
17 | use Carp; | |
18 | use vars qw(@ISA @EXPORT $VERSION $YEAR $STANDARD); | |
19 | require 'dumpvar.pl'; | |
20 | ||
21 | @ISA = qw(Exporter); | |
22 | @EXPORT = qw(ParseFile | |
23 | Postprocess | |
24 | ParseZoneTab | |
25 | ); | |
26 | $VERSION = '0.2'; | |
27 | ||
28 | $STANDARD = '-'; # Name of the Standard Time rule | |
29 | ||
30 | ###################################################################### | |
31 | # Read the tzdata zone.tab file and add a {country} field to zones | |
32 | # in the given hash. | |
33 | # Param: File name (<dir>/zone.tab) | |
34 | # Param: Ref to hash of zones | |
35 | # Param: Ref to hash of links | |
36 | sub ParseZoneTab { | |
37 | my ($FILE, $ZONES, $LINKS) = @_; | |
38 | ||
39 | my %linkEntries; | |
40 | ||
41 | local(*FILE); | |
42 | open(FILE,"<$FILE") or confess "Can't open $FILE: $!"; | |
43 | while (<FILE>) { | |
44 | # Handle comments | |
45 | s/\#.*//; | |
46 | next if (!/\S/); | |
47 | ||
48 | if (/^\s*([A-Z]{2})\s+[-+0-9]+\s+(\S+)/) { | |
49 | my ($country, $zone) = ($1, $2); | |
50 | if (exists $ZONES->{$zone}) { | |
51 | $ZONES->{$zone}->{country} = $country; | |
52 | } elsif (exists $LINKS->{$zone}) { | |
53 | # We have a country mapping for a zone that isn't in | |
54 | # our hash. This means it is a link entry. Save this | |
55 | # then handle it below. | |
56 | $linkEntries{$zone} = $country; | |
57 | } else { | |
58 | print STDERR "Nonexistent zone $zone in $FILE\n"; | |
59 | } | |
60 | } else { | |
61 | confess "Can't parse line \"$_\" of $FILE"; | |
62 | } | |
63 | } | |
64 | close(FILE); | |
65 | ||
66 | # Now that we have mapped all of the zones in %$ZONES (except | |
67 | # those without country affiliations), process the link entries. | |
68 | # For those zones in the table that differ by country from their | |
69 | # source zone, instantiate a new zone in the new country. An | |
70 | # example is Europe/Vatican, which is linked to Europe/Rome. If | |
71 | # we don't instantiate it, we have nothing for Vatican City. | |
72 | # Another example is America/Shiprock, which links to | |
73 | # America/Denver. These are identical and both in the US, so we | |
74 | # don't instantiate America/Shiprock. | |
75 | foreach my $zone (keys %linkEntries) { | |
76 | my $country = $linkEntries{$zone}; | |
77 | my $linkZone = $LINKS->{$zone}; | |
78 | my $linkCountry = $ZONES->{$linkZone}->{country}; | |
79 | if ($linkCountry ne $country) { | |
80 | # print "Cloning $zone ($country) from $linkZone ($linkCountry)\n"; | |
81 | _CloneZone($ZONES, $LINKS->{$zone}, $zone); | |
82 | $ZONES->{$zone}->{country} = $country; | |
83 | } | |
84 | } | |
85 | } | |
86 | ||
87 | ###################################################################### | |
88 | # Param: File name | |
89 | # Param: Ref to hash of zones | |
90 | # Param: Ref to hash of rules | |
91 | # Parma: Ref to hash of links | |
92 | # Param: Current year | |
93 | sub ParseFile { | |
94 | my ($FILE, $ZONES, $RULES, $LINKS, $YEAR) = @_; | |
95 | ||
96 | local(*FILE); | |
97 | open(FILE,"<$FILE") or confess "Can't open $FILE: $!"; | |
98 | my $zone; # Current zone | |
99 | my $badLineCount = 0; | |
100 | while (<FILE>) { | |
101 | # Handle comments and blanks | |
102 | s/\#.*//; | |
103 | next if (!/\S/); | |
104 | ||
105 | #|# Zone NAME GMTOFF RULES FORMAT [UNTIL] | |
106 | #|Zone America/Montreal -4:54:16 - LMT 1884 | |
107 | #| -5:00 Mont E%sT | |
108 | #|Zone America/Thunder_Bay -5:57:00 - LMT 1895 | |
109 | #| -5:00 Canada E%sT 1970 | |
110 | #| -5:00 Mont E%sT 1973 | |
111 | #| -5:00 - EST 1974 | |
112 | #| -5:00 Canada E%sT | |
113 | my ($zoneGmtoff, $zoneRule, $zoneFormat, $zoneUntil); | |
114 | if (/^zone/i) { | |
115 | # Zone block start | |
116 | if (/^zone\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/i | |
117 | || /^zone\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)()/i) { | |
118 | $zone = $1; | |
119 | ($zoneGmtoff, $zoneRule, $zoneFormat, $zoneUntil) = | |
120 | ($2, $3, $4, $5); | |
121 | } else { | |
122 | print STDERR "Can't parse in $FILE: $_"; | |
123 | ++$badLineCount; | |
124 | } | |
125 | } elsif (/^\s/ && $zone) { | |
126 | # Zone continuation | |
127 | if (/^\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/ | |
128 | || /^\s+(\S+)\s+(\S+)\s+(\S+)()/) { | |
129 | ($zoneGmtoff, $zoneRule, $zoneFormat, $zoneUntil) = | |
130 | ($1, $2, $3, $4); | |
131 | } else { | |
132 | print STDERR "Can't parse in $FILE: $_"; | |
133 | ++$badLineCount; | |
134 | } | |
135 | } elsif (/^rule/i) { | |
136 | # Here is where we parse a single line of the rule table. | |
137 | # Our goal is to accept only rules applying to the current | |
138 | # year. This is normally a matter of accepting rules | |
139 | # that match the current year. However, in some cases this | |
140 | # is more complicated. For example: | |
141 | #|# Tonga | |
142 | #|# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S | |
143 | #|Rule Tonga 1999 max - Oct Sat>=1 2:00s 1:00 S | |
144 | #|Rule Tonga 2000 max - Apr Sun>=16 2:00s 0 - | |
145 | # To handle this properly, we save every rule we encounter | |
146 | # (thus overwriting older ones with newer ones, since rules | |
147 | # are listed in order), and also use slot [2] to mark when | |
148 | # we see a current year rule. When that happens, we stop | |
149 | # saving rules. Thus we match the latest rule we see, or | |
150 | # a matching rule if we find one. The format of slot [2] | |
151 | # is just a 2 bit flag ([2]&1 means slot [0] matched, | |
152 | # [2]&2 means slot [1] matched). | |
153 | ||
154 | # Note that later, when the rules are post processed | |
155 | # (see Postprocess), the slot [2] will be overwritten | |
156 | # with the compressed rule string used to implement | |
157 | # equality testing. | |
158 | ||
159 | $zone = undef; | |
160 | # Rule | |
161 | #|# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S | |
162 | #|Rule US 1918 1919 - Mar lastSun 2:00 1:00 W # War | |
163 | #|Rule US 1918 1919 - Oct lastSun 2:00 0 S | |
164 | #|Rule US 1942 only - Feb 9 2:00 1:00 W # War | |
165 | #|Rule US 1945 only - Sep 30 2:00 0 S | |
166 | #|Rule US 1967 max - Oct lastSun 2:00 0 S | |
167 | #|Rule US 1967 1973 - Apr lastSun 2:00 1:00 D | |
168 | #|Rule US 1974 only - Jan 6 2:00 1:00 D | |
169 | #|Rule US 1975 only - Feb 23 2:00 1:00 D | |
170 | #|Rule US 1976 1986 - Apr lastSun 2:00 1:00 D | |
171 | #|Rule US 1987 max - Apr Sun>=1 2:00 1:00 D | |
172 | if (/^rule\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+ | |
173 | (\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/xi) { | |
174 | my ($name, $from, $to, $type, $in, $on, $at, $save, $letter) = | |
175 | ($1, $2, $3, $4, $5, $6, $7, $8, $9); | |
176 | my $i = $save ? 0:1; | |
177 | ||
178 | if (!exists $RULES->{$name}) { | |
179 | $RULES->{$name} = []; | |
180 | } | |
181 | my $ruleArray = $RULES->{$name}; | |
182 | ||
183 | # Check our bit mask to see if we've already matched | |
184 | # a current rule. If so, do nothing. If not, then | |
185 | # save this rule line as the best one so far. | |
186 | if (@{$ruleArray} < 3 || | |
187 | !($ruleArray->[2] & 1 << $i)) { | |
188 | my $h = $ruleArray->[$i]; | |
189 | $ruleArray->[$i]->{from} = $from; | |
190 | $ruleArray->[$i]->{to} = $to; | |
191 | $ruleArray->[$i]->{type} = $type; | |
192 | $ruleArray->[$i]->{in} = $in; | |
193 | $ruleArray->[$i]->{on} = $on; | |
194 | $ruleArray->[$i]->{at} = $at; | |
195 | $ruleArray->[$i]->{save} = $save; | |
196 | $ruleArray->[$i]->{letter} = $letter; | |
197 | ||
198 | # Does this rule match the current year? If so, | |
199 | # set the bit mask so we don't overwrite this rule. | |
200 | # This makes us ingore rules for subsequent years | |
201 | # that are already listed in the database -- as long | |
202 | # as we have an overriding rule for the current year. | |
203 | if (($from == $YEAR && $to =~ /only/i) || | |
204 | ($from <= $YEAR && | |
205 | (($to =~ /^\d/ && $YEAR <= $to) || $to =~ /max/i))) { | |
206 | $ruleArray->[2] |= 1 << $i; | |
207 | $ruleArray->[3] |= 1 << $i; | |
208 | } | |
209 | } | |
210 | } else { | |
211 | print STDERR "Can't parse in $FILE: $_"; | |
212 | ++$badLineCount; | |
213 | } | |
214 | } elsif (/^link/i) { | |
215 | #|# Old names, for S5 users | |
216 | #| | |
217 | #|# Link LINK-FROM LINK-TO | |
218 | #|Link America/New_York EST5EDT | |
219 | #|Link America/Chicago CST6CDT | |
220 | #|Link America/Denver MST7MDT | |
221 | #|Link America/Los_Angeles PST8PDT | |
222 | #|Link America/Indianapolis EST | |
223 | #|Link America/Phoenix MST | |
224 | #|Link Pacific/Honolulu HST | |
225 | # | |
226 | # There are also links for country-specific zones. | |
227 | # These are zones the differ only in that they belong | |
228 | # to a different country. E.g., | |
229 | #|Link Europe/Rome Europe/Vatican | |
230 | #|Link Europe/Rome Europe/San_Marino | |
231 | if (/^link\s+(\S+)\s+(\S+)/i) { | |
232 | my ($from, $to) = ($1, $2); | |
233 | # Record all links in $%LINKS | |
234 | $LINKS->{$to} = $from; | |
235 | } else { | |
236 | print STDERR "Can't parse in $FILE: $_"; | |
237 | ++$badLineCount; | |
238 | } | |
239 | } else { | |
240 | # Unexpected line | |
241 | print STDERR "Ignoring in $FILE: $_"; | |
242 | ++$badLineCount; | |
243 | } | |
244 | if ($zoneRule && | |
245 | ($zoneUntil !~ /\S/ || ($zoneUntil =~ /^\d/ && | |
246 | $zoneUntil >= $YEAR))) { | |
247 | $ZONES->{$zone}->{gmtoff} = $zoneGmtoff; | |
248 | $ZONES->{$zone}->{rule} = $zoneRule; | |
249 | $ZONES->{$zone}->{format} = $zoneFormat; | |
250 | $ZONES->{$zone}->{until} = $zoneUntil; | |
251 | } | |
252 | } | |
253 | close(FILE); | |
254 | } | |
255 | ||
256 | ###################################################################### | |
257 | # Param: Ref to hash of zones | |
258 | # Param: Ref to hash of rules | |
259 | sub Postprocess { | |
260 | my ($ZONES, $RULES) = @_; | |
261 | my %ruleInUse; | |
262 | ||
263 | # We no longer store links in the zone hash, so we don't need to do this. | |
264 | # # Eliminate zone links that have no corresponding zone | |
265 | # foreach (keys %$ZONES) { | |
266 | # if (exists $ZONES->{$_}->{link} && !exists $ZONES->{$_}->{rule}) { | |
267 | # if (0) { | |
268 | # print STDERR | |
269 | # "Deleting link from historical/nonexistent zone: ", | |
270 | # $_, " -> ", $ZONES->{$_}->{link}, "\n"; | |
271 | # } | |
272 | # delete $ZONES->{$_}; | |
273 | # } | |
274 | # } | |
275 | ||
276 | # Check that each zone has a corresponding rule. At the same | |
277 | # time, build up a hash that marks each rule that is in use. | |
278 | foreach (sort keys %$ZONES) { | |
279 | my $ruleName = $ZONES->{$_}->{rule}; | |
280 | next if ($ruleName eq $STANDARD); | |
281 | if (exists $RULES->{$ruleName}) { | |
282 | $ruleInUse{$ruleName} = 1; | |
283 | } else { | |
284 | # This means the zone is using the standard rule now | |
285 | $ZONES->{$_}->{rule} = $STANDARD; | |
286 | } | |
287 | } | |
288 | ||
289 | # Check that both parts are there for rules | |
290 | # Check for unused rules | |
291 | # Make coded string for comparisons | |
292 | foreach (keys %$RULES) { | |
293 | if (!exists $ruleInUse{$_}) { | |
294 | if (0) { | |
295 | print STDERR "Deleting historical/unused rule: $_\n"; | |
296 | } | |
297 | delete $RULES->{$_}; | |
298 | } elsif (!$RULES->{$_}->[0] || !$RULES->{$_}->[1]) { | |
299 | print STDERR "Rule doesn't have both parts: $_\n"; | |
300 | } else { | |
301 | # Generate coded string | |
302 | # This has all the data about a rule; it can be used | |
303 | # to see if two rules behave identically | |
304 | $RULES->{$_}->[2] = | |
305 | lc($RULES->{$_}->[0]->{in} . "," . | |
306 | $RULES->{$_}->[0]->{on} . "," . | |
307 | $RULES->{$_}->[0]->{at} . "," . | |
308 | $RULES->{$_}->[0]->{save} . ";" . | |
309 | $RULES->{$_}->[1]->{in} . "," . | |
310 | $RULES->{$_}->[1]->{on} . "," . | |
311 | $RULES->{$_}->[1]->{at}); # [1]->{save} is always zero | |
312 | } | |
313 | } | |
314 | } | |
315 | ||
316 | ###################################################################### | |
317 | # Create a clone of the zone $oldID named $newID in the hash $ZONES. | |
318 | # Param: ref to hash of zones | |
319 | # Param: ID of zone to clone | |
320 | # Param: ID of new zone | |
321 | sub _CloneZone { | |
322 | my $ZONES = shift; | |
323 | my $oldID = shift; | |
324 | my $newID = shift; | |
325 | for my $field (keys %{$ZONES->{$oldID}}) { | |
326 | $ZONES->{$newID}->{$field} = $ZONES->{$oldID}->{$field}; | |
327 | } | |
328 | } |