]>
Commit | Line | Data |
---|---|---|
729e4ab9 | 1 | #!/usr/bin/perl |
f3c0d7a5 A |
2 | # Copyright (C) 2016 and later: Unicode, Inc. and others. |
3 | # License & terms of use: http://www.unicode.org/copyright.html | |
b331163b | 4 | # Copyright (c) 2001-2015 International Business Machines |
374ca955 A |
5 | # Corporation and others. All Rights Reserved. |
6 | ||
7 | #################################################################################### | |
8 | # filterRFC3454.pl: | |
9 | # This tool filters the RFC-3454 txt file for StringPrep tables and creates a table | |
10 | # to be used in NamePrepProfile | |
11 | # | |
12 | # Author: Ram Viswanadha | |
13 | # | |
14 | #################################################################################### | |
15 | ||
16 | use File::Find; | |
17 | use File::Basename; | |
18 | use IO::File; | |
19 | use Cwd; | |
20 | use File::Copy; | |
21 | use Getopt::Long; | |
22 | use File::Path; | |
23 | use File::Copy; | |
729e4ab9 | 24 | use Time::localtime; |
374ca955 | 25 | |
729e4ab9 A |
# Banner templates written at the top of the generated file.  Each one is
# assembled line by line; the two trailing empty strings produce the newline
# after the last rule plus one blank line.

# printf format: %d is replaced by the year when main() writes the banner.
$icu_copyright = join("\n",
    "#####################################################################",
    "# Copyright (c) %d, International Business Machines Corporation and",
    "# others. All Rights Reserved.",
    "#####################################################################",
    "",
    "",
);

# Attribution for the RFC the tables are taken from.
$copyright = join("\n",
    "###################",
    "# This file was generated from RFC 3454 (http://www.ietf.org/rfc/rfc3454.txt)",
    "# Copyright (C) The Internet Society (2002). All Rights Reserved. ",
    "###################",
    "",
    "",
);

# "@ARGV" is interpolated right here, i.e. before main() hands the command
# line to GetOptions, so the banner records the options the tool was run with.
$warning = join("\n",
    "###################",
    "# WARNING: This table is generated by filterRFC3454.pl tool with",
    "# options: @ARGV ",
    "###################",
    "",
    "",
);

# Run the program.
main();
31 | ||
32 | #--------------------------------------------------------------------- | |
33 | # The main program | |
34 | ||
# main()
# Parses the command line, (re)creates the destination file with its banner
# and optional @normalize / @check-bidi directives, then scans the RFC text
# for "Start Table" markers and hands every selected table to its writer.
# Finally appends the profile-specific extra tables that were requested.
#
# Calls usage() (which terminates the program) when one of --sourcedir,
# --destdir, --src-filename or --dest-filename is missing, and dies when the
# mutually exclusive --B2 and --B3 are both given or a file cannot be opened.
sub main {
    my ($sourceDir, $destDir, $srcFileName, $destFileName);
    my ($a1, $b1, $b2, $b3);
    my ($c11, $c12, $c21, $c22, $c3, $c4, $c5, $c6, $c7, $c8, $c9);
    my ($writeISCSIProhibitedExtra, $writeXMPPNodeProhibitedExtra);
    my ($writeSASLMap, $writeLDAPMap, $norm, $checkBidi);

    GetOptions(
        "--sourcedir=s"     => \$sourceDir,
        "--destdir=s"       => \$destDir,
        "--src-filename=s"  => \$srcFileName,
        "--dest-filename=s" => \$destFileName,
        "--A1"              => \$a1,
        "--B1"              => \$b1,
        "--B2"              => \$b2,
        "--B3"              => \$b3,
        "--C11"             => \$c11,
        "--C12"             => \$c12,
        "--C21"             => \$c21,
        "--C22"             => \$c22,
        "--C3"              => \$c3,
        "--C4"              => \$c4,
        "--C5"              => \$c5,
        "--C6"              => \$c6,
        "--C7"              => \$c7,
        "--C8"              => \$c8,
        "--C9"              => \$c9,
        "--iscsi"           => \$writeISCSIProhibitedExtra,
        "--xmpp-node"       => \$writeXMPPNodeProhibitedExtra,
        "--sasl"            => \$writeSASLMap,
        "--ldap"            => \$writeLDAPMap,
        "--normalize"       => \$norm,
        "--check-bidi"      => \$checkBidi,
    );
    usage() unless defined $sourceDir;
    usage() unless defined $destDir;
    usage() unless defined $srcFileName;
    usage() unless defined $destFileName;

    # Reject the conflicting switches before the old output file is removed.
    if(defined $b2 && defined $b3){
        die "ERROR: --B2 and --B3 are both specified\!\n";
    }

    my $infile = $sourceDir."/".$srcFileName;
    my $inFH = IO::File->new($infile,"r")
        or die "could not open the file $infile for reading: $! \n";
    my $outfile = $destDir."/".$destFileName;

    # Start from an empty file: every writer below re-opens it in append mode.
    unlink($outfile);
    my $outFH = IO::File->new($outfile,"a")
        or die "could not open the file $outfile for writing: $! \n";

    printf $outFH $icu_copyright, localtime->year()+1900;
    print $outFH $copyright;
    print $outFH $warning;
    print $outFH "\@normalize;;\n"  if defined $norm;
    print $outFH "\@check-bidi;;\n" if defined $checkBidi;
    print $outFH "\n";
    close($outFH)
        or die "could not close the file $outfile: $! \n";

    # One row per RFC table: [ table name pattern, selected?, writer ].
    # The dots are escaped so that only the literal table name matches.
    # NOTE(review): table A.1 is written unconditionally -- --A1 is parsed
    # above but has never been consulted here; that behaviour is kept.
    my @tables = (
        [ qr/A\.1/,     1,             \&createUnassignedTable ],
        [ qr/B\.1/,     defined($b1),  \&createMapToNothing    ],
        [ qr/B\.2/,     defined($b2),  \&createCaseMapNorm     ],
        [ qr/B\.3/,     defined($b3),  \&createCaseMapNoNorm   ],
        [ qr/C\.1\.1/,  defined($c11), \&createProhibitedTable ],
        [ qr/C\.1\.2/,  defined($c12), \&createProhibitedTable ],
        [ qr/C\.2\.1/,  defined($c21), \&createProhibitedTable ],
        [ qr/C\.2\.2/,  defined($c22), \&createProhibitedTable ],
        [ qr/C\.3/,     defined($c3),  \&createProhibitedTable ],
        [ qr/C\.4/,     defined($c4),  \&createProhibitedTable ],
        [ qr/C\.5/,     defined($c5),  \&createProhibitedTable ],
        [ qr/C\.6/,     defined($c6),  \&createProhibitedTable ],
        [ qr/C\.7/,     defined($c7),  \&createProhibitedTable ],
        [ qr/C\.8/,     defined($c8),  \&createProhibitedTable ],
        [ qr/C\.9/,     defined($c9),  \&createProhibitedTable ],
    );

    while(defined (my $line = <$inFH>)){
        next unless $line =~ /Start\sTable/;
        foreach my $entry (@tables){
            my ($pattern, $selected, $writer) = @$entry;
            next unless $selected && $line =~ $pattern;
            # The writer consumes the input up to the table's "End Table"
            # line.  The marker line is passed along for the writers that
            # build their heading from it; the others ignore it.
            $writer->($inFH, $outfile, $line);
            last;
        }
    }

    # Profile-specific additions; their contents are fixed in the writers.
    create_iSCSIExtraProhibitedTable($inFH, $outfile)
        if defined $writeISCSIProhibitedExtra;
    create_XMPPNodeExtraProhibitedTable($inFH, $outfile)
        if defined $writeXMPPNodeProhibitedExtra;
    create_SASLMapTable($inFH, $outfile) if defined $writeSASLMap;
    create_LDAPMapTable($inFH, $outfile) if defined $writeLDAPMap;

    close($inFH);
}
156 | ||
157 | #----------------------------------------------------------------------- | |
# readPrint($inFH, $outFH, $comment, $table)
# Copies one table from the RFC text to the output file.
#
#   $inFH    - input handle; lines are read from its current position
#   $outFH   - output handle the converted lines are printed to
#   $comment - heading printed (followed by a newline) before the entries
#   $table   - table selector: "A" rewrites every entry as UNASSIGNED,
#              "B.1" / "B.2" / "B.3" replace the RFC's mapping remark with
#              MAP, and "C" rewrites every entry as PROHIBITED
#
# Page headings, form feeds and blank lines are skipped.  Reading stops at
# the first line containing "End Table", where the number of code points
# seen is printed as a trailer.  If the input ends first, no trailer is
# written.
sub readPrint{
    my ($inFH, $outFH, $comment, $table) = @_;
    my $count = 0;
    print $outFH $comment."\n";
    while(defined (my $line = <$inFH>)){
        next if $line =~ /Hoffman\s\&\sBlanchet/; # ignore heading
        next if $line =~ /RFC\s3454/;             # ignore heading
        next if $line =~ /\f/;                    # ignore form feed
        # Ignore blank lines, including ones that hold only whitespace;
        # otherwise they would be emitted (and counted) as a bare ";" entry.
        next if $line =~ /^\s*$/;
        # stop when "End Table" is found
        if($line =~ /End\sTable/){
            print $outFH "\n# Total code points $count\n\n";
            return;
        }
        # $print is a package-level debug switch that is not set in this file.
        if(defined $print && $print == 1){
            print $line;
        }
        $line =~ s/-/../;     # "0234-024F" becomes the range "0234..024F"
        $line =~ s/^\s+//;
        # Entries that are just a code point get a terminating ";".
        $line =~ s/$/;/ unless $line =~ /\;/;

        if($table =~ /A/){
            my ($code) = split /;/, $line;
            $line = $code."; ; UNASSIGNED\n";
        }elsif($table =~ /B\.1/){
            $line =~ s/Map to nothing/MAP/;
        }elsif($table =~ /B\.[23]/){
            $line =~ s/Case map/MAP/;
            $line =~ s/Additional folding/MAP/;
        }elsif($table =~ /C/){
            my ($code) = split /;/, $line;
            $line = $code."; ; PROHIBITED\n";
        }

        if($line =~ /\.\./){
            # A range contributes every code point it spans.
            my ($code) = split /;/, $line;
            my ($startStr, $endStr) = split /\.\./, $code;
            my $start = atoi($startStr);
            my $end   = atoi($endStr);
            $count += $end - $start + 1 if $end >= $start;
        }else{
            $count++;
        }
        print $outFH $line;
    }
}
209 | #----------------------------------------------------------------------- | |
# atoi($hexString)
# Returns the numeric value of a hexadecimal code point string such as
# "024F".  Digits A-F are accepted in either case; surrounding whitespace is
# ignored.  An undefined or empty argument yields 0.
#
# The previous digit-by-digit loop added each character numerically, so the
# letters A-F counted as 0 and e.g. "024F" came out as 576 instead of 591.
sub atoi {
    my ($digits) = @_;
    return 0 unless defined $digits;
    $digits =~ s/\s+//g;
    return 0 if $digits eq '';
    return hex($digits);
}
217 | #----------------------------------------------------------------------- | |
# createUnassignedTable($inFH, $outfile)
# Appends table A.1 to $outfile.  The entries are read from $inFH, which the
# caller has positioned just after the table's "Start Table" line, and are
# converted by readPrint using the "A" selector.
# Dies if $outfile cannot be opened for appending or cannot be closed.
sub createUnassignedTable{
    # Lexical copies: the arguments no longer overwrite package globals.
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile,"a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = "# This table contains code points from Table A.1 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "A");
    # Buffered write errors only surface here, so the result is checked.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
226 | #----------------------------------------------------------------------- | |
# createMapToNothing($inFH, $outfile)
# Appends table B.1 to $outfile.  The entries are read from $inFH, which the
# caller has positioned just after the table's "Start Table" line, and are
# converted by readPrint using the "B.1" selector.
# Dies if $outfile cannot be opened for appending or cannot be closed.
sub createMapToNothing{
    # Lexical copies: the arguments no longer overwrite package globals.
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile,"a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = "# This table contains code points from Table B.1 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.1");
    # Buffered write errors only surface here, so the result is checked.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
235 | #----------------------------------------------------------------------- | |
# createCaseMapNorm($inFH, $outfile)
# Appends table B.2 to $outfile.  The entries are read from $inFH, which the
# caller has positioned just after the table's "Start Table" line, and are
# converted by readPrint using the "B.2" selector.  The heading is prefixed
# with the file-level $warning banner.
# Dies if $outfile cannot be opened for appending or cannot be closed.
sub createCaseMapNorm{
    # Lexical copies: the arguments no longer overwrite package globals.
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile,"a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This table contains code points from Table B.2 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.2");
    # Buffered write errors only surface here, so the result is checked.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
244 | #----------------------------------------------------------------------- | |
# createCaseMapNoNorm($inFH, $outfile)
# Appends table B.3 to $outfile.  The entries are read from $inFH, which the
# caller has positioned just after the table's "Start Table" line, and are
# converted by readPrint using the "B.3" selector.  The heading is prefixed
# with the file-level $warning banner.
# Dies if $outfile cannot be opened for appending or cannot be closed.
sub createCaseMapNoNorm{
    # Lexical copies: the arguments no longer overwrite package globals.
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile,"a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This table contains code points from Table B.3 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.3");
    # Buffered write errors only surface here, so the result is checked.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
253 | #----------------------------------------------------------------------- | |
# createProhibitedTable($inFH, $outfile, $line)
# Appends one of the C.* tables to $outfile.
#
#   $inFH    - input handle positioned just after the "Start Table" line
#   $outfile - path of the file to append to
#   $line    - the "Start Table" marker line itself; with the word "Start"
#              and all hyphens removed it becomes the table's heading
#
# The entries are converted by readPrint using the "C" selector.
# Dies if $outfile cannot be opened for appending or cannot be closed.
sub createProhibitedTable{
    # Lexical copies: the caller's variables (notably its current input
    # line) are no longer modified through package globals.
    my ($inFH, $outfile, $line) = @_;
    $line =~ s/Start//;
    $line =~ s/-//g;
    my $comment = "# code points from $line";

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    readPrint($inFH, $outFH, $comment, "C");
    # Buffered write errors only surface here, so the result is checked.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
265 | ||
266 | #----------------------------------------------------------------------- | |
# create_iSCSIExtraProhibitedTable($inFH, $outfile)
# Appends the additional prohibited code points of the iSCSI profile to
# $outfile.  The list is fixed in this routine; $inFH is accepted only so
# that the call matches the other table writers and is not read.
# Dies if $outfile cannot be opened for appending or cannot be closed.
sub create_iSCSIExtraProhibitedTable{
    my ($inFH, $outfile) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print $outFH "# Additional prohibitions from iSCSI profile (rfc3722.txt)\n\n";

    # Inclusive [first, final] ranges; a single code point repeats its value.
    my @prohibited = (
        [0x0021, 0x002C],
        [0x002F, 0x002F],
        [0x003B, 0x0040],
        [0x005B, 0x0060],
        [0x007B, 0x007E],
        [0x3002, 0x3002],
    );

    my $total = 0;
    foreach my $range (@prohibited){
        my ($first, $final) = @$range;
        my $code = sprintf("%04X", $first);
        $code .= sprintf("..%04X", $final) if $final != $first;
        print $outFH "$code; ; PROHIBITED\n";
        $total += $final - $first + 1;
    }
    # The trailer is derived from the list so the two cannot drift apart.
    print $outFH "\n# Total code points $total\n";

    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
283 | #----------------------------------------------------------------------- | |
729e4ab9 A |
# create_XMPPNodeExtraProhibitedTable($inFH, $outfile)
# Appends the additional prohibited code points of the XMPP Nodeprep profile
# to $outfile.  The list is fixed in this routine; $inFH is accepted only so
# that the call matches the other table writers and is not read.
# Dies if $outfile cannot be opened for appending or cannot be closed.
sub create_XMPPNodeExtraProhibitedTable{
    my ($inFH, $outfile) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print $outFH "# Additional prohibitions from XMPP Nodeprep profile (rfc3920.txt)\n\n";

    my @prohibited = qw(0022 0026 0027 002F 003A 003C 003E 0040);
    foreach my $code (@prohibited){
        print $outFH "$code; ; PROHIBITED\n";
    }
    # The trailer is derived from the list so the two cannot drift apart.
    print $outFH "\n# Total code points ".scalar(@prohibited)."\n";

    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
302 | #----------------------------------------------------------------------- | |
# create_SASLMapTable($inFH, $outfile)
# Appends the mapping table of the SASL profile to $outfile.  The table is
# fixed in this routine; $inFH is accepted only so that the call matches the
# other table writers and is not read.
# Dies if $outfile cannot be opened for appending or cannot be closed.
sub create_SASLMapTable{
    my ($inFH, $outfile) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print $outFH "# Map table for SASL profile (rfc4013.txt)\n\n";

    # [ first, final, target ]: every code point in the inclusive range is
    # written on its own line; an empty target means "map to nothing".
    my @mappings = (
        # non-ASCII space characters [C.1.2] to SPACE
        [0x00A0, 0x00A0, "0020"],
        [0x1680, 0x1680, "0020"],
        [0x2000, 0x200B, "0020"],
        [0x202F, 0x202F, "0020"],
        [0x205F, 0x205F, "0020"],
        [0x3000, 0x3000, "0020"],

        # commonly mapped to nothing characters except U+200B to nothing
        [0x00AD, 0x00AD, ""],
        [0x034F, 0x034F, ""],
        [0x1806, 0x1806, ""],
        [0x180B, 0x180D, ""],
        [0x200C, 0x200D, ""],
        [0x2060, 0x2060, ""],
        [0xFE00, 0xFE0F, ""],
        [0xFEFF, 0xFEFF, ""],
    );

    my $total = 0;
    foreach my $entry (@mappings){
        my ($first, $final, $target) = @$entry;
        foreach my $cp ($first .. $final){
            printf $outFH "%04X; %s; MAP\n", $cp, $target;
            $total++;
        }
    }
    # The trailer is derived from the table so the two cannot drift apart.
    print $outFH "\n# Total code points $total\n";

    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
359 | #----------------------------------------------------------------------- | |
# create_LDAPMapTable($inFH, $outfile)
# Appends the mapping table of the LDAP profile to $outfile.  The table is
# fixed in this routine; $inFH is accepted only so that the call matches the
# other table writers and is not read.
# Dies if $outfile cannot be opened for appending or cannot be closed.
sub create_LDAPMapTable{
    my ($inFH, $outfile) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print $outFH "# Map table for LDAP profile (rfc4518.txt)\n\n";

    # [ first, final, target ]: every code point in the inclusive range is
    # written on its own line, in the order listed here; an empty target
    # means "map to nothing".
    my @mappings = (
        # SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
        # points are mapped to nothing.  COMBINING GRAPHEME JOINER (U+034F) and
        # VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are also
        # mapped to nothing.  The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
        # mapped to nothing.
        [0x00AD, 0x00AD, ""],
        [0x034F, 0x034F, ""],
        [0x1806, 0x1806, ""],
        [0x180B, 0x180D, ""],
        [0xFE00, 0xFE0F, ""],
        [0xFFFC, 0xFFFC, ""],

        # CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
        # TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
        # (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
        [0x0009, 0x000D, "0020"],
        [0x0085, 0x0085, "0020"],

        # All other control code (e.g., Cc) points or code points with a
        # control function (e.g., Cf) are mapped to nothing.  The following is
        # a complete list of these code points: U+0000-0008, 000E-001F, 007F-
        # 0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E, 2060-2063,
        # 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001, E0020-E007F.
        [0x0000,  0x0008,  ""],
        [0x000E,  0x001F,  ""],
        [0x007F,  0x0084,  ""],
        [0x0086,  0x009F,  ""],
        [0x06DD,  0x06DD,  ""],
        [0x070F,  0x070F,  ""],
        [0x180E,  0x180E,  ""],
        [0x200C,  0x200F,  ""],
        [0x202A,  0x202E,  ""],
        [0x2060,  0x2063,  ""],
        [0x206A,  0x206F,  ""],
        [0xFEFF,  0xFEFF,  ""],
        [0xFFF9,  0xFFFB,  ""],
        [0x1D173, 0x1D17A, ""],
        [0xE0001, 0xE0001, ""],
        [0xE0020, 0xE007F, ""],

        # ZERO WIDTH SPACE (U+200B) is mapped to nothing.  All other code
        # points with Separator (space, line, or paragraph) property (e.g., Zs,
        # Zl, or Zp) are mapped to SPACE (U+0020).  The following is a complete
        # list of these code points: U+0020, 00A0, 1680, 2000-200A, 2028-2029,
        # 202F, 205F, 3000.
        [0x200B, 0x200B, ""],
        [0x00A0, 0x00A0, "0020"],
        [0x1680, 0x1680, "0020"],
        [0x2000, 0x200A, "0020"],
        [0x2028, 0x2029, "0020"],
        [0x202F, 0x202F, "0020"],
        [0x205F, 0x205F, "0020"],
        [0x3000, 0x3000, "0020"],
    );

    my $total = 0;
    foreach my $entry (@mappings){
        my ($first, $final, $target) = @$entry;
        foreach my $cp ($first .. $final){
            # %04X pads to at least four digits; code points beyond the BMP
            # simply print with five.
            printf $outFH "%04X; %s; MAP\n", $cp, $target;
            $total++;
        }
    }
    # The trailer is derived from the table so the two cannot drift apart.
    print $outFH "\n# Total code points $total\n";

    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
635 | #----------------------------------------------------------------------- | |
374ca955 A |
# usage()
# Prints the command-line help to standard output and terminates the
# program.  It is only called when a required option is missing, so the exit
# status is non-zero to let calling scripts and makefiles notice the failed
# invocation (it used to be 0).
sub usage {
    # Single-quoted here-document: the text is printed verbatim.
    print <<'END';
Usage:
filterRFC3454.pl
Options:
--sourcedir=<directory>
--destdir=<directory>
--src-filename=<name of RFC file>
--dest-filename=<name of destination file>
--A1 Generate data for table A.1
--B1 Generate data for table B.1
--B2 Generate data for table B.2
--B3 Generate data for table B.3
--C11 Generate data for table C.1.1
--C12 Generate data for table C.1.2
--C21 Generate data for table C.2.1
--C22 Generate data for table C.2.2
--C3 Generate data for table C.3
--C4 Generate data for table C.4
--C5 Generate data for table C.5
--C6 Generate data for table C.6
--C7 Generate data for table C.7
--C8 Generate data for table C.8
--C9 Generate data for table C.9
--iscsi Generate data for iSCSI extra prohibited table
--xmpp-node Generate data for XMPP extra prohibited table
--sasl Generate data for SASL map table
--ldap Generate data for LDAP map table
--normalize Embed the normalization directive in the output file
--check-bidi Embed the check bidi directive in the output file

Note, --B2 and --B3 are mutually exclusive.

e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --normalize --check-bidi

filterRFC3454.pl filters the RFC file and creates String prep table files.
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt

END
    exit(1);
}
677 | ||
678 |