#!/usr/bin/perl
# Copyright (c) 2001-2009 International Business Machines
# Corporation and others. All Rights Reserved.

####################################################################################
# filterRFC3454.pl:
# This tool filters the RFC-3454 txt file for StringPrep tables and creates a table
# to be used in NamePrepProfile
#
# Author: Ram Viswanadha
#
####################################################################################

use File::Find;
use File::Basename;
use IO::File;
use Cwd;
use File::Copy;
use Getopt::Long;
use File::Path;
use File::Copy;
use Time::localtime;

# Banner blocks written at the top of the generated file by main().
# $icu_copyright is a printf format: %d receives the current year.
$icu_copyright = "#####################################################################\n# Copyright (c) %d, International Business Machines Corporation and\n# others. All Rights Reserved.\n#####################################################################\n\n";
$copyright = "###################\n# This file was generated from RFC 3454 (http://www.ietf.org/rfc/rfc3454.txt)\n# Copyright (C) The Internet Society (2002). All Rights Reserved. \n###################\n\n";
# @ARGV is interpolated here, before main() hands it to GetOptions, so the
# banner records the command-line options exactly as they were given.
$warning = "###################\n# WARNING: This table is generated by filterRFC3454.pl tool with\n# options: @ARGV \n###################\n\n";

# Run the program.
main();

#---------------------------------------------------------------------
# The main program

# Entry point: parse the command line, write the banner and directives to the
# destination file, then scan the RFC text for "Start Table" markers and hand
# each requested table to its generator. Profile-specific extra tables
# (iSCSI, XMPP nodeprep, SASL, LDAP) are appended afterwards.
#
# Dies if --B2 and --B3 are both given or if a file cannot be opened; calls
# usage() (which exits) if a required option is missing.
sub main {
    my ($sourceDir, $destDir, $srcFileName, $destFileName);
    my ($a1, $b1, $b2, $b3);
    my ($c11, $c12, $c21, $c22, $c3, $c4, $c5, $c6, $c7, $c8, $c9);
    my ($writeISCSIProhibitedExtra, $writeXMPPNodeProhibitedExtra);
    my ($writeSASLMap, $writeLDAPMap, $norm, $checkBidi);

    GetOptions(
        "--sourcedir=s"     => \$sourceDir,
        "--destdir=s"       => \$destDir,
        "--src-filename=s"  => \$srcFileName,
        "--dest-filename=s" => \$destFileName,
        "--A1"              => \$a1,
        "--B1"              => \$b1,
        "--B2"              => \$b2,
        "--B3"              => \$b3,
        "--C11"             => \$c11,
        "--C12"             => \$c12,
        "--C21"             => \$c21,
        "--C22"             => \$c22,
        "--C3"              => \$c3,
        "--C4"              => \$c4,
        "--C5"              => \$c5,
        "--C6"              => \$c6,
        "--C7"              => \$c7,
        "--C8"              => \$c8,
        "--C9"              => \$c9,
        "--iscsi"           => \$writeISCSIProhibitedExtra,
        "--xmpp-node"       => \$writeXMPPNodeProhibitedExtra,
        "--sasl"            => \$writeSASLMap,
        "--ldap"            => \$writeLDAPMap,
        "--normalize"       => \$norm,
        "--check-bidi"      => \$checkBidi,
    );
    usage() unless defined $sourceDir;
    usage() unless defined $destDir;
    usage() unless defined $srcFileName;
    usage() unless defined $destFileName;

    # Reject contradictory options before any file is created, so a bad
    # invocation does not leave a header-only output file behind.
    if (defined $b2 && defined $b3) {
        die "ERROR: --B2 and --B3 are both specified!\n";
    }

    my $infile = $sourceDir."/".$srcFileName;
    my $inFH = IO::File->new($infile, "r")
        or die "could not open the file $infile for reading: $! \n";
    my $outfile = $destDir."/".$destFileName;

    # Start from a fresh file; every generator below re-opens it in append mode.
    unlink($outfile);
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    printf $outFH $icu_copyright, localtime->year()+1900;
    print $outFH $copyright;
    print $outFH $warning;

    print $outFH "\@normalize;;\n"  if defined $norm;
    print $outFH "\@check-bidi;;\n" if defined $checkBidi;
    print $outFH "\n";
    close($outFH)
        or die "could not close the file $outfile: $! \n";

    # Table dispatch: [ table-id pattern, enabled?, generator ].
    # NOTE(review): table A.1 is emitted whether or not --A1 was given; that
    # is the historical behaviour and is kept so existing callers see no change.
    my $prohibited = sub { createProhibitedTable($inFH, $outfile, $_[0]) };
    my @tables = (
        [ qr/A\.1/,     1,             sub { createUnassignedTable($inFH, $outfile) } ],
        [ qr/B\.1/,     defined($b1),  sub { createMapToNothing($inFH, $outfile) } ],
        [ qr/B\.2/,     defined($b2),  sub { createCaseMapNorm($inFH, $outfile) } ],
        [ qr/B\.3/,     defined($b3),  sub { createCaseMapNoNorm($inFH, $outfile) } ],
        [ qr/C\.1\.1/,  defined($c11), $prohibited ],
        [ qr/C\.1\.2/,  defined($c12), $prohibited ],
        [ qr/C\.2\.1/,  defined($c21), $prohibited ],
        [ qr/C\.2\.2/,  defined($c22), $prohibited ],
        [ qr/C\.3/,     defined($c3),  $prohibited ],
        [ qr/C\.4/,     defined($c4),  $prohibited ],
        [ qr/C\.5/,     defined($c5),  $prohibited ],
        [ qr/C\.6/,     defined($c6),  $prohibited ],
        [ qr/C\.7/,     defined($c7),  $prohibited ],
        [ qr/C\.8/,     defined($c8),  $prohibited ],
        [ qr/C\.9/,     defined($c9),  $prohibited ],
    );

    while (defined(my $line = <$inFH>)) {
        next unless $line =~ /Start\sTable/;
        for my $entry (@tables) {
            my ($pattern, $enabled, $generator) = @$entry;
            next unless $enabled && $line =~ $pattern;
            # The generator consumes input up to the matching "End Table" line.
            $generator->($line);
            last;
        }
    }

    if (defined $writeISCSIProhibitedExtra) {
        create_iSCSIExtraProhibitedTable($inFH, $outfile);
    }
    # This test previously read a misspelled variable, so --xmpp-node was
    # silently ignored.
    if (defined $writeXMPPNodeProhibitedExtra) {
        create_XMPPNodeExtraProhibitedTable($inFH, $outfile);
    }
    if (defined $writeSASLMap) {
        create_SASLMapTable($inFH, $outfile);
    }
    if (defined $writeLDAPMap) {
        create_LDAPMapTable($inFH, $outfile);
    }
    close($inFH);
}

#-----------------------------------------------------------------------
# Copy one RFC 3454 table from $inFH to $outFH in gensprep input format.
#
#   $inFH    - input handle positioned just after a "Start Table" line
#   $outFH   - output handle the converted entries are written to
#   $comment - heading written (plus a newline) before the entries
#   $table   - table family: "A" (unassigned), "B.1", "B.2"/"B.3" (mappings)
#              or "C" (prohibited)
#
# Reading stops at the "End Table" line, after which a "Total code points"
# trailer is written. If the input ends first, the sub returns without a
# trailer.
sub readPrint {
    my ($inFH, $outFH, $comment, $table) = @_;
    my $count = 0;
    print $outFH $comment."\n";
    while (defined(my $line = <$inFH>)) {
        # Skip page furniture that the RFC text interleaves with the table.
        next if $line =~ /Hoffman\s\&\sBlanchet/;   # ignore heading
        next if $line =~ /RFC\s3454/;               # ignore heading
        next if $line =~ /\f/;                      # ignore form feed
        next if $line eq "\n";                      # ignore blank lines

        if ($line =~ /End\sTable/) {
            print $outFH "\n# Total code points $count\n\n";
            return;
        }

        # Debug echo; $print is a package-level switch that nothing in this
        # file sets.
        print $line if defined $print && $print == 1;

        $line =~ s/^\s+//;
        # Turn an RFC range "XXXX-YYYY" into "XXXX..YYYY". The match is
        # anchored to the code-point field so hyphens in the trailing
        # description are left alone.
        $line =~ s/^([0-9A-Fa-f]+)-([0-9A-Fa-f]+)/$1..$2/;
        # Entries with no description get a field terminator.
        $line =~ s/$/;/ unless $line =~ /\;/;

        if ($table =~ /A/) {
            my ($code) = split /;/, $line;
            $line = $code."; ; UNASSIGNED\n";
        } elsif ($table =~ /B\.1/) {
            $line =~ s/Map to nothing/MAP/;
        } elsif ($table =~ /B\.[23]/) {
            $line =~ s/Case map/MAP/;
            $line =~ s/Additional folding/MAP/;
        } elsif ($table =~ /C/) {
            my ($code) = split /;/, $line;
            $line = $code."; ; PROHIBITED\n";
        }

        if ($line =~ /\.\./) {
            # A range contributes every code point from start to end inclusive.
            my ($code) = split /;/, $line;
            my ($startStr, $endStr) = split /\.\./, $code;
            my $start = atoi($startStr);
            my $end   = atoi($endStr);
            $count += $end - $start + 1 if $end >= $start;
        } else {
            $count++;
        }
        print $outFH $line;
    }
}
#-----------------------------------------------------------------------
# Convert a hexadecimal code point string (e.g. "024F") to its numeric value.
# Surrounding whitespace is ignored; an undefined or empty argument yields 0.
#
# The previous digit-by-digit loop used each character as a number, so the
# hex digits A-F counted as 0 and any code point containing them came out
# wrong.
sub atoi {
    my ($str) = @_;
    return 0 unless defined $str;
    $str =~ s/\s+//g;
    return 0 if $str eq '';
    return hex($str);
}
#-----------------------------------------------------------------------
# Append table A.1 (unassigned code points) to $outfile, reading the entries
# from $inFH. Dies if the output file cannot be opened or closed.
sub createUnassignedTable {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = "# This table contains code points from Table A.1 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "A");
    # Buffered write errors only surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Append table B.1 (code points mapped to nothing) to $outfile, reading the
# entries from $inFH. Dies if the output file cannot be opened or closed.
sub createMapToNothing {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = "# This table contains code points from Table B.1 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.1");
    # Buffered write errors only surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Append table B.2 (case mapping used with normalization) to $outfile,
# reading the entries from $inFH. The file-level $warning banner is repeated
# ahead of the table heading. Dies if the output file cannot be opened or
# closed.
sub createCaseMapNorm {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This table contains code points from Table B.2 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.2");
    # Buffered write errors only surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Append table B.3 (case mapping used without normalization) to $outfile,
# reading the entries from $inFH. The file-level $warning banner is repeated
# ahead of the table heading. Dies if the output file cannot be opened or
# closed.
sub createCaseMapNoNorm {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This table contains code points from Table B.3 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.3");
    # Buffered write errors only surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Append one of the C.* prohibited-code-point tables to $outfile, reading the
# entries from $inFH.
#
#   $line - the "Start Table ..." marker line; with "Start" and the dashes
#           removed it becomes the heading comment for the table.
#
# Dies if the output file cannot be opened or closed.
sub createProhibitedTable {
    my ($inFH, $outfile, $line) = @_;
    # $line is a private copy, so the caller's marker line is left untouched.
    $line =~ s/Start//;
    $line =~ s/-//g;
    my $comment = "# code points from $line";

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    readPrint($inFH, $outFH, $comment, "C");
    # Buffered write errors only surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}

#-----------------------------------------------------------------------
# Append the additional prohibited code points of the iSCSI profile
# (rfc3722.txt) to $outfile.
#
#   $inFH    - accepted for call-site uniformity; not read
#   $outfile - path of the file to append to
#
# Dies if the output file cannot be opened or closed.
sub create_iSCSIExtraProhibitedTable {
    my ($inFH, $outfile) = @_;

    # Entries exactly as they appear in the output; ranges use "..".
    my @prohibited = qw(
        0021..002C
        002F
        003B..0040
        005B..0060
        007B..007E
        3002
    );

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print $outFH "# Additional prohibitions from iSCSI profile (rfc3722.txt)\n\n";
    for my $entry (@prohibited) {
        print $outFH $entry."; ; PROHIBITED\n";
    }
    print $outFH "\n# Total code points 30\n";
    # Buffered write errors only surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Append the additional prohibited code points of the XMPP Nodeprep profile
# (rfc3920.txt) to $outfile.
#
#   $inFH    - accepted for call-site uniformity; not read
#   $outfile - path of the file to append to
#
# Dies if the output file cannot be opened or closed.
sub create_XMPPNodeExtraProhibitedTable {
    my ($inFH, $outfile) = @_;

    my @prohibited = qw(0022 0026 0027 002F 003A 003C 003E 0040);

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print $outFH "# Additional prohibitions from XMPP Nodeprep profile (rfc3920.txt)\n\n";
    for my $code (@prohibited) {
        print $outFH $code."; ; PROHIBITED\n";
    }
    print $outFH "\n# Total code points 8\n";
    # Buffered write errors only surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Append the mapping table of the SASL profile (rfc4013.txt) to $outfile.
#
#   $inFH    - accepted for call-site uniformity; not read
#   $outfile - path of the file to append to
#
# Dies if the output file cannot be opened or closed.
sub create_SASLMapTable {
    my ($inFH, $outfile) = @_;

    # Expand an inclusive code point range into 4-digit upper-case hex strings.
    my $range = sub {
        my ($first, $last) = @_;
        return map { sprintf("%04X", $_) } $first .. $last;
    };

    # non-ASCII space characters [C.1.2] to SPACE
    my @to_space = (
        qw(00A0 1680),
        $range->(0x2000, 0x200B),
        qw(202F 205F 3000),
    );

    # commonly mapped to nothing characters except U+200B to nothing
    my @to_nothing = (
        qw(00AD 034F 1806 180B 180C 180D 200C 200D 2060),
        $range->(0xFE00, 0xFE0F),
        'FEFF',
    );

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print $outFH "# Map table for SASL profile (rfc4013.txt)\n\n";
    for my $code (@to_space) {
        print $outFH $code."; 0020; MAP\n";
    }
    for my $code (@to_nothing) {
        print $outFH $code."; ; MAP\n";
    }
    # Derived from the lists above (43) so the trailer cannot drift from the data.
    my $total = scalar(@to_space) + scalar(@to_nothing);
    print $outFH "\n# Total code points $total\n";
    # Buffered write errors only surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Append the mapping table of the LDAP profile (rfc4518.txt) to $outfile.
#
#   $inFH    - accepted for call-site uniformity; not read
#   $outfile - path of the file to append to
#
# Entries are written in the order of the groups below; each group maps its
# code points either to nothing or to SPACE (U+0020).
# Dies if the output file cannot be opened or closed.
sub create_LDAPMapTable {
    my ($inFH, $outfile) = @_;

    # Expand an inclusive code point range into upper-case hex strings,
    # zero-padded to at least four digits.
    my $range = sub {
        my ($first, $last) = @_;
        return map { sprintf("%04X", $_) } $first .. $last;
    };

    # Each group is [ target, code points... ]; an empty target means
    # "map to nothing".
    my @groups = (
        # SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
        # points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and
        # VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are also
        # mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
        # mapped to nothing.
        [ '',
          qw(00AD 034F 1806 180B 180C 180D),
          $range->(0xFE00, 0xFE0F),
          'FFFC',
        ],

        # CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
        # TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
        # (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
        [ '0020',
          $range->(0x0009, 0x000D),
          '0085',
        ],

        # All other control code (e.g., Cc) points or code points with a
        # control function (e.g., Cf) are mapped to nothing. The following is
        # a complete list of these code points: U+0000-0008, 000E-001F, 007F-
        # 0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E, 2060-2063,
        # 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001, E0020-E007F.
        [ '',
          $range->(0x0000, 0x0008),
          $range->(0x000E, 0x001F),
          $range->(0x007F, 0x0084),
          $range->(0x0086, 0x009F),
          qw(06DD 070F 180E),
          $range->(0x200C, 0x200F),
          $range->(0x202A, 0x202E),
          $range->(0x2060, 0x2063),
          $range->(0x206A, 0x206F),
          'FEFF',
          $range->(0xFFF9, 0xFFFB),
          $range->(0x1D173, 0x1D17A),
          'E0001',
          $range->(0xE0020, 0xE007F),
        ],

        # ZERO WIDTH SPACE (U+200B) is mapped to nothing.
        [ '', '200B' ],

        # All other code points with Separator (space, line, or paragraph)
        # property (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The
        # following is a complete list of these code points: U+0020, 00A0,
        # 1680, 2000-200A, 2028-2029, 202F, 205F, 3000.
        [ '0020',
          qw(00A0 1680),
          $range->(0x2000, 0x200A),
          qw(2028 2029 202F 205F 3000),
        ],
    );

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print $outFH "# Map table for LDAP profile (rfc4518.txt)\n\n";

    my $total = 0;
    for my $group (@groups) {
        my ($target, @codes) = @$group;
        my $suffix = $target eq '' ? "; ; MAP\n" : "; ".$target."; MAP\n";
        for my $code (@codes) {
            print $outFH $code.$suffix;
        }
        $total += scalar(@codes);
    }

    # Derived from the groups above (238) so the trailer cannot drift from the data.
    print $outFH "\n# Total code points $total\n";
    # Buffered write errors only surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Print the command-line help and terminate the program.
#
# This sub is only called when a required option is missing, so it exits
# with a non-zero status to let calling scripts and makefiles detect the
# failed invocation.
sub usage {
    print <<"END";
Usage:
filterRFC3454.pl
Options:
 --sourcedir=<directory>
 --destdir=<directory>
 --src-filename=<name of RFC file>
 --dest-filename=<name of destination file>
 --A1 Generate data for table A.1
 --B1 Generate data for table B.1
 --B2 Generate data for table B.2
 --B3 Generate data for table B.3
 --C11 Generate data for table C.1.1
 --C12 Generate data for table C.1.2
 --C21 Generate data for table C.2.1
 --C22 Generate data for table C.2.2
 --C3 Generate data for table C.3
 --C4 Generate data for table C.4
 --C5 Generate data for table C.5
 --C6 Generate data for table C.6
 --C7 Generate data for table C.7
 --C8 Generate data for table C.8
 --C9 Generate data for table C.9
 --iscsi Generate data for iSCSI extra prohibited table
 --xmpp-node Generate data for XMPP extra prohibited table
 --sasl Generate data for SASL map table
 --ldap Generate data for LDAP map table
 --normalize Embed the normalization directive in the output file
 --check-bidi Embed the check bidi directive in the output file

Note, --B2 and --B3 are mutually exclusive.

e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --normalize --check-bidi

filterRFC3454.pl filters the RFC file and creates String prep table files.
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt

END
    exit(1);
}