]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/gensprep/filterRFC3454.pl
ICU-66108.tar.gz
[apple/icu.git] / icuSources / tools / gensprep / filterRFC3454.pl
1 #!/usr/bin/perl
2 # Copyright (C) 2016 and later: Unicode, Inc. and others.
3 # License & terms of use: http://www.unicode.org/copyright.html
4 # Copyright (c) 2001-2015 International Business Machines
5 # Corporation and others. All Rights Reserved.
6
7 ####################################################################################
8 # filterRFC3454.pl:
9 # This tool filters the RFC-3454 txt file for StringPrep tables and creates a table
10 # to be used in NamePrepProfile
11 #
12 # Author: Ram Viswanadha
13 #
14 ####################################################################################
15
16 use File::Find;
17 use File::Basename;
18 use IO::File;
19 use Cwd;
20 use File::Copy;
21 use Getopt::Long;
22 use File::Path;
23 use File::Copy;
24 use Time::localtime;
25
# Header templates written at the top of every generated file.
# $icu_copyright is a printf format: its %d receives the current year.
$icu_copyright =
      "#####################################################################\n"
    . "# Copyright (c) %d, International Business Machines Corporation and\n"
    . "# others. All Rights Reserved.\n"
    . "#####################################################################\n"
    . "\n";

# Attribution for the RFC the tables are extracted from.
$copyright =
      "###################\n"
    . "# This file was generated from RFC 3454 (http://www.ietf.org/rfc/rfc3454.txt)\n"
    . "# Copyright (C) The Internet Society (2002). All Rights Reserved. \n"
    . "###################\n"
    . "\n";

# Records the command-line options used for this run. @ARGV is interpolated
# here, before GetOptions() in main() removes the options it recognizes.
$warning =
      "###################\n"
    . "# WARNING: This table is generated by filterRFC3454.pl tool with\n"
    . "# options: @ARGV \n"
    . "###################\n"
    . "\n";

# Run the program.
main();
31
32 #---------------------------------------------------------------------
33 # The main program
34
# Entry point: parses the command line, writes the file header, then scans
# the RFC text for "Start Table" markers and hands each selected table to
# the matching create* routine, which appends to the output file.
#
# Dies if the input or output file cannot be opened, if the header cannot be
# flushed, or if --B2 and --B3 are both given. Calls usage() (which exits)
# when a required option is missing.
sub main {
    my ($sourceDir, $destDir, $srcFileName, $destFileName);
    my ($a1, $b1, $b2, $b3);
    my ($c11, $c12, $c21, $c22, $c3, $c4, $c5, $c6, $c7, $c8, $c9);
    my ($writeISCSIProhibitedExtra, $writeXMPPNodeProhibitedExtra);
    my ($writeSASLMap, $writeLDAPMap);
    my ($norm, $checkBidi);

    GetOptions(
        "--sourcedir=s"     => \$sourceDir,
        "--destdir=s"       => \$destDir,
        "--src-filename=s"  => \$srcFileName,
        "--dest-filename=s" => \$destFileName,
        "--A1"              => \$a1,
        "--B1"              => \$b1,
        "--B2"              => \$b2,
        "--B3"              => \$b3,
        "--C11"             => \$c11,
        "--C12"             => \$c12,
        "--C21"             => \$c21,
        "--C22"             => \$c22,
        "--C3"              => \$c3,
        "--C4"              => \$c4,
        "--C5"              => \$c5,
        "--C6"              => \$c6,
        "--C7"              => \$c7,
        "--C8"              => \$c8,
        "--C9"              => \$c9,
        "--iscsi"           => \$writeISCSIProhibitedExtra,
        "--xmpp-node"       => \$writeXMPPNodeProhibitedExtra,
        "--sasl"            => \$writeSASLMap,
        "--ldap"            => \$writeLDAPMap,
        "--normalize"       => \$norm,
        "--check-bidi"      => \$checkBidi,
    );

    # All four path options are mandatory; say which one is missing before
    # printing the general usage text.
    for my $required (
        [ 'sourcedir',     $sourceDir ],
        [ 'destdir',       $destDir ],
        [ 'src-filename',  $srcFileName ],
        [ 'dest-filename', $destFileName ],
    ) {
        my ($name, $value) = @$required;
        next if defined $value;
        print STDERR "ERROR: missing required option --$name\n";
        usage();
    }

    # Validate option combinations before touching any file, so that a bad
    # invocation does not destroy a previously generated output file.
    if (defined $b2 && defined $b3) {
        die "ERROR: --B2 and --B3 are both specified!\n";
    }

    my $infile = $sourceDir . "/" . $srcFileName;
    my $inFH = IO::File->new($infile, "r")
        or die "could not open the file $infile for reading: $! \n";
    my $outfile = $destDir . "/" . $destFileName;

    # Start from a fresh file; every writer below re-opens it in append mode.
    unlink($outfile);
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    printf {$outFH} $icu_copyright, localtime->year() + 1900;
    print {$outFH} $copyright;
    print {$outFH} $warning;

    if (defined $norm) {
        print {$outFH} "\@normalize;;\n";
    }
    if (defined $checkBidi) {
        print {$outFH} "\@check-bidi;;\n";
    }
    print {$outFH} "\n";
    close($outFH)
        or die "could not close the file $outfile: $! \n";

    # Dispatch table: [ table id pattern, enabled?, handler, pass marker line? ].
    # Dots are escaped so that "C.3" cannot match e.g. "C13".
    # NOTE(review): --A1 is parsed but never consulted; table A.1 has always
    # been emitted unconditionally and that behavior is kept here. Confirm
    # whether A.1 should honor the flag like the other tables.
    my @tables = (
        [ qr/A\.1/,    1,             \&createUnassignedTable, 0 ],
        [ qr/B\.1/,    defined($b1),  \&createMapToNothing,    0 ],
        [ qr/B\.2/,    defined($b2),  \&createCaseMapNorm,     0 ],
        [ qr/B\.3/,    defined($b3),  \&createCaseMapNoNorm,   0 ],
        [ qr/C\.1\.1/, defined($c11), \&createProhibitedTable, 1 ],
        [ qr/C\.1\.2/, defined($c12), \&createProhibitedTable, 1 ],
        [ qr/C\.2\.1/, defined($c21), \&createProhibitedTable, 1 ],
        [ qr/C\.2\.2/, defined($c22), \&createProhibitedTable, 1 ],
        [ qr/C\.3/,    defined($c3),  \&createProhibitedTable, 1 ],
        [ qr/C\.4/,    defined($c4),  \&createProhibitedTable, 1 ],
        [ qr/C\.5/,    defined($c5),  \&createProhibitedTable, 1 ],
        [ qr/C\.6/,    defined($c6),  \&createProhibitedTable, 1 ],
        [ qr/C\.7/,    defined($c7),  \&createProhibitedTable, 1 ],
        [ qr/C\.8/,    defined($c8),  \&createProhibitedTable, 1 ],
        [ qr/C\.9/,    defined($c9),  \&createProhibitedTable, 1 ],
    );

    while (defined(my $line = <$inFH>)) {
        next unless $line =~ /Start\sTable/;
        for my $entry (@tables) {
            my ($pattern, $enabled, $handler, $wants_line) = @$entry;
            next unless $enabled && $line =~ $pattern;
            # The handler consumes the input up to the table's "End Table"
            # marker, so scanning resumes after the table.
            $handler->($inFH, $outfile, ($wants_line ? $line : ()));
            last;
        }
    }

    # Profile-specific tables that are hard-coded rather than read from the RFC.
    if (defined $writeISCSIProhibitedExtra) {
        create_iSCSIExtraProhibitedTable($inFH, $outfile);
    }
    if (defined $writeXMPPNodeProhibitedExtra) {
        create_XMPPNodeExtraProhibitedTable($inFH, $outfile);
    }
    if (defined $writeSASLMap) {
        create_SASLMapTable($inFH, $outfile);
    }
    if (defined $writeLDAPMap) {
        create_LDAPMapTable($inFH, $outfile);
    }
    close($inFH);
}
156
157 #-----------------------------------------------------------------------
# Copies one RFC 3454 table from $inFH to $outFH in gensprep format.
#
# Parameters:
#   $inFH    - input handle positioned just after a "Start Table" line
#   $outFH   - output handle to write to
#   $comment - comment text written (followed by a newline) before the table
#   $table   - table family: "A", "B.1", "B.2", "B.3" or "C"
#
# Reads until the "End Table" marker, then writes a "Total code points"
# trailer and returns. Warns if the input ends before the marker is seen.
sub readPrint {
    my ($inFH, $outFH, $comment, $table) = @_;
    my $count = 0;

    print {$outFH} $comment . "\n";
    while (defined(my $line = <$inFH>)) {
        next if $line =~ /Hoffman\s\&\sBlanchet/;    # ignore heading
        next if $line =~ /RFC\s3454/;                # ignore heading
        next if $line =~ /\f/;                       # ignore form feed
        # Ignore blank lines, including whitespace-only and CRLF ones, which
        # would otherwise be emitted as entries with an empty code point.
        next if $line =~ /^\s*$/;

        if ($line =~ /End\sTable/) {
            print {$outFH} "\n# Total code points $count\n\n";
            return;
        }

        # Debug hook: echo raw input when the package variable $print is 1.
        print $line if defined $print && $print == 1;

        $line =~ s/^\s+//;
        # Convert "XXXX-YYYY" to "XXXX..YYYY", but only for the leading code
        # point range; hyphens later in the line are left alone.
        $line =~ s/^([0-9A-Fa-f]+)-([0-9A-Fa-f]+)/$1..$2/;
        # Entries without any field separator get one appended.
        $line =~ s/$/;/ unless $line =~ /;/;

        my ($code) = split /;/, $line;
        $code = '' unless defined $code;

        if ($table =~ /A/) {
            $line = $code . "; ; UNASSIGNED\n";
        }
        elsif ($table =~ /B\.1/) {
            $line =~ s/Map to nothing/MAP/;
        }
        elsif ($table =~ /B\.[23]/) {
            $line =~ s/Case map/MAP/;
            $line =~ s/Additional folding/MAP/;
        }
        elsif ($table =~ /C/) {
            $line = $code . "; ; PROHIBITED\n";
        }

        # Count code points: a range contributes its full (inclusive) size.
        # hex() is used so that digits A-F are honored.
        if ($code =~ /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)/) {
            my ($start, $end) = (hex($1), hex($2));
            $count += $end - $start + 1 if $end >= $start;
        }
        else {
            $count++;
        }
        print {$outFH} $line;
    }

    warn "readPrint: input ended before an 'End Table' marker was found\n";
    return;
}
209 #-----------------------------------------------------------------------
# Converts a string of hexadecimal digits (e.g. "024F") to its numeric value.
#
# Surrounding whitespace is ignored. An undefined or empty argument yields 0.
# The previous digit-by-digit loop added each character numerically, so the
# digits A-F contributed 0 and e.g. "FF" evaluated to 0 instead of 255.
sub atoi {
    my ($digits) = @_;
    return 0 unless defined $digits;
    $digits =~ s/^\s+//;
    $digits =~ s/\s+$//;
    return 0 if $digits eq '';
    return hex($digits);
}
217 #-----------------------------------------------------------------------
# Appends RFC 3454 table A.1 (unassigned code points) to $outfile.
#
# Parameters:
#   $inFH    - input handle positioned just after the "Start Table A.1" line
#   $outfile - path of the output file, opened here in append mode
#
# Dies if the output file cannot be opened or closed.
sub createUnassignedTable {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = "# This table contains code points from Table A.1 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "A");
    # Buffered write errors only surface at close time, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
    return;
}
226 #-----------------------------------------------------------------------
# Appends RFC 3454 table B.1 (code points mapped to nothing) to $outfile.
#
# Parameters:
#   $inFH    - input handle positioned just after the "Start Table B.1" line
#   $outfile - path of the output file, opened here in append mode
#
# Dies if the output file cannot be opened or closed.
sub createMapToNothing {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = "# This table contains code points from Table B.1 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.1");
    # Buffered write errors only surface at close time, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
    return;
}
235 #-----------------------------------------------------------------------
# Appends RFC 3454 table B.2 (case mapping used with normalization) to
# $outfile. The table is preceded by the file-level $warning banner.
#
# Parameters:
#   $inFH    - input handle positioned just after the "Start Table B.2" line
#   $outfile - path of the output file, opened here in append mode
#
# Dies if the output file cannot be opened or closed.
sub createCaseMapNorm {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = $warning . "# This table contains code points from Table B.2 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.2");
    # Buffered write errors only surface at close time, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
    return;
}
244 #-----------------------------------------------------------------------
# Appends RFC 3454 table B.3 (case mapping used without normalization) to
# $outfile. The table is preceded by the file-level $warning banner.
#
# Parameters:
#   $inFH    - input handle positioned just after the "Start Table B.3" line
#   $outfile - path of the output file, opened here in append mode
#
# Dies if the output file cannot be opened or closed.
sub createCaseMapNoNorm {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = $warning . "# This table contains code points from Table B.3 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.3");
    # Buffered write errors only surface at close time, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
    return;
}
253 #-----------------------------------------------------------------------
# Appends one of the RFC 3454 "C" (prohibited code points) tables to $outfile.
#
# Parameters:
#   $inFH    - input handle positioned just after the table's start marker
#   $outfile - path of the output file, opened here in append mode
#   $line    - the "Start Table ..." marker line; with "Start" and all
#              hyphens removed it becomes the table's heading comment
#
# Dies if the output file cannot be opened or closed.
sub createProhibitedTable {
    my ($inFH, $outfile, $line) = @_;

    # Work on a private copy so the caller's marker line is left untouched.
    $line =~ s/Start//;
    $line =~ s/-//g;
    my $comment = "# code points from $line";

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    readPrint($inFH, $outFH, $comment, "C");
    # Buffered write errors only surface at close time, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
    return;
}
265
266 #-----------------------------------------------------------------------
# Appends the additional prohibited code points of the iSCSI profile
# (RFC 3722) to $outfile. The table is hard-coded; $inFH is accepted for
# symmetry with the other table writers but is not read.
#
# Dies if the output file cannot be opened or closed.
sub create_iSCSIExtraProhibitedTable {
    my ($inFH, $outfile) = @_;

    # Entries in output order; the ranges below cover 30 code points in total.
    my @prohibited = qw(
        0021..002C
        002F
        003B..0040
        005B..0060
        007B..007E
        3002
    );

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print {$outFH} "# Additional prohibitions from iSCSI profile (rfc3722.txt)\n\n";
    for my $entry (@prohibited) {
        print {$outFH} "$entry; ; PROHIBITED\n";
    }
    print {$outFH} "\n# Total code points 30\n";
    # Buffered write errors only surface at close time, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
    return;
}
283 #-----------------------------------------------------------------------
# Appends the additional prohibited code points of the XMPP Nodeprep profile
# (RFC 3920) to $outfile. The table is hard-coded; $inFH is accepted for
# symmetry with the other table writers but is not read.
#
# Dies if the output file cannot be opened or closed.
sub create_XMPPNodeExtraProhibitedTable {
    my ($inFH, $outfile) = @_;

    # Single code points, in output order.
    my @prohibited = qw(0022 0026 0027 002F 003A 003C 003E 0040);

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print {$outFH} "# Additional prohibitions from XMPP Nodeprep profile (rfc3920.txt)\n\n";
    for my $code_point (@prohibited) {
        print {$outFH} "$code_point; ; PROHIBITED\n";
    }
    print {$outFH} "\n# Total code points " . scalar(@prohibited) . "\n";
    # Buffered write errors only surface at close time, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
    return;
}
302 #-----------------------------------------------------------------------
# Appends the mapping table of the SASL profile (RFC 4013) to $outfile.
# The table is hard-coded; $inFH is accepted for symmetry with the other
# table writers but is not read.
#
# Dies if the output file cannot be opened or closed.
sub create_SASLMapTable {
    my ($inFH, $outfile) = @_;

    # Ordered groups of [ replacement, code points... ]. An empty replacement
    # means "map to nothing". Order matters: it is the output order.
    my @groups = (
        # non-ASCII space characters [C.1.2] are mapped to SPACE
        [ '0020',
          0x00A0, 0x1680, 0x2000 .. 0x200B, 0x202F, 0x205F, 0x3000 ],
        # the "commonly mapped to nothing" characters, except U+200B which is
        # already mapped to SPACE above
        [ '',
          0x00AD, 0x034F, 0x1806, 0x180B .. 0x180D, 0x200C, 0x200D, 0x2060,
          0xFE00 .. 0xFE0F, 0xFEFF ],
    );

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print {$outFH} "# Map table for SASL profile (rfc4013.txt)\n\n";

    my $total = 0;
    for my $group (@groups) {
        my ($replacement, @code_points) = @$group;
        for my $code_point (@code_points) {
            printf {$outFH} "%04X; %s; MAP\n", $code_point, $replacement;
            $total++;
        }
    }
    print {$outFH} "\n# Total code points $total\n";

    # Buffered write errors only surface at close time, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
    return;
}
359 #-----------------------------------------------------------------------
# Appends the mapping table of the LDAP profile (RFC 4518) to $outfile.
# The table is hard-coded; $inFH is accepted for symmetry with the other
# table writers but is not read.
#
# Dies if the output file cannot be opened or closed.
sub create_LDAPMapTable {
    my ($inFH, $outfile) = @_;

    # Ordered groups of [ replacement, code points... ]. An empty replacement
    # means "map to nothing". Order matters: it is the output order.
    my @groups = (
        # SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
        # points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and
        # VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are also
        # mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
        # mapped to nothing.
        [ '',
          0x00AD, 0x034F, 0x1806, 0x180B .. 0x180D, 0xFE00 .. 0xFE0F, 0xFFFC ],

        # CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
        # TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
        # (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
        [ '0020',
          0x0009 .. 0x000D, 0x0085 ],

        # All other control code (e.g., Cc) points or code points with a
        # control function (e.g., Cf) are mapped to nothing: U+0000-0008,
        # 000E-001F, 007F-0084, 0086-009F, 06DD, 070F, 180E, 200C-200F,
        # 202A-202E, 2060-2063, 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A,
        # E0001, E0020-E007F.
        [ '',
          0x0000 .. 0x0008, 0x000E .. 0x001F, 0x007F .. 0x0084,
          0x0086 .. 0x009F, 0x06DD, 0x070F, 0x180E, 0x200C .. 0x200F,
          0x202A .. 0x202E, 0x2060 .. 0x2063, 0x206A .. 0x206F, 0xFEFF,
          0xFFF9 .. 0xFFFB, 0x1D173 .. 0x1D17A, 0xE0001, 0xE0020 .. 0xE007F ],

        # ZERO WIDTH SPACE (U+200B) is mapped to nothing.
        [ '',
          0x200B ],

        # All other code points with Separator (space, line, or paragraph)
        # property (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020):
        # U+00A0, 1680, 2000-200A, 2028-2029, 202F, 205F, 3000.
        # (U+0020 itself needs no mapping.)
        [ '0020',
          0x00A0, 0x1680, 0x2000 .. 0x200A, 0x2028, 0x2029, 0x202F, 0x205F,
          0x3000 ],
    );

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print {$outFH} "# Map table for LDAP profile (rfc4518.txt)\n\n";

    my $total = 0;
    for my $group (@groups) {
        my ($replacement, @code_points) = @$group;
        for my $code_point (@code_points) {
            # %04X pads BMP code points to four digits and leaves the
            # five-digit supplementary code points as they are.
            printf {$outFH} "%04X; %s; MAP\n", $code_point, $replacement;
            $total++;
        }
    }
    print {$outFH} "\n# Total code points $total\n";

    # Buffered write errors only surface at close time, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
    return;
}
635 #-----------------------------------------------------------------------
# Prints the command-line help to standard output and terminates the program.
#
# NOTE(review): the exit status is 0 even though the visible callers invoke
# this only when a required option is missing; kept as-is so that existing
# build scripts see the same status. Confirm whether a non-zero status is
# wanted.
sub usage {
    # Single-quoted heredoc: the help text is emitted verbatim, with no
    # variable interpolation.
    print <<'END';
Usage:
filterRFC3454.pl
Options:
--sourcedir=<directory>
--destdir=<directory>
--src-filename=<name of RFC file>
--dest-filename=<name of destination file>
--A1 Generate data for table A.1
--B1 Generate data for table B.1
--B2 Generate data for table B.2
--B3 Generate data for table B.3
--C11 Generate data for table C.1.1
--C12 Generate data for table C.1.2
--C21 Generate data for table C.2.1
--C22 Generate data for table C.2.2
--C3 Generate data for table C.3
--C4 Generate data for table C.4
--C5 Generate data for table C.5
--C6 Generate data for table C.6
--C7 Generate data for table C.7
--C8 Generate data for table C.8
--C9 Generate data for table C.9
--iscsi Generate data for iSCSI extra prohibited table
--xmpp-node Generate data for XMPP extra prohibited table
--sasl Generate data for SASL map table
--ldap Generate data for LDAP map table
--normalize Embed the normalization directive in the output file
--check-bidi Embed the check bidi directive in the output file

Note, --B2 and --B3 are mutually exclusive.

e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --normalize --check-bidi

filterRFC3454.pl filters the RFC file and creates String prep table files.
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt

END
    exit(0);
}
677
678