2 # Copyright (C) 2016 and later: Unicode, Inc. and others.
3 # License & terms of use: http://www.unicode.org/copyright.html
4 # Copyright (c) 2001-2015 International Business Machines
5 # Corporation and others. All Rights Reserved.
7 ####################################################################################
9 # This tool filters the RFC-3454 txt file for StringPrep tables and creates a table
10 # to be used in NamePrepProfile
12 # Author: Ram Viswanadha
14 ####################################################################################
# Banner text written at the top of the generated file.
# $icu_copyright is a printf template: its %d slot is filled with a year
# when the banner is written.
26 $icu_copyright = "#####################################################################\n# Copyright (c) %d, International Business Machines Corporation and\n# others. All Rights Reserved.\n#####################################################################\n\n";
# Attribution for the RFC 3454 data the tables are derived from.
27 $copyright = "###################\n# This file was generated from RFC 3454 (http://www.ietf.org/rfc/rfc3454.txt)\n# Copyright (C) The Internet Society (2002). All Rights Reserved. \n###################\n\n";
# "Generated file" warning; @ARGV is interpolated into the string at the
# moment this assignment runs, recording the command-line arguments.
28 $warning = "###################\n# WARNING: This table is generated by filterRFC3454.pl tool with\n# options: @ARGV \n###################\n\n";
32 #---------------------------------------------------------------------
# Command-line option table: each option spec is paired with a reference to
# the package variable that receives its value.
# NOTE(review): the call these pairs belong to is not visible in this
# excerpt -- presumably Getopt::Long's GetOptions, where "=s" marks an
# option taking a string argument; confirm.
37 "--sourcedir=s" => \
$sourceDir,
38 "--destdir=s" => \
$destDir,
39 "--src-filename=s" => \
$srcFileName,
40 "--dest-filename=s" => \
$destFileName,
56 "--iscsi" => \
$writeISCSIProhibitedExtra,
57 "--xmpp-node" => \
$writeXMPPNodeProhibitedExtra,
58 "--sasl" => \
$writeSASLMap,
59 "--ldap" => \
$writeLDAPMap,
60 "--normalize" => \
$norm,
61 "--check-bidi" => \
$checkBidi,
# The four path options are mandatory: usage() is called when any of them
# was not supplied.
63 usage
() unless defined $sourceDir;
64 usage
() unless defined $destDir;
65 usage
() unless defined $srcFileName;
66 usage
() unless defined $destFileName;
# Open the RFC text for reading; abort with the OS error on failure.
68 $infile = $sourceDir."/".$srcFileName;
69 $inFH = IO
::File-
>new($infile,"r")
70 or die "could not open the file $infile for reading: $! \n";
# Open the destination file in append mode ("a").
71 $outfile = $destDir."/".$destFileName;
74 $outFH = IO
::File-
>new($outfile,"a")
75 or die "could not open the file $outfile for writing: $! \n";
# Write the banners; the %d in $icu_copyright receives year()+1900.
77 printf $outFH $icu_copyright, localtime-
>year()+1900;
78 print $outFH $copyright;
79 print $outFH $warning;
# Directive lines embedded in the generated file.
# NOTE(review): --normalize stores into $norm above, but no guard on $norm is
# visible around the @normalize line in this excerpt -- confirm it is
# conditional in the full file.
82 print $outFH "\@normalize;;\n";
84 if(defined $checkBidi) {
85 print $outFH "\@check-bidi;;\n";
# --B2 and --B3 may not be combined: requesting both is a fatal error.
90 if(defined $b2 && defined $b3){
91 die "ERROR: --B2 and --B3 are both specified\!\n";
# Scan the RFC text line by line. Only lines containing "Start Table" are
# acted on; the matching handler is given the same input handle and reads
# the table body from it.
94 while(defined ($line=<$inFH>)){
95 next unless $line=~ /Start\sTable/;
# NOTE(review): the condition guarding this call is not visible in this
# excerpt.
97 createUnassignedTable
($inFH,$outfile);
# Each table is produced only when its marker matches AND its command-line
# switch was given.
# NOTE(review): the dots in these patterns are unescaped, so they match any
# character (e.g. /C.3/ also matches "C13"); escape them if an exact table
# name is intended.
99 if($line =~ /B.1/ && defined $b1){
100 createMapToNothing
($inFH,$outfile);
102 if($line =~ /B.2/ && defined $b2){
103 createCaseMapNorm
($inFH,$outfile);
105 if($line =~ /B.3/ && defined $b3){
106 createCaseMapNoNorm
($inFH,$outfile);
# The C.* handlers also receive the marker line itself; createProhibitedTable
# uses it to build the table's heading comment.
108 if($line =~ /C.1.1/ && defined $c11 ){
109 createProhibitedTable
($inFH,$outfile,$line);
111 if($line =~ /C.1.2/ && defined $c12 ){
112 createProhibitedTable
($inFH,$outfile,$line);
114 if($line =~ /C.2.1/ && defined $c21 ){
115 createProhibitedTable
($inFH,$outfile,$line);
117 if($line =~ /C.2.2/ && defined $c22 ){
118 createProhibitedTable
($inFH,$outfile,$line);
120 if($line =~ /C.3/ && defined $c3 ){
121 createProhibitedTable
($inFH,$outfile,$line);
123 if($line =~ /C.4/ && defined $c4 ){
124 createProhibitedTable
($inFH,$outfile,$line);
126 if($line =~ /C.5/ && defined $c5 ){
127 createProhibitedTable
($inFH,$outfile,$line);
129 if($line =~ /C.6/ && defined $c6 ){
130 createProhibitedTable
($inFH,$outfile,$line);
132 if($line =~ /C.7/ && defined $c7 ){
133 createProhibitedTable
($inFH,$outfile,$line);
135 if($line =~ /C.8/ && defined $c8 ){
136 createProhibitedTable
($inFH,$outfile,$line);
138 if($line =~ /C.9/ && defined $c9 ){
139 createProhibitedTable
($inFH,$outfile,$line);
# Profile-specific extras (iSCSI, XMPP Nodeprep, SASL, LDAP), each enabled by
# its own switch and not tied to a table marker in the RFC text.
142 if( defined $writeISCSIProhibitedExtra){
143 create_iSCSIExtraProhibitedTable
($inFH, $outfile);
145 if( defined $writeXMPPNodeProhibitedExtra){
146 create_XMPPNodeExtraProhibitedTable
($inFH, $outfile);
148 if( defined $writeSASLMap){
149 create_SASLMapTable
($inFH, $outfile);
151 if( defined $writeLDAPMap){
152 create_LDAPMapTable
($inFH, $outfile);
157 #-----------------------------------------------------------------------
# Table reader: copies entries from $inFH to $outFH, rewriting each one
# according to which table ($table) is being processed.
# NOTE(review): the sub header and several statements are not visible in
# this excerpt; the name readPrint is taken from the call sites, whose
# argument order matches the unpacking below -- confirm.
#
# Arguments: input handle, output handle, heading comment, table selector.
# `local` gives the package variables these values for the duration of the
# call only.
159 local ($inFH, $outFH,$comment, $table) = @_;
# The caller-supplied heading is written first.
161 print $outFH $comment."\n";
162 while(defined ($line = <$inFH>)){
163 next if $line =~ /Hoffman\s\&\sBlanchet/; # ignore heading
164 next if $line =~ /RFC\s3454/; # ignore heading
165 next if $line =~ /\f/; # ignore form feed
166 next if $line eq "\n"; # ignore blank lines
167 # break if "End Table" is found
168 if( $line =~ /End\sTable/){
# NOTE(review): $count is reported here but its updates are not visible in
# this excerpt.
169 print $outFH "\n# Total code points $count\n\n";
# Keep only the text before the first ';' and tag the entry UNASSIGNED.
# NOTE(review): the condition selecting this branch is not visible here.
182 ($code, $noise) = split /;/ , $line;
183 $line = $code."; ; UNASSIGNED\n";
184 }elsif ( $table =~ /B\.1/ ){
# Replace the RFC's descriptive wording with the MAP keyword.
185 $line =~ s/Map to nothing/MAP/;
186 }elsif ( $table =~ /B\.[23]/ ){
187 $line =~ s/Case map/MAP/;
188 $line =~ s/Additional folding/MAP/;
189 }elsif ( $table =~ /C/ ) {
# Keep only the text before the first ';' and tag the entry PROHIBITED.
190 ($code, $noise) = split /;/ , $line;
191 $line = $code."; ; PROHIBITED\n";
# Split a "start..end" range into its endpoints, convert both with atoi and
# loop from $start through $end (the loop body is not visible here).
194 ($code, $noise) = split /;/ , $line;
195 ($startStr, $endStr ) = split /\.\./, $code;
196 $start = atoi
($startStr);
197 $end = atoi
($endStr);
198 #print $start." ".$end."\n";
199 while($start <= $end){
209 #-----------------------------------------------------------------------
# Visit the first argument one character at a time (split on the empty
# pattern yields single characters).
# NOTE(review): the enclosing sub header and the loop body are not visible
# in this excerpt; call sites use the name atoi on range endpoints -- confirm.
212 foreach my $d (split(//, shift())) {
217 #-----------------------------------------------------------------------
# Appends Table A.1 of RFC 3454 to the output file.
#
# Arguments:
#   $inFH    - input handle positioned just after the table's start marker
#   $outfile - path of the output file, opened in append mode
#
# The table body is read and rewritten by readPrint (selector "A").
# Dies if the output file cannot be opened or closed.
sub createUnassignedTable
{
    # Lexicals: the main loop keeps its own $inFH/$outFH in package
    # variables of the same names, which must not be overwritten here.
    my ($inFH, $outfile) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = "# This table contains code points from Table A.1 from RFC 3454\n";

    readPrint($inFH, $outFH, $comment, "A");

    # Buffered write errors surface at close; only close if readPrint left
    # the handle open.
    if ($outFH->opened) {
        $outFH->close
            or die "could not close the file $outfile: $! \n";
    }
}
226 #-----------------------------------------------------------------------
# Appends Table B.1 of RFC 3454 to the output file.
#
# Arguments:
#   $inFH    - input handle positioned just after the table's start marker
#   $outfile - path of the output file, opened in append mode
#
# The table body is read and rewritten by readPrint (selector "B.1").
# Dies if the output file cannot be opened or closed.
sub createMapToNothing
{
    # Lexicals: the main loop keeps its own $inFH/$outFH in package
    # variables of the same names, which must not be overwritten here.
    my ($inFH, $outfile) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = "# This table contains code points from Table B.1 from RFC 3454\n";

    readPrint($inFH, $outFH, $comment, "B.1");

    # Buffered write errors surface at close; only close if readPrint left
    # the handle open.
    if ($outFH->opened) {
        $outFH->close
            or die "could not close the file $outfile: $! \n";
    }
}
235 #-----------------------------------------------------------------------
# Appends Table B.2 of RFC 3454 to the output file.
#
# Arguments:
#   $inFH    - input handle positioned just after the table's start marker
#   $outfile - path of the output file, opened in append mode
#
# The heading is prefixed with the file-level $warning banner. The table
# body is read and rewritten by readPrint (selector "B.2").
# Dies if the output file cannot be opened or closed.
sub createCaseMapNorm
{
    # Lexicals: the main loop keeps its own $inFH/$outFH in package
    # variables of the same names, which must not be overwritten here.
    my ($inFH, $outfile) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    # $warning is the package-level banner string defined at file scope.
    my $comment = $warning."# This table contains code points from Table B.2 from RFC 3454\n";

    readPrint($inFH, $outFH, $comment, "B.2");

    # Buffered write errors surface at close; only close if readPrint left
    # the handle open.
    if ($outFH->opened) {
        $outFH->close
            or die "could not close the file $outfile: $! \n";
    }
}
244 #-----------------------------------------------------------------------
# Appends Table B.3 of RFC 3454 to the output file.
#
# Arguments:
#   $inFH    - input handle positioned just after the table's start marker
#   $outfile - path of the output file, opened in append mode
#
# The heading is prefixed with the file-level $warning banner. The table
# body is read and rewritten by readPrint (selector "B.3").
# Dies if the output file cannot be opened or closed.
sub createCaseMapNoNorm
{
    # Lexicals: the main loop keeps its own $inFH/$outFH in package
    # variables of the same names, which must not be overwritten here.
    my ($inFH, $outfile) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    # $warning is the package-level banner string defined at file scope.
    my $comment = $warning."# This table contains code points from Table B.3 from RFC 3454\n";

    readPrint($inFH, $outFH, $comment, "B.3");

    # Buffered write errors surface at close; only close if readPrint left
    # the handle open.
    if ($outFH->opened) {
        $outFH->close
            or die "could not close the file $outfile: $! \n";
    }
}
253 #-----------------------------------------------------------------------
# Appends one of the RFC 3454 "C" tables to the output file.
#
# Arguments: input handle, output file path, and the "Start Table" marker
# line that triggered the call (passed by the dispatch loop).
# NOTE(review): the embedded source numbering jumps from 255 to 258, so
# statements between the unpacking and the heading may be missing from this
# excerpt.
254 sub createProhibitedTable
{
# Unpacks into package variables (no `my`), so the caller's variables of the
# same names are overwritten.
255 ($inFH,$outfile,$line) = @_;
# The marker line is reused as the table's heading comment.
258 $comment = "# code points from $line";
# Append mode ("a"): content already in the file is kept.
260 $outFH = IO
::File-
>new($outfile, "a")
261 or die "could not open the file $outfile for writing: $! \n";
# Selector "C" makes readPrint rewrite each entry as "<code>; ; PROHIBITED".
262 readPrint
($inFH,$outFH,$comment, "C");
266 #-----------------------------------------------------------------------
# Appends the additional prohibited code points of the iSCSI stringprep
# profile (rfc3722.txt) to the output file.
#
# Arguments:
#   (first)  - input handle; accepted for call compatibility, never read
#   $outfile - path of the output file, opened in append mode
# Any further argument is ignored.
#
# Dies if the output file cannot be opened or closed.
sub create_iSCSIExtraProhibitedTable
{
    # Lexical unpacking: the main loop's package-level $inFH/$line/$outFH
    # must not be overwritten by this call.
    my (undef, $outfile) = @_;

    # Inclusive [first, last] code point ranges, in output order.
    my @ranges = (
        [0x0021, 0x002C],
        [0x002F, 0x002F],
        [0x003B, 0x0040],
        [0x005B, 0x0060],
        [0x007B, 0x007E],
        [0x3002, 0x3002],
    );

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    print $outFH "# Additional prohibitions from iSCSI profile (rfc3722.txt)\n\n";

    my $total = 0;
    for my $range (@ranges) {
        my ($first, $last) = @$range;
        # Single code points are written bare, real ranges as "first..last".
        my $code = $first == $last
            ? sprintf("%04X", $first)
            : sprintf("%04X..%04X", $first, $last);
        print $outFH "$code; ; PROHIBITED\n";
        $total += $last - $first + 1;
    }

    # Derived from the table so the footer cannot drift from the data.
    print $outFH "\n# Total code points $total\n";

    # Buffered write errors surface at close.
    $outFH->close
        or die "could not close the file $outfile: $! \n";
}
283 #-----------------------------------------------------------------------
# Appends the additional prohibited code points of the XMPP Nodeprep profile
# (rfc3920.txt) to the output file.
#
# Arguments:
#   (first)  - input handle; accepted for call compatibility, never read
#   $outfile - path of the output file, opened in append mode
# Any further argument is ignored.
#
# Dies if the output file cannot be opened or closed.
sub create_XMPPNodeExtraProhibitedTable
{
    # Lexical unpacking: the main loop's package-level $inFH/$line/$outFH
    # must not be overwritten by this call.
    my (undef, $outfile) = @_;

    # Prohibited code points, in output order.
    my @codes = (0x0022, 0x0026, 0x0027, 0x002F, 0x003A, 0x003C, 0x003E, 0x0040);

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    print $outFH "# Additional prohibitions from XMPP Nodeprep profile (rfc3920.txt)\n\n";
    for my $code (@codes) {
        printf {$outFH} "%04X; ; PROHIBITED\n", $code;
    }

    # Derived from the list so the footer cannot drift from the data.
    print $outFH "\n# Total code points " . scalar(@codes) . "\n";

    # Buffered write errors surface at close.
    $outFH->close
        or die "could not close the file $outfile: $! \n";
}
302 #-----------------------------------------------------------------------
# Appends the mapping table of the SASL stringprep profile (rfc4013.txt) to
# the output file.
#
# Arguments:
#   (first)  - input handle; accepted for call compatibility, never read
#   $outfile - path of the output file, opened in append mode
# Any further argument is ignored.
#
# Each entry is written as "<code>; <target>; MAP", where an empty target
# means the code point is mapped to nothing.
# Dies if the output file cannot be opened or closed.
sub create_SASLMapTable
{
    # Lexical unpacking: the main loop's package-level $inFH/$line/$outFH
    # must not be overwritten by this call.
    my (undef, $outfile) = @_;

    # non-ASCII space characters [C.1.2] to SPACE
    my @to_space = (
        0x00A0, 0x1680, 0x2000 .. 0x200B, 0x202F, 0x205F, 0x3000,
    );

    # commonly mapped to nothing characters except U+200B to nothing
    # (U+200B is in the SPACE list above instead)
    my @to_nothing = (
        0x00AD, 0x034F, 0x1806, 0x180B .. 0x180D,
        0x200C, 0x200D, 0x2060,
        0xFE00 .. 0xFE0F,
        0xFEFF,
    );

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    print $outFH "# Map table for SASL profile (rfc4013.txt)\n\n";
    printf {$outFH} "%04X; 0020; MAP\n", $_ for @to_space;
    printf {$outFH} "%04X; ; MAP\n",     $_ for @to_nothing;

    # Derived from the lists so the footer cannot drift from the data.
    my $total = @to_space + @to_nothing;
    print $outFH "\n# Total code points $total\n";

    # Buffered write errors surface at close.
    $outFH->close
        or die "could not close the file $outfile: $! \n";
}
359 #-----------------------------------------------------------------------
# Appends the mapping table of the LDAP stringprep profile (rfc4518.txt) to
# the output file.
#
# Arguments:
#   (first)  - input handle; accepted for call compatibility, never read
#   $outfile - path of the output file, opened in append mode
# Any further argument is ignored.
#
# Each entry is written as "<code>; <target>; MAP", where an empty target
# means the code point is mapped to nothing. Entries are emitted in the same
# group order as the profile text describes them.
# Dies if the output file cannot be opened or closed.
sub create_LDAPMapTable
{
    # Lexical unpacking: the main loop's package-level $inFH/$line/$outFH
    # must not be overwritten by this call.
    my (undef, $outfile) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    print $outFH "# Map table for LDAP profile (rfc4518.txt)\n\n";

    # Writes one MAP line per code point and keeps the running total that
    # the footer reports.
    my $total = 0;
    my $emit  = sub {
        my ($target, @codes) = @_;
        for my $code (@codes) {
            printf {$outFH} "%04X; %s; MAP\n", $code, $target;
            $total++;
        }
    };

    # SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806), COMBINING
    # GRAPHEME JOINER (U+034F), the VARIATION SELECTORs (U+180B-180D and
    # U+FE00-FE0F; RFC 4518's text prints the second range as "FF00-FE0F")
    # and the OBJECT REPLACEMENT CHARACTER (U+FFFC) are mapped to nothing.
    $emit->("",
        0x00AD, 0x034F, 0x1806, 0x180B .. 0x180D,
        0xFE00 .. 0xFE0F,
        0xFFFC,
    );

    # CHARACTER TABULATION (U+0009), LINE FEED (U+000A), LINE TABULATION
    # (U+000B), FORM FEED (U+000C), CARRIAGE RETURN (U+000D) and NEXT LINE
    # (U+0085) are mapped to SPACE (U+0020).
    $emit->("0020", 0x0009 .. 0x000D, 0x0085);

    # All other control code points (e.g. Cc) and code points with a control
    # function (e.g. Cf) are mapped to nothing.
    $emit->("",
        0x0000 .. 0x0008, 0x000E .. 0x001F,
        0x007F .. 0x0084, 0x0086 .. 0x009F,
        0x06DD, 0x070F, 0x180E,
        0x200C .. 0x200F, 0x202A .. 0x202E,
        0x2060 .. 0x2063, 0x206A .. 0x206F,
        0xFEFF, 0xFFF9 .. 0xFFFB,
        0x1D173 .. 0x1D17A,
        0xE0001, 0xE0020 .. 0xE007F,
    );

    # ZERO WIDTH SPACE (U+200B) is mapped to nothing ...
    $emit->("", 0x200B);

    # ... and the remaining code points with a Separator property (Zs, Zl,
    # Zp) are mapped to SPACE (U+0020). U+0020 itself gets no entry.
    $emit->("0020",
        0x00A0, 0x1680, 0x2000 .. 0x200A,
        0x2028, 0x2029, 0x202F, 0x205F, 0x3000,
    );

    # Derived from the emitted entries so the footer cannot drift from the
    # data.
    print $outFH "\n# Total code points $total\n";

    # Buffered write errors surface at close.
    $outFH->close
        or die "could not close the file $outfile: $! \n";
}
635 #-----------------------------------------------------------------------
641 --sourcedir
=<directory
>
642 --destdir
=<directory
>
643 --src-filename
=<name of RFC file
>
644 --dest-filename
=<name of destination file
>
645 --A1 Generate data
for table A
.1
646 --B1 Generate data
for table B
.1
647 --B2 Generate data
for table B
.2
648 --B3 Generate data
for table B
.3
649 --C11 Generate data
for table C
.1.1
650 --C12 Generate data
for table C
.1.2
651 --C21 Generate data
for table C
.2.1
652 --C22 Generate data
for table C
.2.2
653 --C3 Generate data
for table C
.3
654 --C4 Generate data
for table C
.4
655 --C5 Generate data
for table C
.5
656 --C6 Generate data
for table C
.6
657 --C7 Generate data
for table C
.7
658 --C8 Generate data
for table C
.8
659 --C9 Generate data
for table C
.9
660 --iscsi Generate data
for iSCSI extra prohibited table
661 --xmpp-node Generate data
for XMPP extra prohibited table
662 --sasl Generate data
for SASL
map table
663 --ldap Generate data
for LDAP
map table
664 --normalize Embed the normalization directive
in the output file
665 --check-bidi Embed the check bidi directove
in the output file
667 Note
, --B2
and --B3 are mutually exclusive
.
669 e
.g
.: filterRFC3454
.pl
--sourcedir
=. --destdir
=./output
--src-filename
=rfc3454
.txt
--dest-filename
=NamePrepProfile
.txt
--A1
--B1
--B2
--C12
--C22
--C3
--C4
--C5
--C6
--C7
--C8
--C9
--normalize
--check-bidi
671 filterRFC3454
.pl filters the RFC file
and creates String prep table files
.
672 The RFC text can be downloaded from ftp
://ftp
.rfc-editor
.org
/in-notes/rfc3454
.txt