]>
Commit | Line | Data |
---|---|---|
73c04bcf A |
1 | #!/usr/bin/perl |
2 | #* | |
3 | #******************************************************************************* | |
4 | #* Copyright (C) 2006, International Business Machines | |
5 | #* Corporation and others. All Rights Reserved. | |
6 | #******************************************************************************* | |
7 | #* | |
8 | #* file name: genspva.pl | |
9 | #* encoding: US-ASCII | |
10 | #* tab size: 8 (not used) | |
11 | #* indentation:4 | |
12 | #* | |
13 | #* Created by: Ram Viswanadha | |
14 | #* | |
15 | #* This file filters iso15924-utf8-<date>.txt | |
16 | #* | |
17 | ||
18 | use File::Find; | |
19 | use File::Basename; | |
20 | use IO::File; | |
21 | use Cwd; | |
22 | use File::Copy; | |
23 | use Getopt::Long; | |
24 | use File::Path; | |
25 | use File::Copy; | |
26 | ||
27 | # Run the program: main() (defined below) does all of the work. | |
28 | main(); | |
29 | ||
30 | #--------------------------------------------------------------------- | |
31 | # The main program | |
32 | ||
# main()
#
# Generates <destdir>/SyntheticPropertyValueAliases.txt.  For every script
# code in the ISO 15924 list (--iso15924) that is neither already listed as
# an "sc ; <code>" entry in the property value aliases file (--prop) nor a
# private-use code, one "sc ; <code> ; <code>" line is written.
#
# When --code-start is given, matching UScriptCode enum entries are also
# printed to STDOUT, numbered consecutively from that value.
#
# Takes no arguments (options are read from @ARGV) and returns nothing
# useful.  Dies if a file cannot be opened or the output cannot be closed;
# calls usage() (which exits) when the options are invalid or incomplete.
sub main {
    my ($destdir, $iso, $prop, $code);

    # GetOptions reports unknown/malformed options itself and returns false;
    # show the usage text instead of silently carrying on.
    GetOptions(
        "--destdir=s"    => \$destdir,
        "--iso15924=s"   => \$iso,
        "--prop=s"       => \$prop,
        "--code-start=s" => \$code,
    ) or usage();
    usage() unless defined $destdir && defined $iso && defined $prop;

    my $outfile = "$destdir/SyntheticPropertyValueAliases.txt";
    my $propFH = IO::File->new($prop, "r")
        or die "could not open the file $prop for reading: $! \n";
    my $isoFH = IO::File->new($iso, "r")
        or die "could not open the file $iso for reading: $! \n";
    my $outFH = IO::File->new($outfile, "w")
        or die "could not open the file $outfile for writing: $! \n";

    # Remember every script code that already has an "sc ; <code>" entry,
    # so each ISO code needs only one hash lookup instead of a scan over
    # all property lines.
    my %is_encoded;
    while (my $line = <$propFH>) {
        $is_encoded{$1} = 1 if $line =~ /sc ; (\w+)/;
    }
    close($propFH);

    printHeader($outFH);
    if (defined $code) {
        print "Please add the following to UScriptCode enum in uscript.h.\n";
        print "#ifndef U_HIDE_DRAFT_API\n";
    }

    while (my $line = <$isoFH>) {
        next if $line =~ /^#/;          # skip comment lines
        $line =~ s/[\r\n]+\z//;         # drop the line terminator (LF or CRLF)
        next if $line !~ /\S/;          # skip blank lines

        # Fields are "code;number;English name;rest"; only the code and
        # the English name are used.
        my ($script, undef, $name) = split(/;/, $line, 4);
        next unless defined $script && length $script;

        # Skip script codes that are already encoded in Unicode.
        next if $is_encoded{$script};
        # Ignore private use codes.
        next if $script =~ /Qa[ab][a-z]/;

        # The code doubles as short and long alias: sc ; Arab ; Arab
        print $outFH "sc ; $script ; $script \n";

        # Print the enum entry to the console.
        if (defined $code) {
            # Names that are missing or contain characters which cannot be
            # part of a C identifier fall back to the four-letter code.
            if (!defined $name || $name eq '' || $name =~ /[(\s,\x80-\xFF]/) {
                $name = $script;
            }
            $name =~ s/-/_/g;

            my $scriptcode = "USCRIPT_" . uc($name);
            print " $scriptcode = $code, /* $script */\n";
            $code++;
        }
    }

    if (defined $code) {
        print "#endif /* U_HIDE_DRAFT_API */\n";
    }

    close($isoFH);
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
113 | #----------------------------------------------------------------------- | |
# printHeader($outFH)
#
# Writes the copyright banner and explanatory comments that start the
# generated SyntheticPropertyValueAliases.txt to the given file handle.
# The copyright range ends with the current year (local time).
#
#   $outFH - handle open for writing
#
# Returns the result of the print call (true on success).
sub printHeader {
    my ($out_fh) = @_;

    # localtime counts years from 1900.
    my $year = (localtime)[5] + 1900;

    # We print our copyright here + warnings
    print $out_fh <<END_HEADER_COMMENT;
########################################################################
# Copyright (c) 2006-$year, International Business Machines
# Corporation and others. All Rights Reserved.
########################################################################
# file name: SyntheticPropertyValueAliases.txt
# encoding: US-ASCII
# tab size: 8 (not used)
# indentation: 4
# created by: genspva.pl
########################################################################

# This file follows the format of PropertyValueAliases.txt
# It contains synthetic property value aliases not present
# in the UCD. Unlike PropertyValueAliases.txt, it should
# NOT contain a version number.

########################################################################
# THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW
# WHAT YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
########################################################################

# set the same names as short and long names to fit the syntax without
# inventing names that we would have to support forever

# Script (sc)

END_HEADER_COMMENT
}
148 | #----------------------------------------------------------------------- | |
# usage()
#
# Prints the command-line help to STDOUT and terminates the program.
# It is only called when the command line is invalid or incomplete, so
# the exit status is non-zero to let calling scripts notice the failure.
# Never returns.
sub usage {
    print <<"END";
Usage:
genspva.pl
Options:
--destdir=<directory>
--iso15924=<file name>
--prop=<PropertyValueAliases.txt>
--code-start=<first enum value> (optional)
e.g.: genspva.pl --destdir=<icu>/source/tools/genpname --iso15924=iso15924-utf8-20041025.txt --prop=<icu>/source/data/unidata/PropertyValueAliases.txt --code-start=60
END
    exit(1);
}