]>
Commit | Line | Data |
---|---|---|
46f4442e A |
1 | #!/usr/bin/perl |
2 | # ******************************************************************** | |
3 | # * COPYRIGHT: | |
4 | # * Copyright (c) 2002-2008, International Business Machines Corporation and | |
5 | # * others. All Rights Reserved. | |
6 | # ******************************************************************** | |
7 | ||
8 | require "../perldriver/Common.pl"; | |
9 | ||
10 | use lib '../perldriver'; | |
11 | ||
# Path of the collation benchmark binary under $ICUPathLatest. Windows builds
# live in a "<platform>/Release" subdirectory and carry an .exe name; every
# other platform uses the plain "collperf" binary.
my $p = $OnWindows
    ? $ICUPathLatest . "/collationperf/$WindowsPlatform/Release/collationperf.exe"
    : $ICUPathLatest . "/collationperf/collperf";
19 | ||
# Locales to benchmark. The list runs in parallel with @data: entry N here is
# measured against data file N there, which is why locales repeat.
my @locale = qw(
    en_US da_DK de_DE fr_FR
    ja_JP ja_JP ja_JP ja_JP
    zh_CN zh_CN zh_CN
    zh_TW zh_TW
    ko_KR ko_KR
    ru_RU ru_RU
    th_TH th_TH
);
41 | ||
# Data files are named "TestNames_<script>.txt" and are looked up in the
# directory given by $CollationDataPath.
my $filePrefix = "TestNames_";
my $filePath   = $CollationDataPath . "/";

# One data file per entry of @locale (same index, same order).
my @data = map { $filePrefix . $_ . ".txt" } qw(
    Latin Latin Latin Latin
    Latin Japanese_h Japanese_k Asian
    Latin Chinese Simplified_Chinese
    Latin Chinese
    Latin Korean
    Latin Russian
    Latin Thai
);
65 | ||
# Per-row results, indexed like @locale / @data.
#   @resultPER - the three percentage fields of a row ("p1%,p2%,p3%,")
#   @resultFIN - the complete comma-separated row consumed by printData():
#                locale,file,ICU strcoll,ICU keygen,ICU keylen,
#                Unix strcoll,Unix keygen,Unix keylen,p1%,p2%,p3%,
my @resultPER;
my @resultFIN;

# Runs the benchmark executable once and returns its output lines.
#   $exe    - path of the executable
#   $locale - value for -locale
#   $file   - value for -file
#   $mode   - operation switch, "-qsort" or "-keygen"
#   $unix   - true to add the -unix switch (the run whose numbers fill the
#             "Unix"/GLIBC columns of the report)
sub _runCollPerf {
    my ( $exe, $locale, $file, $mode, $unix ) = @_;
    my $cmd = join( ' ',
        $exe, '-locale', $locale,
        ( $unix ? '-unix' : () ),
        '-loop', 1000, '-file', $file, $mode );
    my @lines = `$cmd`;
    return @lines;
}

# Returns the text that follows the first " = " on an output line, without
# its trailing newline. Returns '' when the line is missing or has no " = ".
sub _valueAfterEquals {
    my ($line) = @_;
    return '' unless defined $line;
    my @parts = split( ' = ', $line );
    return '' unless defined $parts[1];
    my $value = $parts[1];
    chomp($value);
    return $value;
}

# Returns the first whitespace-delimited word of $text, or '' if there is none.
sub _firstWord {
    my ($text) = @_;
    return '' unless defined $text;
    my @words = split( ' ', $text );
    return defined $words[0] ? $words[0] : '';
}

# Relative difference (Unix - ICU) / ICU as a percentage, cut to at most seven
# characters as the report has always shown it. Yields 0 when the ICU value is
# missing or not positive, which also avoids dividing by zero.
sub _percentDiff {
    my ( $icu, $nix ) = @_;
    return 0 unless defined $icu && $icu > 0;
    $nix = 0 unless defined $nix;
    return substr( ( ( $nix - $icu ) / $icu ) * 100, 0, 7 );
}

for my $n ( 0 .. $#data ) {
    my $loc  = $locale[$n];
    my $file = $filePath . $data[$n];

    # Four runs per row, in this order: quicksort (ICU, Unix), then key
    # generation (ICU, Unix).
    my @icuSort = _runCollPerf( $p, $loc, $file, '-qsort',  0 );
    my @nixSort = _runCollPerf( $p, $loc, $file, '-qsort',  1 );
    my @icuKey  = _runCollPerf( $p, $loc, $file, '-keygen', 0 );
    my @nixKey  = _runCollPerf( $p, $loc, $file, '-keygen', 1 );

    # The timing is the first word after " = " on the third output line; the
    # key length is everything after " = " on the fourth line of the keygen run.
    my @icuVals = (
        _firstWord( _valueAfterEquals( $icuSort[2] ) ),    # quicksort
        _firstWord( _valueAfterEquals( $icuKey[2] ) ),     # keygen time
        _valueAfterEquals( $icuKey[3] ),                   # keygen len
    );
    my @nixVals = (
        _firstWord( _valueAfterEquals( $nixSort[2] ) ),
        _firstWord( _valueAfterEquals( $nixKey[2] ) ),
        _valueAfterEquals( $nixKey[3] ),
    );

    # A failed or unparsable run used to pass unnoticed and leave blank cells;
    # keep producing the row, but say so.
    if ( grep { $_ eq '' } @icuVals, @nixVals ) {
        warn "collperf output could not be parsed for $loc / $data[$n]; "
            . "some report cells will be empty\n";
    }

    #percent
    $resultPER[$n] = join( '',
        map { _percentDiff( $icuVals[$_], $nixVals[$_] ) . "%," } 0 .. 2 );

    # locale, data file, ICU results, Unix results, then the percentages
    $resultFIN[$n] =
        join( ',', $loc, $data[$n], @icuVals, @nixVals ) . ',' . $resultPER[$n];
}
139 | ||
# Write the collected measurements out as an HTML page, then finish.
printOutput();

exit 0;
144 | ||
# Creates the HTML report in ../results/ and writes the whole page: header,
# explanatory text and key table, the data table (via printData), and the
# closing notes.
#
# The file is opened on the global HTML filehandle because printData() prints
# to that same handle. Takes no arguments; dies if the file cannot be opened
# or closed.
sub printOutput {

    # Derive the file name from the current local time. For example
    # "Thu Sep  9 12:34:56 2008" becomes
    # "../results/CollationPerformance_Sep__9_123456.html".
    my $html = localtime;
    $html =~ s/://g;         # ':' illegal
    $html =~ s/\s*\d+$//;    # delete year
    $html =~ s/^\w+\s*//;    # delete dow
    $html = "CollationPerformance $html.html";
    $html = "../results/" . $html;
    $html =~ s/ /_/g;

    # Three-argument open: the mode can never be taken from the file name.
    open( HTML, '>', $html ) or die "Can't write to $html: $!";
    print HTML <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Collation: ICU4C vs. glibc</title>
<link rel="stylesheet" href="../icu.css" type="text/css" />
</head>
<body>
<!--#include virtual="../ssi/header.html" -->
EOF

    print HTML "<h2>Collation: ICU4C ".$ICULatestVersion." vs. GLIBC</h2>\n";

    print HTML <<EOF;
<p>The performance test takes a locale and creates a RuleBasedCollator with
default options. A large list of names is used as data in each test, where the
names vary according to language. Each Collation operation over the whole list
is repeated 1000 times. The percentage values in the final column are the most
useful. They measure differences, where positive is better for ICU4C, and
negative is better for the compared implementation.</p>
<h3>Key</h3>
<table border="1" cellspacing="0" cellpadding="4">
<tr>
<th align="left">Operation</th>
<th align="left">Units</th>
<th align="left">Description</th>
</tr>
<tr>
<td>strcoll</td>
<td>nanosecs</td>
<td>Timing for string collation, an incremental compare of strings.</td>
</tr>
<tr>
<td>keygen</td>
<td>nanosecs</td>
<td>Timing for generation of sort keys, used to 'precompile' information so
that subsequent operations can use binary comparison.</td>
</tr>
<tr>
<td>keylen</td>
<td>bytes/char</td>
<td>The average length of the generated sort keys, in bytes per character
(Unicode/ISO 10646 code point). Generally this is the important field for sort
key performance, since it directly impacts the time necessary for binary
comparison, and the overhead of memory usage and retrieval time for sort
keys.</td>
</tr>
</table>
EOF

    # The measurement table goes between the key and the notes.
    printData();

    print HTML <<EOF;
<h3><i>Notes</i></h3>
<ol>
<li>As with all performance measurements, the results will vary according to
the hardware and compiler. The strcoll operation is particularly sensitive; we
have found that even slight changes in code alignment can produce 10%
differences.</li>
<li>For more information on incremental vs. sort key comparison, the importance
of multi-level sorting, and other features of collation, see <a href=
"http://www.unicode.org/reports/tr10/">Unicode Collation (UCA)</a>.</li>
<li>For general information on ICU collation see <a href=
"/userguide/Collate_Intro.html">User Guide</a>.</li>
<li>For information on APIs, see <a href="/apiref/icu4c/ucol_8h.html">C</a>,
<a href="/apiref/icu4c/classCollator.html">C++</a>, or <a href=
"/apiref/icu4j/com/ibm/icu/text/Collator.html">Java</a>.</li>
</ol>
<!--#include virtual="../ssi/footer.html" -->
</body>
</html>

EOF

    # Buffered write errors only surface at close, so check it.
    close(HTML) or die "Can't close $html: $!";
}
233 | ||
# Writes the "Data" table to the already-open HTML filehandle: a fixed header
# row, then one table row per entry of @resultFIN.
#
# Each @resultFIN entry is a comma-separated string; every field becomes one
# centered cell, and fields that start with '-' are shown in red. Trailing
# empty fields produce no cell, because split drops them.
# Takes no arguments and returns nothing of interest.
sub printData {
    print HTML <<EOF;
<h3>Data</h3>
<table border="1" cellspacing="0" cellpadding="4">
<tr>
<td align="left"><b>Locale</b></td>
<td align="left"><b>Data file</b></td>
<td align="left"><b>strcoll</b> <i>(ICU)</i></td>
<td align="left"><b>keygen</b> <i>(ICU)</i></td>
<td align="left"><b>keylen</b> <i>(ICU)</i></td>
<td align="left"><b>strcoll</b> <i>(GLIBC)</i></td>
<td align="left"><b>keygen</b> <i>(GLIBC)</i></td>
<td align="left"><b>keylen</b> <i>(GLIBC)</i></td>
<td align="left"><b>strcoll</b> <i>(GLIBC-ICU)/ICU)</i></td>
<td align="left"><b>keygen</b> <i>(GLIBC-ICU)/ICU)</i></td>
<td align="left"><b>keylen</b> <i>(GLIBC-ICU)/ICU)</i></td>
</tr>
EOF

    # Lexical loop variables: nothing outside this sub is touched.
    foreach my $row (@resultFIN) {
        print HTML "<tr>";
        foreach my $value ( split( ',', $row ) ) {
            print HTML "<td align=\"center\">";

            # Negative values get highlighted.
            if ( $value =~ m/^[-]/ ) {
                print HTML "<font color=\"red\">$value</font>";
            }
            else {
                print HTML "$value";
            }

            print HTML "</td>";
        }
        print HTML "</tr>\n";
    }

    print HTML <<EOF;
</table>
EOF
}