]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/perf/leperf/xmlreader.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / perf / leperf / xmlreader.cpp
CommitLineData
57a6839d 1/*
f3c0d7a5
A
2 *******************************************************************************
3 *
4 * © 2016 and later: Unicode, Inc. and others.
5 * License & terms of use: http://www.unicode.org/copyright.html#License
6 *
7 *******************************************************************************
57a6839d
A
8 *******************************************************************************
9 *
10 * Copyright (C) 1999-2013, International Business Machines
11 * Corporation and others. All Rights Reserved.
12 *
13 *******************************************************************************
14 */
15
16#include "unicode/utypes.h"
17#include "unicode/uclean.h"
18#include "unicode/uchar.h"
19#include "unicode/unistr.h"
20#include "unicode/uscript.h"
21#include "unicode/putil.h"
22#include "unicode/ctest.h"
23
24#include "layout/LETypes.h"
25#include "layout/LEScripts.h"
26
27#include "letsutil.h"
28#include "letest.h"
29
30#include "xmlreader.h"
31
32#include "xmlparser.h"
33
34#include <stdlib.h>
35#include <stdio.h>
36#include <string.h>
37
38//U_NAMESPACE_USE
39
40#define CH_COMMA 0x002C
41
42static le_uint32 *getHexArray(const UnicodeString &numbers, int32_t &arraySize)
43{
44 int32_t offset = -1;
45
46 arraySize = 1;
47 while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
48 arraySize += 1;
49 }
50
51 le_uint32 *array = NEW_ARRAY(le_uint32, arraySize);
52 char number[16];
53 le_int32 count = 0;
54 le_int32 start = 0, end = 0;
55 le_int32 len = 0;
56
57 // trim leading whitespace
58 while(u_isUWhiteSpace(numbers[start])) {
59 start += 1;
60 }
61
62 while((end = numbers.indexOf(CH_COMMA, start)) >= 0) {
63 len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
64 number[len] = '\0';
65 start = end + 1;
66
67 sscanf(number, "%x", &array[count++]);
68
69 // trim whitespace following the comma
70 while(u_isUWhiteSpace(numbers[start])) {
71 start += 1;
72 }
73 }
74
75 // trim trailing whitespace
76 end = numbers.length();
77 while(u_isUWhiteSpace(numbers[end - 1])) {
78 end -= 1;
79 }
80
81 len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
82 number[len] = '\0';
83 sscanf(number, "%x", &array[count]);
84
85 return array;
86}
87
88static float *getFloatArray(const UnicodeString &numbers, int32_t &arraySize)
89{
90 int32_t offset = -1;
91
92 arraySize = 1;
93 while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
94 arraySize += 1;
95 }
96
97 float *array = NEW_ARRAY(float, arraySize);
98 char number[32];
99 le_int32 count = 0;
100 le_int32 start = 0, end = 0;
101 le_int32 len = 0;
102
103 // trim leading whitespace
104 while(u_isUWhiteSpace(numbers[start])) {
105 start += 1;
106 }
107
108 while((end = numbers.indexOf(CH_COMMA, start)) >= 0) {
109 len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
110 number[len] = '\0';
111 start = end + 1;
112
113 sscanf(number, "%f", &array[count++]);
114
115 // trim whiteapce following the comma
116 while(u_isUWhiteSpace(numbers[start])) {
117 start += 1;
118 }
119 }
120
121 while(u_isUWhiteSpace(numbers[start])) {
122 start += 1;
123 }
124
125 // trim trailing whitespace
126 end = numbers.length();
127 while(u_isUWhiteSpace(numbers[end - 1])) {
128 end -= 1;
129 }
130
131 len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
132 number[len] = '\0';
133 sscanf(number, "%f", &array[count]);
134
135 return array;
136}
137
138U_CDECL_BEGIN
139void readTestFile(const char *testFilePath, TestCaseCallback callback)
140{
141#if !UCONFIG_NO_REGULAR_EXPRESSIONS
142 UErrorCode status = U_ZERO_ERROR;
143 UXMLParser *parser = UXMLParser::createParser(status);
144 UXMLElement *root = parser->parseFile(testFilePath, status);
145
146 if (root == NULL) {
147 log_err("Could not open the test data file: %s\n", testFilePath);
148 delete parser;
149 return;
150 }
151
152 UnicodeString test_case = UNICODE_STRING_SIMPLE("test-case");
153 UnicodeString test_text = UNICODE_STRING_SIMPLE("test-text");
154 UnicodeString test_font = UNICODE_STRING_SIMPLE("test-font");
155 UnicodeString result_glyphs = UNICODE_STRING_SIMPLE("result-glyphs");
156 UnicodeString result_indices = UNICODE_STRING_SIMPLE("result-indices");
157 UnicodeString result_positions = UNICODE_STRING_SIMPLE("result-positions");
158
159 // test-case attributes
160 UnicodeString id_attr = UNICODE_STRING_SIMPLE("id");
161 UnicodeString script_attr = UNICODE_STRING_SIMPLE("script");
162 UnicodeString lang_attr = UNICODE_STRING_SIMPLE("lang");
163
164 // test-font attributes
165 UnicodeString name_attr = UNICODE_STRING_SIMPLE("name");
166 UnicodeString ver_attr = UNICODE_STRING_SIMPLE("version");
167 UnicodeString cksum_attr = UNICODE_STRING_SIMPLE("checksum");
168
169 const UXMLElement *testCase;
170 int32_t tc = 0;
171
172 while((testCase = root->nextChildElement(tc)) != NULL) {
173 if (testCase->getTagName().compare(test_case) == 0) {
174 char *id = getCString(testCase->getAttribute(id_attr));
175 char *script = getCString(testCase->getAttribute(script_attr));
176 char *lang = getCString(testCase->getAttribute(lang_attr));
177 char *fontName = NULL;
178 char *fontVer = NULL;
179 char *fontCksum = NULL;
180 const UXMLElement *element;
181 int32_t ec = 0;
182 int32_t charCount = 0;
183 int32_t typoFlags = 3; // kerning + ligatures...
184 UScriptCode scriptCode;
185 le_int32 languageCode = -1;
186 UnicodeString text, glyphs, indices, positions;
187 int32_t glyphCount = 0, indexCount = 0, positionCount = 0;
188 TestResult expected = {0, NULL, NULL, NULL};
189
190 uscript_getCode(script, &scriptCode, 1, &status);
191 if (LE_FAILURE(status)) {
192 log_err("invalid script name: %s.\n", script);
193 goto free_c_strings;
194 }
195
196 if (lang != NULL) {
197 languageCode = getLanguageCode(lang);
198
199 if (languageCode < 0) {
200 log_err("invalid language name: %s.\n", lang);
201 goto free_c_strings;
202 }
203 }
204
205 while((element = testCase->nextChildElement(ec)) != NULL) {
206 UnicodeString tag = element->getTagName();
207
208 // TODO: make sure that each element is only used once.
209 if (tag.compare(test_font) == 0) {
210 fontName = getCString(element->getAttribute(name_attr));
211 fontVer = getCString(element->getAttribute(ver_attr));
212 fontCksum = getCString(element->getAttribute(cksum_attr));
213
214 } else if (tag.compare(test_text) == 0) {
215 text = element->getText(TRUE);
216 charCount = text.length();
217 } else if (tag.compare(result_glyphs) == 0) {
218 glyphs = element->getText(TRUE);
219 } else if (tag.compare(result_indices) == 0) {
220 indices = element->getText(TRUE);
221 } else if (tag.compare(result_positions) == 0) {
222 positions = element->getText(TRUE);
223 } else {
224 // an unknown tag...
225 char *cTag = getCString(&tag);
226
227 log_info("Test %s: unknown element with tag \"%s\"\n", id, cTag);
228 freeCString(cTag);
229 }
230 }
231
232 expected.glyphs = (LEGlyphID *) getHexArray(glyphs, glyphCount);
233 expected.indices = (le_int32 *) getHexArray(indices, indexCount);
234 expected.positions = getFloatArray(positions, positionCount);
235
236 expected.glyphCount = glyphCount;
237
238 if (glyphCount < charCount || indexCount != glyphCount || positionCount < glyphCount * 2 + 2) {
239 log_err("Test %s: inconsistent input data: charCount = %d, glyphCount = %d, indexCount = %d, positionCount = %d\n",
240 id, charCount, glyphCount, indexCount, positionCount);
241 goto free_expected;
242 };
243
244 (*callback)(id, fontName, fontVer, fontCksum, scriptCode, languageCode, text.getBuffer(), charCount, &expected);
245
246free_expected:
247 DELETE_ARRAY(expected.positions);
248 DELETE_ARRAY(expected.indices);
249 DELETE_ARRAY(expected.glyphs);
250
251free_c_strings:
252 freeCString(fontCksum);
253 freeCString(fontVer);
254 freeCString(fontName);
255 freeCString(lang);
256 freeCString(script);
257 freeCString(id);
258 }
259 }
260
261 delete root;
262 delete parser;
263#endif
264}
265U_CDECL_END