icuSources/test/perf/leperf/xmlreader.cpp

   1 /*
   2  *******************************************************************************
   3  *
   4  *   © 2016 and later: Unicode, Inc. and others.
   5  *   License & terms of use: http://www.unicode.org/copyright.html#License
   6  *
   7  *******************************************************************************
   8  *******************************************************************************
   9  *
  10  *   Copyright (C) 1999-2013, International Business Machines
  11  *   Corporation and others.  All Rights Reserved.
  12  *
  13  *******************************************************************************
  14  */
  15
  16 #include "unicode/utypes.h"
  17 #include "unicode/uclean.h"
  18 #include "unicode/uchar.h"
  19 #include "unicode/unistr.h"
  20 #include "unicode/uscript.h"
  21 #include "unicode/putil.h"
  22 #include "unicode/ctest.h"
  23
  24 #include "layout/LETypes.h"
  25 #include "layout/LEScripts.h"
  26
  27 #include "letsutil.h"
  28 #include "letest.h"
  29
  30 #include "xmlreader.h"
  31
  32 #include "xmlparser.h"
  33
  34 #include <stdlib.h>
  35 #include <stdio.h>
  36 #include <string.h>
  37
  38 //U_NAMESPACE_USE
  39
  40 #define CH_COMMA 0x002C
  41
  42 static le_uint32 *getHexArray(const UnicodeString &numbers, int32_t &arraySize)
  43 {
  44     int32_t offset = -1;
  45
  46     arraySize = 1;
  47     while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
  48         arraySize += 1;
  49     }
  50
  51     le_uint32 *array = NEW_ARRAY(le_uint32, arraySize);
  52     char number[16];
  53     le_int32 count = 0;
  54     le_int32 start = 0, end = 0;
  55     le_int32 len = 0;
  56
  57     // trim leading whitespace
  58     while(u_isUWhiteSpace(numbers[start])) {
  59         start += 1;
  60     }
  61
  62     while((end = numbers.indexOf(CH_COMMA, start)) >= 0) {
  63         len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
  64         number[len] = '\0';
  65         start = end + 1;
  66
  67         sscanf(number, "%x", &array[count++]);
  68
  69         // trim whitespace following the comma
  70         while(u_isUWhiteSpace(numbers[start])) {
  71             start += 1;
  72         }
  73     }
  74
  75     // trim trailing whitespace
  76     end = numbers.length();
  77     while(u_isUWhiteSpace(numbers[end - 1])) {
  78         end -= 1;
  79     }
  80
  81     len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
  82     number[len] = '\0';
  83     sscanf(number, "%x", &array[count]);
  84
  85     return array;
  86 }
  87
  88 static float *getFloatArray(const UnicodeString &numbers, int32_t &arraySize)
  89 {
  90     int32_t offset = -1;
  91
  92     arraySize = 1;
  93     while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
  94         arraySize += 1;
  95     }
  96
  97     float *array = NEW_ARRAY(float, arraySize);
  98     char number[32];
  99     le_int32 count = 0;
 100     le_int32 start = 0, end = 0;
 101     le_int32 len = 0;
 102
 103     // trim leading whitespace
 104     while(u_isUWhiteSpace(numbers[start])) {
 105         start += 1;
 106     }
 107
 108     while((end = numbers.indexOf(CH_COMMA, start)) >= 0) {
 109         len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
 110         number[len] = '\0';
 111         start = end + 1;
 112
 113         sscanf(number, "%f", &array[count++]);
 114
 115         // trim whiteapce following the comma
 116         while(u_isUWhiteSpace(numbers[start])) {
 117             start += 1;
 118         }
 119     }
 120
 121     while(u_isUWhiteSpace(numbers[start])) {
 122         start += 1;
 123     }
 124
 125     // trim trailing whitespace
 126     end = numbers.length();
 127     while(u_isUWhiteSpace(numbers[end - 1])) {
 128         end -= 1;
 129     }
 130
 131     len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
 132     number[len] = '\0';
 133     sscanf(number, "%f", &array[count]);
 134
 135     return array;
 136 }
 137
 138 U_CDECL_BEGIN
 139 void readTestFile(const char *testFilePath, TestCaseCallback callback)
 140 {
 141 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
 142     UErrorCode status = U_ZERO_ERROR;
 143     UXMLParser  *parser = UXMLParser::createParser(status);
 144     UXMLElement *root   = parser->parseFile(testFilePath, status);
 145
 146     if (root == NULL) {
 147         log_err("Could not open the test data file: %s\n", testFilePath);
 148         delete parser;
 149         return;
 150     }
 151
 152     UnicodeString test_case        = UNICODE_STRING_SIMPLE("test-case");
 153     UnicodeString test_text        = UNICODE_STRING_SIMPLE("test-text");
 154     UnicodeString test_font        = UNICODE_STRING_SIMPLE("test-font");
 155     UnicodeString result_glyphs    = UNICODE_STRING_SIMPLE("result-glyphs");
 156     UnicodeString result_indices   = UNICODE_STRING_SIMPLE("result-indices");
 157     UnicodeString result_positions = UNICODE_STRING_SIMPLE("result-positions");
 158
 159     // test-case attributes
 160     UnicodeString id_attr     = UNICODE_STRING_SIMPLE("id");
 161     UnicodeString script_attr = UNICODE_STRING_SIMPLE("script");
 162     UnicodeString lang_attr   = UNICODE_STRING_SIMPLE("lang");
 163
 164     // test-font attributes
 165     UnicodeString name_attr   = UNICODE_STRING_SIMPLE("name");
 166     UnicodeString ver_attr    = UNICODE_STRING_SIMPLE("version");
 167     UnicodeString cksum_attr  = UNICODE_STRING_SIMPLE("checksum");
 168
 169     const UXMLElement *testCase;
 170     int32_t tc = 0;
 171
 172     while((testCase = root->nextChildElement(tc)) != NULL) {
 173         if (testCase->getTagName().compare(test_case) == 0) {
 174             char *id = getCString(testCase->getAttribute(id_attr));
 175             char *script    = getCString(testCase->getAttribute(script_attr));
 176             char *lang      = getCString(testCase->getAttribute(lang_attr));
 177             char *fontName  = NULL;
 178                         char *fontVer   = NULL;
 179                         char *fontCksum = NULL;
 180             const UXMLElement *element;
 181             int32_t ec = 0;
 182             int32_t charCount = 0;
 183             int32_t typoFlags = 3; // kerning + ligatures...
 184             UScriptCode scriptCode;
 185             le_int32 languageCode = -1;
 186             UnicodeString text, glyphs, indices, positions;
 187             int32_t glyphCount = 0, indexCount = 0, positionCount = 0;
 188             TestResult expected = {0, NULL, NULL, NULL};
 189
 190             uscript_getCode(script, &scriptCode, 1, &status);
 191             if (LE_FAILURE(status)) {
 192                 log_err("invalid script name: %s.\n", script);
 193                 goto free_c_strings;
 194             }
 195
 196             if (lang != NULL) {
 197                 languageCode = getLanguageCode(lang);
 198
 199                 if (languageCode < 0) {
 200                     log_err("invalid language name: %s.\n", lang);
 201                     goto free_c_strings;
 202                 }
 203             }
 204
 205             while((element = testCase->nextChildElement(ec)) != NULL) {
 206                 UnicodeString tag = element->getTagName();
 207
 208                 // TODO: make sure that each element is only used once.
 209                 if (tag.compare(test_font) == 0) {
 210                     fontName  = getCString(element->getAttribute(name_attr));
 211                     fontVer   = getCString(element->getAttribute(ver_attr));
 212                     fontCksum = getCString(element->getAttribute(cksum_attr));
 213
 214                 } else if (tag.compare(test_text) == 0) {
 215                     text = element->getText(TRUE);
 216                     charCount = text.length();
 217                 } else if (tag.compare(result_glyphs) == 0) {
 218                     glyphs = element->getText(TRUE);
 219                 } else if (tag.compare(result_indices) == 0) {
 220                     indices = element->getText(TRUE);
 221                 } else if (tag.compare(result_positions) == 0) {
 222                     positions = element->getText(TRUE);
 223                 } else {
 224                     // an unknown tag...
 225                     char *cTag = getCString(&tag);
 226
 227                     log_info("Test %s: unknown element with tag \"%s\"\n", id, cTag);
 228                     freeCString(cTag);
 229                 }
 230             }
 231
 232             expected.glyphs    = (LEGlyphID *) getHexArray(glyphs, glyphCount);
 233             expected.indices   = (le_int32 *)  getHexArray(indices, indexCount);
 234             expected.positions = getFloatArray(positions, positionCount);
 235
 236             expected.glyphCount = glyphCount;
 237
 238             if (glyphCount < charCount || indexCount != glyphCount || positionCount < glyphCount * 2 + 2) {
 239                 log_err("Test %s: inconsistent input data: charCount = %d, glyphCount = %d, indexCount = %d, positionCount = %d\n",
 240                     id, charCount, glyphCount, indexCount, positionCount);
 241                 goto free_expected;
 242             };
 243
 244                         (*callback)(id, fontName, fontVer, fontCksum, scriptCode, languageCode, text.getBuffer(), charCount, &expected);
 245
 246 free_expected:
 247             DELETE_ARRAY(expected.positions);
 248             DELETE_ARRAY(expected.indices);
 249             DELETE_ARRAY(expected.glyphs);
 250
 251 free_c_strings:
 252                         freeCString(fontCksum);
 253                         freeCString(fontVer);
 254                         freeCString(fontName);
 255             freeCString(lang);
 256             freeCString(script);
 257             freeCString(id);
 258         }
 259     }
 260
 261     delete root;
 262     delete parser;
 263 #endif
 264 }
 265 U_CDECL_END