[apple/icu.git] / icuSources / test / letest / xmlreader.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 *
 *   Copyright (C) 1999-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 */

#include "unicode/utypes.h"
#include "unicode/uclean.h"
#include "unicode/uchar.h"
#include "unicode/unistr.h"
#include "unicode/uscript.h"
#include "unicode/putil.h"
#include "unicode/ctest.h"

#include "layout/LETypes.h"
#include "layout/LEScripts.h"

#include "letsutil.h"
#include "letest.h"

#include "xmlreader.h"

#include "xmlparser.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

//U_NAMESPACE_USE

#define CH_COMMA 0x002C

/**
 * Parse a comma-separated list of hexadecimal numbers into a newly
 * allocated array.
 *
 * The array holds one element per comma in numbers plus one, so an empty
 * string produces a single element. White space around each field is
 * ignored. A field that is empty or cannot be parsed as a hexadecimal
 * number is stored as 0, and a field longer than the internal conversion
 * buffer is truncated before it is parsed.
 *
 * @param numbers   the text to parse
 * @param arraySize set to the number of elements in the returned array, or
 *                  to 0 if NEW_ARRAY returned NULL
 * @return the array, obtained from NEW_ARRAY and owned by the caller, or
 *         NULL if NEW_ARRAY returned NULL
 */
static le_uint32 *getHexArray(const UnicodeString &numbers, int32_t &arraySize)
{
    // Count the fields first so the array can be allocated in one piece.
    int32_t fieldCount = 1;
    int32_t offset = -1;

    while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
        fieldCount += 1;
    }

    le_uint32 *array = NEW_ARRAY(le_uint32, fieldCount);

    if (array == NULL) {
        arraySize = 0;
        return NULL;
    }

    arraySize = fieldCount;

    char number[16];
    // Always leave room for the terminating NUL.
    const int32_t maxChars = (int32_t) ARRAY_SIZE(number) - 1;
    const int32_t length = numbers.length();
    int32_t start = 0;

    for (int32_t count = 0; count < fieldCount; count += 1) {
        // skip white space in front of the field
        while(start < length && u_isUWhiteSpace(numbers[start])) {
            start += 1;
        }

        int32_t end = numbers.indexOf(CH_COMMA, start);
        int32_t next;

        if (end >= 0) {
            next = end + 1;
        } else {
            // last field: it runs to the end of the string, minus trailing white space
            end = length;
            while(end > start && u_isUWhiteSpace(numbers[end - 1])) {
                end -= 1;
            }
            next = length;
        }

        // Never ask extract() for more than the buffer can hold: when the
        // text does not fit it returns the full length without copying,
        // which must not be used as an index into number[].
        int32_t fieldLength = end - start;

        if (fieldLength > maxChars) {
            fieldLength = maxChars;
        }

        int32_t len = numbers.extract(start, fieldLength, number, ARRAY_SIZE(number), US_INV);

        if (len < 0 || len > maxChars) {
            len = 0;
        }

        number[len] = '\0';

        // %x stores through an unsigned int *, so parse into a temporary of
        // exactly that type; it stays 0 when nothing could be converted.
        unsigned int value = 0;

        if (sscanf(number, "%x", &value) != 1) {
            value = 0;
        }

        array[count] = (le_uint32) value;
        start = next;
    }

    return array;
}

static float *getFloatArray(const UnicodeString &numbers, int32_t &arraySize)
{
    int32_t offset = -1;

    arraySize = 1;
    while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
        arraySize += 1;
    }

    float *array = NEW_ARRAY(float, arraySize);
    char number[32];
    le_int32 count = 0;
    le_int32 start = 0, end = 0;
    le_int32 len = 0;

    // trim leading whitespace
    while(u_isUWhiteSpace(numbers[start])) {
        start += 1;
    }

    while((end = numbers.indexOf(CH_COMMA, start)) >= 0) {
        len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
        number[len] = '\0';
        start = end + 1;

        sscanf(number, "%f", &array[count++]);

        // trim whiteapce following the comma
        while(u_isUWhiteSpace(numbers[start])) {
            start += 1;
        }
    }

    while(u_isUWhiteSpace(numbers[start])) {
        start += 1;
    }

    // trim trailing whitespace
    end = numbers.length();
    while(u_isUWhiteSpace(numbers[end - 1])) {
        end -= 1;
    }

    len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
    number[len] = '\0';
    sscanf(number, "%f", &array[count]);

    return array;
}

U_CDECL_BEGIN
void readTestFile(const char *testFilePath, TestCaseCallback callback)
{
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
    UErrorCode status = U_ZERO_ERROR;
    UXMLParser  *parser = UXMLParser::createParser(status);
    UXMLElement *root   = parser->parseFile(testFilePath, status);

    if (root == NULL) {
        log_err("Could not open the test data file: %s\n", testFilePath);
        delete parser;
        return;
    }

    UnicodeString test_case        = UNICODE_STRING_SIMPLE("test-case");
    UnicodeString test_text        = UNICODE_STRING_SIMPLE("test-text");
    UnicodeString test_font        = UNICODE_STRING_SIMPLE("test-font");
    UnicodeString result_glyphs    = UNICODE_STRING_SIMPLE("result-glyphs");
    UnicodeString result_indices   = UNICODE_STRING_SIMPLE("result-indices");
    UnicodeString result_positions = UNICODE_STRING_SIMPLE("result-positions");

    // test-case attributes
    UnicodeString id_attr     = UNICODE_STRING_SIMPLE("id");
    UnicodeString script_attr = UNICODE_STRING_SIMPLE("script");
    UnicodeString lang_attr   = UNICODE_STRING_SIMPLE("lang");

    // test-font attributes
    UnicodeString name_attr   = UNICODE_STRING_SIMPLE("name");
    UnicodeString ver_attr    = UNICODE_STRING_SIMPLE("version");
    UnicodeString cksum_attr  = UNICODE_STRING_SIMPLE("checksum");

    const UXMLElement *testCase;
    int32_t tc = 0;

    while((testCase = root->nextChildElement(tc)) != NULL) {
        if (testCase->getTagName().compare(test_case) == 0) {
            char *id = getCString(testCase->getAttribute(id_attr));
            char *script    = getCString(testCase->getAttribute(script_attr));
            char *lang      = getCString(testCase->getAttribute(lang_attr));
            char *fontName  = NULL;
			char *fontVer   = NULL;
			char *fontCksum = NULL;
            const UXMLElement *element;
            int32_t ec = 0;
            int32_t charCount = 0;
            // int32_t typoFlags = 3; // kerning + ligatures...
            UScriptCode scriptCode;
            le_int32 languageCode = -1;
            UnicodeString text, glyphs, indices, positions;
            int32_t glyphCount = 0, indexCount = 0, positionCount = 0;
            TestResult expected = {0, NULL, NULL, NULL};

            uscript_getCode(script, &scriptCode, 1, &status);
            if (LE_FAILURE(status)) {
                log_err("invalid script name: %s.\n", script);
                goto free_c_strings;
            }

            if (lang != NULL) {
                languageCode = getLanguageCode(lang);

                if (languageCode < 0) {
                    log_err("invalid language name: %s.\n", lang);
                    goto free_c_strings;
                }
            }

            while((element = testCase->nextChildElement(ec)) != NULL) {
                UnicodeString tag = element->getTagName();

                // TODO: make sure that each element is only used once.
                if (tag.compare(test_font) == 0) {
                    fontName  = getCString(element->getAttribute(name_attr));
                    fontVer   = getCString(element->getAttribute(ver_attr));
                    fontCksum = getCString(element->getAttribute(cksum_attr));

                } else if (tag.compare(test_text) == 0) {
                    text = element->getText(TRUE);
                    charCount = text.length();
                } else if (tag.compare(result_glyphs) == 0) {
                    glyphs = element->getText(TRUE);
                } else if (tag.compare(result_indices) == 0) {
                    indices = element->getText(TRUE);
                } else if (tag.compare(result_positions) == 0) {
                    positions = element->getText(TRUE);
                } else {
                    // an unknown tag...
                    char *cTag = getCString(&tag);

                    log_info("Test %s: unknown element with tag \"%s\"\n", id, cTag);
                    freeCString(cTag);
                }
            }

            expected.glyphs    = (LEGlyphID *) getHexArray(glyphs, glyphCount);
            expected.indices   = (le_int32 *)  getHexArray(indices, indexCount);
            expected.positions = getFloatArray(positions, positionCount);

            expected.glyphCount = glyphCount;

            if (glyphCount < charCount || indexCount != glyphCount || positionCount < glyphCount * 2 + 2) {
                log_err("Test %s: inconsistent input data: charCount = %d, glyphCount = %d, indexCount = %d, positionCount = %d\n",
                    id, charCount, glyphCount, indexCount, positionCount);
                goto free_expected;
            };

			(*callback)(id, fontName, fontVer, fontCksum, scriptCode, languageCode, text.getBuffer(), charCount, &expected);

free_expected:
            DELETE_ARRAY(expected.positions);
            DELETE_ARRAY(expected.indices);
            DELETE_ARRAY(expected.glyphs);

free_c_strings:
			freeCString(fontCksum);
			freeCString(fontVer);
			freeCString(fontName);
            freeCString(lang);
            freeCString(script);
            freeCString(id);
        }
    }

    delete root;
    delete parser;
#endif
}
U_CDECL_END
Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
f3c0d7a5 A	2	// License & terms of use: http://www.unicode.org/copyright.html
46f4442e A	3	/*
	4	*******************************************************************************
	5	*
b331163b	6	* Copyright (C) 1999-2014, International Business Machines
46f4442e A	7	* Corporation and others. All Rights Reserved.
	8	*
	9	*******************************************************************************
	10	*/
	11
	12	#include "unicode/utypes.h"
	13	#include "unicode/uclean.h"
	14	#include "unicode/uchar.h"
	15	#include "unicode/unistr.h"
	16	#include "unicode/uscript.h"
	17	#include "unicode/putil.h"
	18	#include "unicode/ctest.h"
	19
	20	#include "layout/LETypes.h"
	21	#include "layout/LEScripts.h"
	22
	23	#include "letsutil.h"
	24	#include "letest.h"
	25
	26	#include "xmlreader.h"
	27
	28	#include "xmlparser.h"
	29
	30	#include <stdlib.h>
	31	#include <stdio.h>
	32	#include <string.h>
	33
	34	//U_NAMESPACE_USE
	35
	36	#define CH_COMMA 0x002C
	37
	38	static le_uint32 *getHexArray(const UnicodeString &numbers, int32_t &arraySize)
	39	{
	40	int32_t offset = -1;
	41
	42	arraySize = 1;
	43	while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
	44	arraySize += 1;
	45	}
	46
	47	le_uint32 *array = NEW_ARRAY(le_uint32, arraySize);
	48	char number[16];
	49	le_int32 count = 0;
	50	le_int32 start = 0, end = 0;
	51	le_int32 len = 0;
	52
	53	// trim leading whitespace
	54	while(u_isUWhiteSpace(numbers[start])) {
	55	start += 1;
	56	}
	57
	58	while((end = numbers.indexOf(CH_COMMA, start)) >= 0) {
	59	len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
	60	number[len] = '\0';
	61	start = end + 1;
	62
	63	sscanf(number, "%x", &array[count++]);
	64
	65	// trim whitespace following the comma
	66	while(u_isUWhiteSpace(numbers[start])) {
	67	start += 1;
	68	}
	69	}
	70
71	// trim trailing whitespace
72	end = numbers.length();
73	while(u_isUWhiteSpace(numbers[end - 1])) {
74	end -= 1;
75	}
76
77	len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
78	number[len] = '\0';
79	sscanf(number, "%x", &array[count]);
80
81	return array;
82	}
83
84	static float *getFloatArray(const UnicodeString &numbers, int32_t &arraySize)
85	{
86	int32_t offset = -1;
87
88	arraySize = 1;
89	while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
90	arraySize += 1;
91	}
92
93	float *array = NEW_ARRAY(float, arraySize);
94	char number[32];
95	le_int32 count = 0;
96	le_int32 start = 0, end = 0;
97	le_int32 len = 0;
98
99	// trim leading whitespace
100	while(u_isUWhiteSpace(numbers[start])) {
101	start += 1;
102	}
103
104	while((end = numbers.indexOf(CH_COMMA, start)) >= 0) {
105	len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
106	number[len] = '\0';
107	start = end + 1;
108
109	sscanf(number, "%f", &array[count++]);
110
111	// trim whiteapce following the comma
112	while(u_isUWhiteSpace(numbers[start])) {
113	start += 1;
114	}
115	}
116
117	while(u_isUWhiteSpace(numbers[start])) {
118	start += 1;
119	}
120
121	// trim trailing whitespace
122	end = numbers.length();
123	while(u_isUWhiteSpace(numbers[end - 1])) {
124	end -= 1;
125	}
126
127	len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
128	number[len] = '\0';
129	sscanf(number, "%f", &array[count]);
130
131	return array;
132	}
133
134	U_CDECL_BEGIN
135	void readTestFile(const char *testFilePath, TestCaseCallback callback)
136	{
137	#if !UCONFIG_NO_REGULAR_EXPRESSIONS
138	UErrorCode status = U_ZERO_ERROR;
139	UXMLParser *parser = UXMLParser::createParser(status);
140	UXMLElement *root = parser->parseFile(testFilePath, status);
141
142	if (root == NULL) {
143	log_err("Could not open the test data file: %s\n", testFilePath);
144	delete parser;
145	return;
146	}
147
148	UnicodeString test_case = UNICODE_STRING_SIMPLE("test-case");
149	UnicodeString test_text = UNICODE_STRING_SIMPLE("test-text");
150	UnicodeString test_font = UNICODE_STRING_SIMPLE("test-font");
151	UnicodeString result_glyphs = UNICODE_STRING_SIMPLE("result-glyphs");
152	UnicodeString result_indices = UNICODE_STRING_SIMPLE("result-indices");
153	UnicodeString result_positions = UNICODE_STRING_SIMPLE("result-positions");
154
155	// test-case attributes
156	UnicodeString id_attr = UNICODE_STRING_SIMPLE("id");
157	UnicodeString script_attr = UNICODE_STRING_SIMPLE("script");
158	UnicodeString lang_attr = UNICODE_STRING_SIMPLE("lang");
159
160	// test-font attributes
161	UnicodeString name_attr = UNICODE_STRING_SIMPLE("name");
162	UnicodeString ver_attr = UNICODE_STRING_SIMPLE("version");
163	UnicodeString cksum_attr = UNICODE_STRING_SIMPLE("checksum");
164
165	const UXMLElement *testCase;
166	int32_t tc = 0;
167
168	while((testCase = root->nextChildElement(tc)) != NULL) {
169	if (testCase->getTagName().compare(test_case) == 0) {
170	char *id = getCString(testCase->getAttribute(id_attr));
171	char *script = getCString(testCase->getAttribute(script_attr));
172	char *lang = getCString(testCase->getAttribute(lang_attr));
173	char *fontName = NULL;
174	char *fontVer = NULL;
175	char *fontCksum = NULL;
176	const UXMLElement *element;
177	int32_t ec = 0;
178	int32_t charCount = 0;
b331163b	179	// int32_t typoFlags = 3; // kerning + ligatures...
46f4442e A	180	UScriptCode scriptCode;
	181	le_int32 languageCode = -1;
	182	UnicodeString text, glyphs, indices, positions;
	183	int32_t glyphCount = 0, indexCount = 0, positionCount = 0;
	184	TestResult expected = {0, NULL, NULL, NULL};
	185
	186	uscript_getCode(script, &scriptCode, 1, &status);
	187	if (LE_FAILURE(status)) {
	188	log_err("invalid script name: %s.\n", script);
	189	goto free_c_strings;
	190	}
	191
	192	if (lang != NULL) {
	193	languageCode = getLanguageCode(lang);
	194
	195	if (languageCode < 0) {
	196	log_err("invalid language name: %s.\n", lang);
	197	goto free_c_strings;
	198	}
	199	}
	200
	201	while((element = testCase->nextChildElement(ec)) != NULL) {
	202	UnicodeString tag = element->getTagName();
	203
	204	// TODO: make sure that each element is only used once.
	205	if (tag.compare(test_font) == 0) {
	206	fontName = getCString(element->getAttribute(name_attr));
	207	fontVer = getCString(element->getAttribute(ver_attr));
	208	fontCksum = getCString(element->getAttribute(cksum_attr));
	209
	210	} else if (tag.compare(test_text) == 0) {
	211	text = element->getText(TRUE);
	212	charCount = text.length();
	213	} else if (tag.compare(result_glyphs) == 0) {
	214	glyphs = element->getText(TRUE);
	215	} else if (tag.compare(result_indices) == 0) {
	216	indices = element->getText(TRUE);
	217	} else if (tag.compare(result_positions) == 0) {
	218	positions = element->getText(TRUE);
	219	} else {
	220	// an unknown tag...
	221	char *cTag = getCString(&tag);
	222
	223	log_info("Test %s: unknown element with tag \"%s\"\n", id, cTag);
	224	freeCString(cTag);
	225	}
	226	}
	227
	228	expected.glyphs = (LEGlyphID *) getHexArray(glyphs, glyphCount);
	229	expected.indices = (le_int32 *) getHexArray(indices, indexCount);
	230	expected.positions = getFloatArray(positions, positionCount);
	231
	232	expected.glyphCount = glyphCount;
	233
	234	if (glyphCount < charCount || indexCount != glyphCount || positionCount < glyphCount * 2 + 2) {
	235	log_err("Test %s: inconsistent input data: charCount = %d, glyphCount = %d, indexCount = %d, positionCount = %d\n",
	236	id, charCount, glyphCount, indexCount, positionCount);
	237	goto free_expected;
	238	};
	239
	240	(*callback)(id, fontName, fontVer, fontCksum, scriptCode, languageCode, text.getBuffer(), charCount, &expected);
	241
	242	free_expected:
	243	DELETE_ARRAY(expected.positions);
244	DELETE_ARRAY(expected.indices);
245	DELETE_ARRAY(expected.glyphs);
246
247	free_c_strings:
248	freeCString(fontCksum);
249	freeCString(fontVer);
250	freeCString(fontName);
251	freeCString(lang);
252	freeCString(script);
253	freeCString(id);
254	}
255	}
256
257	delete root;
258	delete parser;
259	#endif
260	}
261	U_CDECL_END