]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/test/intltest/bidiconf.cpp
ICU-461.12.tar.gz
[apple/icu.git] / icuSources / test / intltest / bidiconf.cpp
diff --git a/icuSources/test/intltest/bidiconf.cpp b/icuSources/test/intltest/bidiconf.cpp
new file mode 100644 (file)
index 0000000..c30a76b
--- /dev/null
@@ -0,0 +1,488 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  bidiconf.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009oct16
+*   created by: Markus W. Scherer
+*
+*   BiDi conformance test, using the Unicode BidiTest.txt file.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "unicode/utypes.h"
+#include "unicode/ubidi.h"
+#include "unicode/errorcode.h"
+#include "unicode/localpointer.h"
+#include "unicode/putil.h"
+#include "unicode/unistr.h"
+#include "intltest.h"
+#include "uparse.h"
+
+class BiDiConformanceTest : public IntlTest {
+public:
+    BiDiConformanceTest() :
+        directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
+        errorCount(0) {}
+
+    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
+
+    void TestBidiTest();
+private:
+    char *getUnidataPath(char path[]);
+
+    UBool parseLevels(const char *start);
+    UBool parseOrdering(const char *start);
+    UBool parseInputStringFromBiDiClasses(const char *&start);
+
+    UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
+                      const char *paraLevelName);
+    UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName);
+
+    void printErrorLine(const char *paraLevelName);
+
+    char line[10000];
+    UBiDiLevel levels[1000];
+    uint32_t directionBits;
+    int32_t ordering[1000];
+    int32_t lineNumber;
+    int32_t levelsCount;
+    int32_t orderingCount;
+    int32_t errorCount;
+    UnicodeString inputString;
+};
+
+// Factory for the test driver: hands back a new, caller-owned
+// BiDiConformanceTest through its IntlTest base pointer.
+extern IntlTest *createBiDiConformanceTest() {
+    IntlTest *test=new BiDiConformanceTest();
+    return test;
+}
+/*
+ * Test dispatcher: maps a test index to a test function and its name.
+ * index selects the test, exec says whether this is a run request (a suite
+ * banner is logged only then), and name is an output parameter.
+ * Only index 0 (TestBidiTest) exists; any other index yields an empty name.
+ * NOTE(review): the exact name/run behavior of TESTCASE is defined by the
+ * macro, which lives outside this file (presumably intltest.h) - confirm there.
+ */
+void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
+    if(exec) {
+        logln("TestSuite BiDiConformanceTest: ");
+    }
+    switch (index) {
+        TESTCASE(0, TestBidiTest);  // the only test in this suite
+        default:
+            name="";  // unknown index: report an empty test name
+            break; // needed to end the loop
+    }
+}
+
+// TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
+char *BiDiConformanceTest::getUnidataPath(char path[]) {
+    IcuTestErrorCode errorCode(*this, "getUnidataPath");
+    const int kUnicodeDataTxtLength=15;  // strlen("UnicodeData.txt")
+
+    // Look inside ICU_DATA first.
+    strcpy(path, pathToDataDirectory());
+    strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
+    FILE *f=fopen(path, "r");
+    if(f!=NULL) {
+        fclose(f);
+        *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
+        return path;
+    }
+
+    // As a fallback, try to guess where the source data was located
+    // at the time ICU was built, and look there.
+#   ifdef U_TOPSRCDIR
+        strcpy(path, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
+#   else
+        strcpy(path, loadTestData(errorCode));
+        strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
+                     U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
+                     U_FILE_SEP_STRING "data");
+#   endif
+    strcat(path, U_FILE_SEP_STRING);
+    strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
+    f=fopen(path, "r");
+    if(f!=NULL) {
+        fclose(f);
+        *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
+        return path;
+    }
+    return NULL;
+}
+/*
+ * Declares the LocalStdioFilePointer type from FILE and fclose.
+ * TestBidiTest() wraps the fopen() result for BidiTest.txt in it and uses
+ * isNull()/getAlias(). NOTE(review): presumably the wrapper calls fclose()
+ * when it goes out of scope - confirm in unicode/localpointer.h.
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
+
+UBool BiDiConformanceTest::parseLevels(const char *start) {
+    directionBits=0;
+    levelsCount=0;
+    while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
+        if(*start=='x') {
+            levels[levelsCount++]=UBIDI_DEFAULT_LTR;
+            ++start;
+        } else {
+            char *end;
+            uint32_t value=(uint32_t)strtoul(start, &end, 10);
+            if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
+                errln("@Levels: parse error at %s", start);
+                return FALSE;
+            }
+            levels[levelsCount++]=(UBiDiLevel)value;
+            directionBits|=(1<<(value&1));
+            start=end;
+        }
+    }
+    return TRUE;
+}
+
+UBool BiDiConformanceTest::parseOrdering(const char *start) {
+    orderingCount=0;
+    while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
+        char *end;
+        uint32_t value=(uint32_t)strtoul(start, &end, 10);
+        if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) {
+            errln("@Reorder: parse error at %s", start);
+            return FALSE;
+        }
+        ordering[orderingCount++]=(int32_t)value;
+        start=end;
+    }
+    return TRUE;
+}
+/*
+ * One stand-in character per BiDi class, indexed by the class value.
+ * parseInputStringFromBiDiClasses() appends charFromBiDiClass[class] to the
+ * input string, and biDiConfUBiDiClassCallback() maps each character back
+ * to its table index, so every entry must be distinct.
+ * Values are spelled as hex code units with the character shown alongside.
+ */
+static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
+    0x6c,   // 'l' for L
+    0x52,   // 'R' for R
+    0x33,   // '3' for EN
+    0x2d,   // '-' for ES
+    0x25,   // '%' for ET
+    0x39,   // '9' for AN
+    0x2c,   // ',' for CS
+    0x2f,   // '/' for B
+    0x5f,   // '_' for S
+    0x20,   // ' ' for WS
+    0x3d,   // '=' for ON
+    0x65,   // 'e' for LRE
+    0x6f,   // 'o' for LRO
+    0x41,   // 'A' for AL
+    0x45,   // 'E' for RLE
+    0x4f,   // 'O' for RLO
+    0x2a,   // '*' for PDF
+    0x60,   // '`' for NSM
+    0x7c    // '|' for BN
+};
+
+U_CDECL_BEGIN
+
+/*
+ * Class callback installed with ubidi_setClassCallback().
+ * Maps a stand-in character back to its BiDi class by searching
+ * charFromBiDiClass[]; the context argument is unused.
+ * Any character that is not in the table gets U_BIDI_CLASS_DEFAULT,
+ * which should not occur during testing.
+ */
+static UCharDirection U_CALLCONV
+biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
+    int classIndex=0;
+    while(classIndex<U_CHAR_DIRECTION_COUNT && c!=charFromBiDiClass[classIndex]) {
+        ++classIndex;
+    }
+    if(classIndex<U_CHAR_DIRECTION_COUNT) {
+        return (UCharDirection)classIndex;
+    }
+    // Character not in our hardcoded table.
+    return U_BIDI_CLASS_DEFAULT;
+}
+
+U_CDECL_END
+/*
+ * Length of the short name that parseInputStringFromBiDiClasses() matches for
+ * each BiDi class, indexed by the class value. The extra final entry (0) is
+ * the one read when no class was recognized (index U_CHAR_DIRECTION_COUNT).
+ */
+static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
+    1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0
+};
+
+UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
+    inputString.remove();
+    /*
+     * Lengthy but fast BiDi class parser.
+     * A simple parser could terminate or extract the name string and use
+     *   int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
+     * but that makes this test take significantly more time.
+     */
+    while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
+        UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
+        // Compare each character once until we have a match on
+        // a complete, short BiDi class name.
+        if(start[0]=='L') {
+            if(start[1]=='R') {
+                if(start[2]=='E') {
+                    biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
+                } else if(start[2]=='O') {
+                    biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
+                }
+            } else {
+                biDiClass=U_LEFT_TO_RIGHT;
+            }
+        } else if(start[0]=='R') {
+            if(start[1]=='L') {
+                if(start[2]=='E') {
+                    biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
+                } else if(start[2]=='O') {
+                    biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
+                }
+            } else {
+                biDiClass=U_RIGHT_TO_LEFT;
+            }
+        } else if(start[0]=='E') {
+            if(start[1]=='N') {
+                biDiClass=U_EUROPEAN_NUMBER;
+            } else if(start[1]=='S') {
+                biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
+            } else if(start[1]=='T') {
+                biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
+            }
+        } else if(start[0]=='A') {
+            if(start[1]=='L') {
+                biDiClass=U_RIGHT_TO_LEFT_ARABIC;
+            } else if(start[1]=='N') {
+                biDiClass=U_ARABIC_NUMBER;
+            }
+        } else if(start[0]=='C' && start[1]=='S') {
+            biDiClass=U_COMMON_NUMBER_SEPARATOR;
+        } else if(start[0]=='B') {
+            if(start[1]=='N') {
+                biDiClass=U_BOUNDARY_NEUTRAL;
+            } else {
+                biDiClass=U_BLOCK_SEPARATOR;
+            }
+        } else if(start[0]=='S') {
+            biDiClass=U_SEGMENT_SEPARATOR;
+        } else if(start[0]=='W' && start[1]=='S') {
+            biDiClass=U_WHITE_SPACE_NEUTRAL;
+        } else if(start[0]=='O' && start[1]=='N') {
+            biDiClass=U_OTHER_NEUTRAL;
+        } else if(start[0]=='P' && start[1]=='D' && start[2]=='F') {
+            biDiClass=U_POP_DIRECTIONAL_FORMAT;
+        } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
+            biDiClass=U_DIR_NON_SPACING_MARK;
+        }
+        // Now we verify that the class name is terminated properly,
+        // and not just the start of a longer word.
+        int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
+        char c=start[biDiClassNameLength];
+        if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) {
+            errln("BiDi class string not recognized at %s", start);
+            return FALSE;
+        }
+        inputString.append(charFromBiDiClass[biDiClass]);
+        start+=biDiClassNameLength;
+    }
+    return TRUE;
+}
+
+void BiDiConformanceTest::TestBidiTest() {
+    IcuTestErrorCode errorCode(*this, "TestBidiTest");
+    const char *sourceTestDataPath=getSourceTestData(errorCode);
+    if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
+                                      "folder (getSourceTestData())")) {
+        return;
+    }
+    char bidiTestPath[400];
+    strcpy(bidiTestPath, sourceTestDataPath);
+    strcat(bidiTestPath, "BidiTest.txt");
+    LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
+    if(bidiTestFile.isNull()) {
+        errln("unable to open %s", bidiTestPath);
+        return;
+    }
+    LocalUBiDiPointer ubidi(ubidi_open());
+    ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
+                           NULL, NULL, errorCode);
+    if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
+        return;
+    }
+    lineNumber=0;
+    levelsCount=0;
+    orderingCount=0;
+    errorCount=0;
+    while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
+        ++lineNumber;
+        // Remove trailing comments and whitespace.
+        char *commentStart=strchr(line, '#');
+        if(commentStart!=NULL) {
+            *commentStart=0;
+        }
+        u_rtrim(line);
+        const char *start=u_skipWhitespace(line);
+        if(*start==0) {
+            continue;  // Skip empty and comment-only lines.
+        }
+        if(*start=='@') {
+            ++start;
+            if(0==strncmp(start, "Levels:", 7)) {
+                if(!parseLevels(start+7)) {
+                    return;
+                }
+            } else if(0==strncmp(start, "Reorder:", 8)) {
+                if(!parseOrdering(start+8)) {
+                    return;
+                }
+            }
+            // Skip unknown @Xyz: ...
+        } else {
+            if(!parseInputStringFromBiDiClasses(start)) {
+                return;
+            }
+            start=u_skipWhitespace(start);
+            if(*start!=';') {
+                errln("missing ; separator on input line %s", line);
+                return;
+            }
+            start=u_skipWhitespace(start+1);
+            char *end;
+            uint32_t bitset=(uint32_t)strtoul(start, &end, 16);
+            if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
+                errln("input bitset parse error at %s", start);
+                return;
+            }
+            // Loop over the bitset.
+            static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL };
+            static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
+            for(int i=0; i<=3; ++i) {
+                if(bitset&(1<<i)) {
+                    ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
+                                  paraLevels[i], NULL, errorCode);
+                    const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
+                    if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
+                        errln("Input line %d: %s", (int)lineNumber, line);
+                        return;
+                    }
+                    if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()),
+                                    paraLevelNames[i])) {
+                        // continue outerLoop;  does not exist in C++
+                        // so just break out of the inner loop.
+                        break;
+                    }
+                    if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) {
+                        // continue outerLoop;  does not exist in C++
+                        // so just break out of the inner loop.
+                        break;
+                    }
+                }
+            }
+        }
+    }
+}
+
+static UChar printLevel(UBiDiLevel level) {
+    if(level<UBIDI_DEFAULT_LTR) {
+        return 0x30+level;
+    } else {
+        return 0x78;  // 'x'
+    }
+}
+
+// Summarizes which directions occur in actualLevels[0..actualCount-1]:
+// bit 0 is set if some level is even, bit 1 if some level is odd.
+static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
+    uint32_t bits=0;
+    const UBiDiLevel *level=actualLevels;
+    for(int32_t remaining=actualCount; remaining>0; --remaining, ++level) {
+        bits|=(1<<(*level&1));
+    }
+    return bits;
+}
+
+UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
+                                       const char *paraLevelName) {
+    UBool isOk=TRUE;
+    if(levelsCount!=actualCount) {
+        errln("Wrong number of level values; expected %d actual %d",
+              (int)levelsCount, (int)actualCount);
+        isOk=FALSE;
+    } else {
+        for(int32_t i=0; i<actualCount; ++i) {
+            if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) {
+                if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) {
+                    // ICU used a shortcut:
+                    // Since the text is unidirectional, it did not store the resolved
+                    // levels but just returns all levels as the paragraph level 0 or 1.
+                    // The reordering result is the same, so this is fine.
+                    break;
+                } else {
+                    errln("Wrong level value at index %d; expected %d actual %d",
+                          (int)i, levels[i], actualLevels[i]);
+                    isOk=FALSE;
+                    break;
+                }
+            }
+        }
+    }
+    if(!isOk) {
+        printErrorLine(paraLevelName);
+        UnicodeString els("Expected levels:   ");
+        int32_t i;
+        for(i=0; i<levelsCount; ++i) {
+            els.append((UChar)0x20).append(printLevel(levels[i]));
+        }
+        UnicodeString als("Actual   levels:   ");
+        for(i=0; i<actualCount; ++i) {
+            als.append((UChar)0x20).append(printLevel(actualLevels[i]));
+        }
+        errln(els);
+        errln(als);
+    }
+    return isOk;
+}
+
+// Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
+// does not work for custom BiDi class assignments
+// and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
+// Therefore we just skip the indexes for BiDi controls while comparing
+// with the expected ordering that has them omitted.
+UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) {
+    UBool isOk=TRUE;
+    IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()");
+    int32_t resultLength=ubidi_getResultLength(ubidi);  // visual length including BiDi controls
+    int32_t i, visualIndex;
+    // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
+    // and loop over each run's indexes, but that seems unnecessary for this test code.
+    for(i=visualIndex=0; i<resultLength; ++i) {
+        int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
+        if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
+            errln("Input line %d: %s", (int)lineNumber, line);
+            return FALSE;
+        }
+        if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
+            continue;  // BiDi control, omitted from expected ordering.
+        }
+        if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
+            errln("Wrong ordering value at visual index %d; expected %d actual %d",
+                  (int)visualIndex, ordering[visualIndex], logicalIndex);
+            isOk=FALSE;
+            break;
+        }
+        ++visualIndex;
+    }
+    // visualIndex is now the visual length minus the BiDi controls,
+    // which should match the length of the BidiTest.txt ordering.
+    if(isOk && orderingCount!=visualIndex) {
+        errln("Wrong number of ordering values; expected %d actual %d",
+              (int)orderingCount, (int)visualIndex);
+        isOk=FALSE;
+    }
+    if(!isOk) {
+        printErrorLine(paraLevelName);
+        UnicodeString eord("Expected ordering: ");
+        for(i=0; i<orderingCount; ++i) {
+            eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
+        }
+        UnicodeString aord("Actual   ordering: ");
+        for(i=0; i<resultLength; ++i) {
+            int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
+            if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
+                aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex));
+            }
+        }
+        errln(eord);
+        errln(aord);
+    }
+    return isOk;
+}
+
+// Counts one more failure and reports its context: the current line number
+// and text, the stand-in input string, and the paragraph level being tested.
+void BiDiConformanceTest::printErrorLine(const char *paraLevelName) {
+    ++errorCount;
+    errln("Input line %5d:   %s", (int)lineNumber, line);
+    UnicodeString inputReport("Input string:       ");
+    inputReport.append(inputString);
+    errln(inputReport);
+    errln("Para level:         %s", paraLevelName);
+}