ICU-461.18.tar.gz

[apple/icu.git] / icuSources / test / cintltst / citertst.c
diff --git a/icuSources/test/cintltst/citertst.c b/icuSources/test/cintltst/citertst.c

index d802502524b03450c4bb5ee264ed448991f6ef67..610f007d761a8ac5cf470f16e31a318052c3166a 100644 (file)
--- a/icuSources/test/cintltst/citertst.c
+++ b/icuSources/test/cintltst/citertst.c
@@ -1,6 +1,6 @@
  /********************************************************************
   * COPYRIGHT:
- * Copyright (c) 1997-2008, International Business Machines Corporation and
+ * Copyright (c) 1997-2011, International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************/
  /********************************************************************************
@@ -22,6 +22,7 @@
  #if !UCONFIG_NO_COLLATION
  
  #include "unicode/ucol.h"
+#include "unicode/ucoleitr.h"
  #include "unicode/uloc.h"
  #include "unicode/uchar.h"
  #include "unicode/ustring.h"
@@ -35,6 +36,7 @@
  #include "cstring.h"
  #include "ucol_imp.h"
  #include "ucol_tok.h"
+#include "uparse.h"
  #include <stdio.h>
  
  extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
@@ -57,6 +59,7 @@ void addCollIterTest(TestNode** root)
      addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
      addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
      addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
+    addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
  }
  
  /* The locales we support */
@@ -80,7 +83,7 @@ static void TestBug672() {
          UCollationElements *titer = ucol_openElements(coll, text, -1,
                                                       &status);
          if (U_FAILURE(status)) {
-            log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
+            log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
                      myErrorName(status));
              return;
          }
@@ -156,7 +159,7 @@ static void TestBug672Normalize() {
          pitr = ucol_openElements(coll, pattern, -1, &status);
          titer = ucol_openElements(coll, text, -1, &status);
          if (U_FAILURE(status)) {
-            log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
+            log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
                      myErrorName(status));
              return;
          }
@@ -226,7 +229,7 @@ static void TestUnicodeChar()
      UChar *test;
      en_us = ucol_open("en_US", &status);
      if (U_FAILURE(status)){
-       log_err("ERROR: in creation of collation data using ucol_open()\n %s\n",
+       log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
                myErrorName(status));
         return;
      }
@@ -293,7 +296,7 @@ static void TestNormalizedUnicodeChar()
      /* thai should have normalization on */
      th_th = ucol_open("th_TH", &status);
      if (U_FAILURE(status)){
-        log_err("ERROR: in creation of thai collation using ucol_open()\n %s\n",
+        log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
                myErrorName(status));
          return;
      }
@@ -369,7 +372,7 @@ static void TestNormalization()
      coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
      ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
      if (U_FAILURE(status)){
-        log_err("ERROR: in creation of collator using ucol_openRules()\n %s\n",
+        log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
                myErrorName(status));
          return;
      }
@@ -434,7 +437,7 @@ static void TestPrevious()
      iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
      log_verbose("English locale testing back and forth\n");
      if(U_FAILURE(status)){
-        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
+        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
              myErrorName(status));
          ucol_close(coll);
          return;
@@ -574,7 +577,7 @@ static void TestOffset()
      log_verbose("Testing getOffset and setOffset for collations\n");
      iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
      if(U_FAILURE(status)){
-        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
+        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
              myErrorName(status));
          ucol_close(en_us);
          return;
@@ -716,7 +719,7 @@ static void TestSetText()
      log_verbose("testing setText for Collation elements\n");
      iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
      if(U_FAILURE(status)){
-        log_err("ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
+        log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
              myErrorName(status));
      ucol_close(en_us);
          return;
@@ -786,7 +789,7 @@ static void TestMaxExpansion()
      UChar               ch     = 0;
      UChar32             unassigned = 0xEFFFD;
      UChar               supplementary[2];
-    uint32_t            index = 0;
+    uint32_t            stringOffset = 0;
      UBool               isError = FALSE;
      uint32_t            sorder = 0;
      UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
@@ -858,7 +861,7 @@ static void TestMaxExpansion()
                    ch, 3);
        }
  
-      U16_APPEND(supplementary, index, 2, unassigned, isError);
+      U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
        ucol_setText(iter, supplementary, 2, &status);
        sorder = ucol_previous(iter, &status);
  
@@ -905,7 +908,7 @@ static void TestMaxExpansion()
        ucol_closeElements(iter);
        ucol_close(coll);
      } else {
-      log_data_err("Couldn't open collator\n");
+      log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
      }
  
  }
@@ -994,11 +997,6 @@ static void TestSmallBuffer()
        free(orders);
  
        ucol_reset(testiter);
-      /* ensures that the writable buffer was cleared */
-      if (testiter->iteratordata_.writableBuffer !=
-          testiter->iteratordata_.stackWritableBuffer) {
-          log_err("Error Writable buffer in collation element iterator not reset\n");
-      }
  
        /* ensures closing of elements done properly to clear writable buffer */
        ucol_next(testiter, &status);
@@ -1007,7 +1005,7 @@ static void TestSmallBuffer()
        ucol_closeElements(iter);
        ucol_close(coll);
      } else {
-      log_data_err("Couldn't open collator\n");
+      log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
      }
  }
  
@@ -1033,41 +1031,40 @@ static int32_t hex2num(char hex) {
  * @param codepoints array for storage, assuming size > 5
  * @return position at the end of the codepoint section
  */
-static char * getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
-    char *pStartCP = str;
-    char *pEndCP   = str + 4;
-
-    *codepoints = (UChar)((hex2num(*pStartCP) << 12) |
-                          (hex2num(*(pStartCP + 1)) << 8) |
-                          (hex2num(*(pStartCP + 2)) << 4) |
-                          (hex2num(*(pStartCP + 3))));
-    if (*pEndCP == '|' || *(pEndCP+1) == '|') {
-        /* pre-context rule */
-        pStartCP = pEndCP;
-        while (*pStartCP==' ' || *pStartCP== '|' ) {
-            pStartCP++;
+static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    char *semi = uprv_strchr(str, ';');
+    char *pipe = uprv_strchr(str, '|');
+    char *s;
+    *codepoints = 0;
+    *contextCPs = 0;
+    if(semi == NULL) {
+        log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
+        return str;
+    }
+    if(pipe != NULL) {
+        int32_t contextLength;
+        *pipe = 0;
+        contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
+        *pipe = '|';
+        if(U_FAILURE(errorCode)) {
+            log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
+            return str;
          }
-        pEndCP = pStartCP+4;
-        *contextCPs = *codepoints;
-        *(++codepoints) = (UChar)((hex2num(*pStartCP) << 12) |
-                                  (hex2num(*(pStartCP + 1)) << 8) |
-                                  (hex2num(*(pStartCP + 2)) << 4) |
-                                  (hex2num(*(pStartCP + 3))));
-        contextCPs++;
+        /* prepend the precontext string to the codepoints */
+        u_memcpy(codepoints, contextCPs, contextLength);
+        codepoints += contextLength;
+        /* start of the code point string */
+        s = pipe + 1;
+    } else {
+        s = str;
      }
-    *contextCPs = 0;
-    codepoints ++;
-    while (*pEndCP != ';') {
-        pStartCP = pEndCP + 1;
-        *codepoints = (UChar)((hex2num(*pStartCP) << 12) |
-                          (hex2num(*(pStartCP + 1)) << 8) |
-                          (hex2num(*(pStartCP + 2)) << 4) |
-                          (hex2num(*(pStartCP + 3))));
-        codepoints ++;
-        pEndCP = pStartCP + 4;
+    u_parseString(s, codepoints, 99, NULL, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
+        return str;
      }
-    *codepoints = 0;
-    return pEndCP + 1;
+    return semi + 1;
  }
  
  /**
@@ -1267,7 +1264,7 @@ static FileStream * getFractionalUCA(void)
  */
  static void TestCEs() {
      FileStream *file = NULL;
-    char        line[1024];
+    char        line[2048];
      char       *str;
      UChar       codepoints[10];
      uint32_t    ces[20];
@@ -1277,7 +1274,7 @@ static void TestCEs() {
      UChar       contextCPs[5];    
  
      if (U_FAILURE(status)) {
-        log_err("Error in opening root collator\n");
+        log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
          return;
      }
  
@@ -1410,7 +1407,7 @@ static void TestDiscontiguos() {
      resultiter = ucol_openElements(coll, rule, 1, &status);
  
      if (U_FAILURE(status)) {
-        log_err("Error opening collation rules\n");
+        log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
          return;
      }
  
@@ -1480,7 +1477,7 @@ static void TestCEBufferOverflow()
      u_uastrcpy(rule, "&z < AB");
      coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
      if (U_FAILURE(status)) {
-        log_err("Rule based collator not created for testing ce buffer overflow\n");
+        log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
          return;
      }
  
@@ -1501,147 +1498,192 @@ static void TestCEBufferOverflow()
  }
  
  /**
-* Byte bounds checks. Checks if each byte in data is between upper and lower
-* inclusive.
+* Checking collation element validity.
  */
-static UBool checkByteBounds(uint32_t data, char upper, char lower)
-{
-    int count = 4;
-    while (count > 0) {
-        char b = (char)(data & 0xFF);
-        if (b > upper || b < lower) {
-            return FALSE;
-        }
-        data = data >> 8;
-        count --;
-    }
-    return TRUE;
-}
-
-/**
-* Determines case of the string of codepoints.
-* If it is a multiple codepoints it has to treated as a contraction.
-*/
-#if 0
-static uint8_t getCase(const UChar *s, uint32_t len) {
-    UBool       lower = FALSE;
-    UBool       upper = FALSE;
-    UBool       title = FALSE;
-    UErrorCode  status = U_ZERO_ERROR;
-    UChar       str[256];
-    const UChar      *ps = s;
-
-    if (len == 0) {
-        return UCOL_LOWER_CASE;
-    }
-
-    while (len > 0) {
-        UChar c = *ps ++;
-
-        if (u_islower(c)) {
-            lower = TRUE;
-        }
-        if (u_isupper(c)) {
-            upper = TRUE;
-        }
-        if (u_istitle(c)) {
-            title = TRUE;
+#define MAX_CODEPOINTS_TO_SHOW 10
+static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
+    int i, lengthToUse = length;
+    if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
+        lengthToUse = MAX_CODEPOINTS_TO_SHOW;
+    }
+    for (i = 0; i < lengthToUse; ++i) {
+        int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
+        if (bytesWritten <= 0) {
+            break;
          }
-
-        len --;
-    }
-    if ((lower && !upper && !title) || (!lower && !upper && !title)){
-        return UCOL_LOWER_CASE;
+        codepointText += bytesWritten;
      }
-    if (upper && !lower && !title) {
-        return UCOL_UPPER_CASE;
+    if (i < length) {
+        sprintf(codepointText, " ...");
      }
-    /* mix of cases here */
-    /* len = unorm_normalize(s, len, UNORM_NFKD, 0, str, 256, &status);
-    if (U_FAILURE(status)) {
-        log_err("Error normalizing data string\n");
-        return UCOL_LOWER_CASE;
-    }*/
-
-    if ((title && len >= 2) || (lower && upper)) {
-        return UCOL_MIXED_CASE;
-    }
-    if (u_isupper(s[0])) {
-        return UCOL_UPPER_CASE;
-    }
-    return UCOL_LOWER_CASE;
  }
-#endif
  
-/**
-* Checking collation element validity given the boundary arguments.
-*/
  static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
-                             int length, uint32_t primarymax,
-                             uint32_t secondarymax)
+                             int length)
  {
      UErrorCode          status = U_ZERO_ERROR;
      UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
                                                    &status);
-    uint32_t            ce;
-    UBool               first  = TRUE;
-/*
-    UBool               upper  = FALSE;
-    UBool               lower  = FALSE;
-*/
+    UBool result = FALSE;
+    UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
+    const char * collLocale;
  
      if (U_FAILURE(status)) {
          log_err("Error creating iterator for testing validity\n");
+        return FALSE;
+    }
+    collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
+    if (U_FAILURE(status) || collLocale==NULL) {
+        status = U_ZERO_ERROR;
+        collLocale = "?";
      }
  
-    ce = ucol_next(iter, &status);
-
-    while (ce != UCOL_NULLORDER) {
-       if (ce != 0) {
-           uint32_t primary   = UCOL_PRIMARYORDER(ce);
-           uint32_t secondary = UCOL_SECONDARYORDER(ce);
-           uint32_t tertiary  = UCOL_TERTIARYORDER(ce);
-/*           uint32_t scasebits = tertiary & 0xC0;*/
-
-           if ((tertiary == 0 && secondary != 0) ||
-               (tertiary < 0xC0 && secondary == 0 && primary != 0)) {
-               /* n-1th level is not zero when the nth level is
-                  except for continuations, this is wrong */
-               log_err("Lower level weight not 0 when high level weight is 0\n");
-               goto fail;
-           }
-           else {
-               /* checks if any byte is illegal ie = 01 02 03. */
-               if (checkByteBounds(ce, 0x3, 0x1)) {
-                   log_err("Byte range in CE lies in illegal bounds 0x1 - 0x3\n");
-                   goto fail;
-               }
-           }
-           if ((primary != 0 && primary < primarymax) 
-               || ((primary & 0xFF) == 0xFF) || (((primary>>8) & 0xFF) == 0xFF) 
-               || ((primary & 0xFF) && ((primary & 0xFF) <= 0x03)) 
-               || (((primary>>8) & 0xFF) && ((primary>>8) & 0xFF) <= 0x03)
-               || (primary >= 0xFE00 && !isContinuation(ce))) {
-               log_err("UCA primary weight out of bounds: %04X for string starting with %04X\n", 
-                   primary, codepoints[0]);
-               goto fail;
-           }
-           /* case matching not done since data generated by ken */
-           if (first) {
-               if (secondary >= 6 && secondary <= secondarymax) {
-                   log_err("Secondary weight out of range\n");
-                   goto fail;
-               }
-               first = FALSE;
-           }
-       }
-       ce   = ucol_next(iter, &status);
-   }
-   ucol_closeElements(iter);
-   return TRUE;
-fail :
-   ucol_closeElements(iter);
-   return FALSE;
+    for (;;) {
+        uint32_t ce = ucol_next(iter, &status);
+        uint32_t primary, p1, p2, secondary, tertiary;
+        if (ce == UCOL_NULLORDER) {
+            result = TRUE;
+            break;
+        }
+        if (ce == 0) {
+            continue;
+        }
+        if (ce == 0x02000202) {
+            /* special CE for merge-sort character */
+            if (*codepoints == 0xFFFE /* && length == 1 */) {
+                /*
+                 * Note: We should check for length==1 but the token parser appears
+                 * to give us trailing NUL characters.
+                 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
+                 *                     rather than the internal collation rule parser
+                 */
+                continue;
+            } else {
+                log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
+                        (int)*codepoints, (int)length);
+                break;
+            }
+        }
+        primary   = UCOL_PRIMARYORDER(ce);
+        p1 = primary >> 8;
+        p2 = primary & 0xFF;
+        secondary = UCOL_SECONDARYORDER(ce);
+        tertiary  = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
+
+        if (!isContinuation(ce)) {
+            if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
+                log_err("Empty CE %08lX except for case bits\n", (long)ce);
+                break;
+            }
+            if (p1 == 0) {
+                if (p2 != 0) {
+                    log_err("Primary 00 xx in %08lX\n", (long)ce);
+                    break;
+                }
+                primaryDone = TRUE;
+            } else {
+                if (p1 <= 2 || p1 >= 0xF0) {
+                    /* Primary first bytes F0..FF are specials. */
+                    log_err("Primary first byte of %08lX out of range\n", (long)ce);
+                    break;
+                }
+                if (p2 == 0) {
+                    primaryDone = TRUE;
+                } else {
+                    if (p2 <= 3 || p2 >= 0xFF) {
+                        /* Primary second bytes 03 and FF are sort key compression terminators. */
+                        log_err("Primary second byte of %08lX out of range\n", (long)ce);
+                        break;
+                    }
+                    primaryDone = FALSE;
+                }
+            }
+            if (secondary == 0) {
+                if (primary != 0) {
+                    log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
+                    break;
+                }
+                secondaryDone = TRUE;
+            } else {
+                if (secondary <= 2 ||
+                    (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
+                ) {
+                    /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
+                    log_err("Secondary byte of %08lX out of range\n", (long)ce);
+                    break;
+                }
+                secondaryDone = FALSE;
+            }
+            if (tertiary == 0) {
+                /* We know that ce != 0. */
+                log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
+                break;
+            }
+            if (tertiary <= 2) {
+                log_err("Tertiary byte of %08lX out of range\n", (long)ce);
+                break;
+            }
+            tertiaryDone = FALSE;
+        } else {
+            if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
+                log_err("Empty continuation %08lX\n", (long)ce);
+                break;
+            }
+            if (primaryDone && primary != 0) {
+                log_err("Primary was done but continues in %08lX\n", (long)ce);
+                break;
+            }
+            if (p1 == 0) {
+                if (p2 != 0) {
+                    log_err("Primary 00 xx in %08lX\n", (long)ce);
+                    break;
+                }
+                primaryDone = TRUE;
+            } else {
+                if (p1 <= 2) {
+                    log_err("Primary first byte of %08lX out of range\n", (long)ce);
+                    break;
+                }
+                if (p2 == 0) {
+                    primaryDone = TRUE;
+                } else {
+                    if (p2 <= 3) {
+                        log_err("Primary second byte of %08lX out of range\n", (long)ce);
+                        break;
+                    }
+                }
+            }
+            if (secondaryDone && secondary != 0) {
+                log_err("Secondary was done but continues in %08lX\n", (long)ce);
+                break;
+            }
+            if (secondary == 0) {
+                secondaryDone = TRUE;
+            } else {
+                if (secondary <= 2) {
+                    log_err("Secondary byte of %08lX out of range\n", (long)ce);
+                    break;
+                }
+            }
+            if (tertiaryDone && tertiary != 0) {
+                log_err("Tertiary was done but continues in %08lX\n", (long)ce);
+                break;
+            }
+            if (tertiary == 0) {
+                tertiaryDone = TRUE;
+            } else if (tertiary <= 2) {
+                log_err("Tertiary byte of %08lX out of range\n", (long)ce);
+                break;
+            }
+        }
+    }
+    if (!result) {
+        char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
+        showCodepoints(codepoints, length, codepointText);
+        log_err("Locale: %s  Code point string: %s\n", collLocale, codepointText);
+    }
+    ucol_closeElements(iter);
+    return result;
  }
  
  static void TestCEValidity()
@@ -1654,14 +1696,15 @@ static void TestCEValidity()
      char        locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
      const char *loc;
      FileStream *file = NULL;
-    char        line[1024];
-    UChar       codepoints[10];
+    char        line[2048];
+    UChar       codepoints[11];
      int         count = 0;
      int         maxCount = 0;
      UChar       contextCPs[3];
+    UChar32     c;
      UParseError parseError;
      if (U_FAILURE(status)) {
-        log_err("en_US collator creation failed\n");
+        log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
          return;
      }
      log_verbose("Testing UCA elements\n");
@@ -1678,23 +1721,29 @@ static void TestCEValidity()
          }
  
          getCodePoints(line, codepoints, contextCPs);
-        checkCEValidity(coll, codepoints, u_strlen(codepoints), 5, 86);
+        checkCEValidity(coll, codepoints, u_strlen(codepoints));
      }
  
      log_verbose("Testing UCA elements for the whole range of unicode characters\n");
-    codepoints[0] = 0;
-    while (codepoints[0] < 0xFFFF) {
-        if (u_isdefined((UChar32)codepoints[0])) {
-            checkCEValidity(coll, codepoints, 1, 5, 86);
+    for (c = 0; c <= 0xffff; ++c) {
+        if (u_isdefined(c)) {
+            codepoints[0] = (UChar)c;
+            checkCEValidity(coll, codepoints, 1);
+        }
+    }
+    for (; c <= 0x10ffff; ++c) {
+        if (u_isdefined(c)) {
+            int32_t i = 0;
+            U16_APPEND_UNSAFE(codepoints, i, c);
+            checkCEValidity(coll, codepoints, i);
          }
-        codepoints[0] ++;
      }
  
      ucol_close(coll);
  
      /* testing tailored collation elements */
      log_verbose("Testing tailored elements\n");
-    if(QUICK) {
+    if(getTestOption(QUICK_OPTION)) {
          maxCount = sizeof(locale)/sizeof(locale[0]);
      } else {
          maxCount = uloc_countAvailable();
@@ -1717,7 +1766,7 @@ static void TestCEValidity()
          UColTokenParser src;
          uint32_t strength = 0;
          uint16_t specs = 0;
-        if(QUICK) {
+        if(getTestOption(QUICK_OPTION)) {
              loc = locale[count];
          } else {
              loc = uloc_getAvailable(count);
@@ -1727,6 +1776,8 @@ static void TestCEValidity()
              }
          }
  
+        uprv_memset(&src, 0, sizeof(UColTokenParser));
+
          log_verbose("Testing CEs for %s\n", loc);
  
          coll      = ucol_open(loc, &status);
@@ -1739,7 +1790,7 @@ static void TestCEValidity()
          rules = ucol_getRules(coll, &ruleLen);
  
          if (ruleLen > 0) {
-            rulesCopy = (UChar *)malloc((ruleLen +
+            rulesCopy = (UChar *)uprv_malloc((ruleLen +
                  UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
              uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
              src.current = src.source = rulesCopy;
@@ -1747,6 +1798,8 @@ static void TestCEValidity()
              src.extraCurrent = src.end;
              src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
  
+               /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
+                  the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
              while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
                strength = src.parsedToken.strength;
                chOffset = src.parsedToken.charsOffset;
@@ -1761,9 +1814,9 @@ static void TestCEValidity()
                  uprv_memcpy(codepoints, src.source + chOffset,
                                                         chLen * sizeof(UChar));
                  codepoints[chLen] = 0;
-                checkCEValidity(coll, codepoints, chLen, 4, 85);
+                checkCEValidity(coll, codepoints, chLen);
              }
-            free(rulesCopy);
+            uprv_free(src.source);
          }
  
          ucol_close(coll);
@@ -1802,7 +1855,7 @@ static UBool checkSortKeyValidity(UCollator *coll,
                                        UCOL_TERTIARY, UCOL_QUATERNARY,
                                        UCOL_IDENTICAL};
      int        strengthlen = 5;
-    int        index       = 0;
+    int        strengthIndex = 0;
      int        caselevel   = 0;
  
      while (caselevel < 1) {
@@ -1813,16 +1866,16 @@ static UBool checkSortKeyValidity(UCollator *coll,
              ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
          }
  
-        while (index < strengthlen) {
+        while (strengthIndex < strengthlen) {
              int        count01 = 0;
              uint32_t   count   = 0;
              uint8_t    sortkey[128];
              uint32_t   sklen;
  
-            ucol_setStrength(coll, strength[index]);
+            ucol_setStrength(coll, strength[strengthIndex]);
              sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
              while (sortkey[count] != 0) {
-                if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && index != 4)) {
+                if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
                      printSortKeyError(codepoints, length, sortkey, sklen);
                      return FALSE;
                  }
@@ -1832,11 +1885,11 @@ static UBool checkSortKeyValidity(UCollator *coll,
                  count ++;
              }
  
-            if (count + 1 != sklen || (count01 != index + caselevel)) {
+            if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
                  printSortKeyError(codepoints, length, sortkey, sklen);
                  return FALSE;
              }
-            index ++;
+            strengthIndex ++;
          }
          caselevel ++;
      }
@@ -1852,13 +1905,13 @@ static void TestSortKeyValidity(void)
      /* tailored locales */
      char        locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
      FileStream *file = NULL;
-    char        line[1024];
+    char        line[2048];
      UChar       codepoints[10];
      int         count = 0;
      UChar       contextCPs[5];
      UParseError parseError;
      if (U_FAILURE(status)) {
-        log_err("en_US collator creation failed\n");
+        log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
          return;
      }
      log_verbose("Testing UCA elements\n");
@@ -1875,6 +1928,10 @@ static void TestSortKeyValidity(void)
          }
  
          getCodePoints(line, codepoints, contextCPs);
+        if(codepoints[0] == 0xFFFE) {
+            /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
+            continue;
+        }
          checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
      }
  
@@ -1911,6 +1968,8 @@ static void TestSortKeyValidity(void)
          uint32_t strength = 0;
          uint16_t specs = 0;
  
+        uprv_memset(&src, 0, sizeof(UColTokenParser));
+
          coll      = ucol_open(locale[count], &status);
          if (U_FAILURE(status)) {
              log_err("%s collator creation failed\n", locale[count]);
@@ -1921,7 +1980,7 @@ static void TestSortKeyValidity(void)
          rules = ucol_getRules(coll, &ruleLen);
  
          if (ruleLen > 0) {
-            rulesCopy = (UChar *)malloc((ruleLen +
+            rulesCopy = (UChar *)uprv_malloc((ruleLen +
                  UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
              uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
              src.current = src.source = rulesCopy;
@@ -1929,6 +1988,8 @@ static void TestSortKeyValidity(void)
              src.extraCurrent = src.end;
              src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
  
+               /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
+                  the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
              while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
                  strength = src.parsedToken.strength;
                  chOffset = src.parsedToken.charsOffset;
@@ -1943,9 +2004,13 @@ static void TestSortKeyValidity(void)
                  uprv_memcpy(codepoints, src.source + chOffset,
                                                         chLen * sizeof(UChar));
                  codepoints[chLen] = 0;
+                if(codepoints[0] == 0xFFFE) {
+                    /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
+                    continue;
+                }
                  checkSortKeyValidity(coll, codepoints, chLen);
              }
-            free(rulesCopy);
+            uprv_free(src.source);
          }
  
          ucol_close(coll);
@@ -1954,4 +2019,147 @@ static void TestSortKeyValidity(void)
      T_FileStream_close(file);
  }
  
+/**
+* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
+* normalization on AND jamo tailoring, among other things.
+*/
+static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
+    0x0020, 0xAC00,                 /* simple LV Hangul */
+    0x0020, 0xAC01,                 /* simple LVT Hangul */
+    0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
+    0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
+    0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
+    0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
+    0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
+    0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
+    0x0020, 0x00E6,                 /* small letter ae, expands */
+    0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
+    0x0020
+};
+enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
+
+static const int32_t rootStandardOffsets[] = {
+    0,  1,2,
+    2,  3,4,4,
+    4,  5,6,6,
+    6,  7,8,8,
+    8,  9,10,11,
+    12, 13,14,15,
+    16, 17,18,19,
+    20, 21,22,23,
+    24, 25,26,26,26,
+    26, 27,28,28,
+    28,
+    29
+};
+enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) };
+
+static const int32_t rootSearchOffsets[] = {
+    0,  1,2,
+    2,  3,4,4,
+    4,  5,6,6,6,
+    6,  7,8,8,8,8,8,8,
+    8,  9,10,11,
+    12, 13,14,15,
+    16, 17,18,19,20,
+    20, 21,22,22,23,23,23,24,
+    24, 25,26,26,26,
+    26, 27,28,28,
+    28,
+    29
+};
+enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) };
+
+typedef struct {
+    const char *    locale;
+    const int32_t * offsets;
+    int32_t         offsetsLen;
+} TSCEItem;
+
+static const TSCEItem tsceItems[] = {
+    { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
+#if 1
+    /* No jamo tailorings in Apple version of search collator currently */
+    { "root@collation=search", rootStandardOffsets, kLen_rootStandardOffsets },
+#else
+    /* Use this when we do have jamo tailorings */
+    { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
+#endif
+    { NULL,                    NULL,                0                        }
+};
+
+static void TestSearchCollatorElements(void)
+{
+    const TSCEItem * tsceItemPtr;
+    for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
+        UErrorCode status = U_ZERO_ERROR;
+        UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
+        if ( U_SUCCESS(status) ) {
+            UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
+            if ( U_SUCCESS(status) ) {
+                int32_t offset, element;
+                const int32_t * nextOffsetPtr;
+                const int32_t * limitOffsetPtr;
+
+                nextOffsetPtr = tsceItemPtr->offsets;
+                limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
+                do {
+                    offset = ucol_getOffset(uce);
+                    element = ucol_next(uce, &status);
+                    if ( element == 0 ) {
+                        log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
+                    }
+                    if ( nextOffsetPtr < limitOffsetPtr ) {
+                        if (offset != *nextOffsetPtr) {
+                            log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
+                                                            tsceItemPtr->locale, *nextOffsetPtr, offset );
+                            nextOffsetPtr = limitOffsetPtr;
+                            break;
+                        }
+                        nextOffsetPtr++;
+                    } else {
+                        log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
+                    }
+                } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
+                if ( nextOffsetPtr < limitOffsetPtr ) {
+                    log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
+                }
+
+                ucol_setOffset(uce, kLen_tsceText, &status);
+                status = U_ZERO_ERROR;
+                nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
+                limitOffsetPtr = tsceItemPtr->offsets;
+                do {
+                    offset = ucol_getOffset(uce);
+                    element = ucol_previous(uce, &status);
+                    if ( element == 0 ) {
+                        log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
+                    }
+                    if ( nextOffsetPtr > limitOffsetPtr ) {
+                        nextOffsetPtr--;
+                        if (offset != *nextOffsetPtr) {
+                            log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
+                                                                tsceItemPtr->locale, *nextOffsetPtr, offset );
+                            nextOffsetPtr = limitOffsetPtr;
+                            break;
+                        }
+                   } else {
+                        log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
+                    }
+                } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
+                if ( nextOffsetPtr > limitOffsetPtr ) {
+                    log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
+                }
+
+                ucol_closeElements(uce);
+            } else {
+                log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
+            }
+            ucol_close(ucol);
+        } else {
+            log_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
+        }
+    }
+}
+
  #endif /* #if !UCONFIG_NO_COLLATION */