]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/i18n/ucol_tok.cpp
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_tok.cpp
index c0f460a8e24c4f1294c30b15c21a1b76a5006cac..ca40cc603be0a8deef18f3aae5879a9588da6ecb 100644 (file)
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2001-2004, International Business Machines
+*   Copyright (C) 2001-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 
 #if !UCONFIG_NO_COLLATION
 
+#include "unicode/uscript.h"
 #include "unicode/ustring.h"
 #include "unicode/uchar.h"
 #include "unicode/uniset.h"
 
-#include "ucol_tok.h"
 #include "cmemory.h"
-#include "util.h"
+#include "cstring.h"
+#include "patternprops.h"
+#include "ucol_bld.h"
+#include "ucol_tok.h"
+#include "ulocimp.h"
+#include "uresimp.h"
+
+// Define this only for debugging.
+// #define DEBUG_FOR_COLL_RULES 1
+
+#ifdef DEBUG_FOR_COLL_RULES
+#include <iostream>
+#endif
+
+U_NAMESPACE_USE
 
 U_CDECL_BEGIN
-static int32_t U_EXPORT2 U_CALLCONV
+static int32_t U_CALLCONV
 uhash_hashTokens(const UHashTok k)
 {
     int32_t hash = 0;
     //uint32_t key = (uint32_t)k.integer;
     UColToken *key = (UColToken *)k.pointer;
     if (key != 0) {
-        //int32_t len = (key & 0xFF000000)>>24;
         int32_t len = (key->source & 0xFF000000)>>24;
         int32_t inc = ((len - 32) / 32) + 1;
 
-        //const UChar *p = (key & 0x00FFFFFF) + rulesToParse;
-        const UChar *p = (key->source & 0x00FFFFFF) + key->rulesToParse;
+        const UChar *p = (key->source & 0x00FFFFFF) + *(key->rulesToParseHdl);
         const UChar *limit = p + len;
 
         while (p<limit) {
@@ -54,15 +66,15 @@ uhash_hashTokens(const UHashTok k)
     return hash;
 }
 
-static UBool U_EXPORT2 U_CALLCONV
+static UBool U_CALLCONV
 uhash_compareTokens(const UHashTok key1, const UHashTok key2)
 {
     //uint32_t p1 = (uint32_t) key1.integer;
     //uint32_t p2 = (uint32_t) key2.integer;
     UColToken *p1 = (UColToken *)key1.pointer;
     UColToken *p2 = (UColToken *)key2.pointer;
-    const UChar *s1 = (p1->source & 0x00FFFFFF) + p1->rulesToParse;
-    const UChar *s2 = (p2->source & 0x00FFFFFF) + p2->rulesToParse;
+    const UChar *s1 = (p1->source & 0x00FFFFFF) + *(p1->rulesToParseHdl);
+    const UChar *s2 = (p2->source & 0x00FFFFFF) + *(p2->rulesToParseHdl);
     uint32_t s1L = ((p1->source & 0xFF000000) >> 24);
     uint32_t s2L = ((p2->source & 0xFF000000) >> 24);
     const UChar *end = s1+s1L-1;
@@ -74,34 +86,60 @@ uhash_compareTokens(const UHashTok key1, const UHashTok key2)
         return FALSE;
     }
     if(s1L != s2L) {
-      return FALSE;
+        return FALSE;
     }
     if(p1->source == p2->source) {
-      return TRUE;
+        return TRUE;
     }
     while((s1 < end) && *s1 == *s2) {
-      ++s1;
-      ++s2;
+        ++s1;
+        ++s2;
     }
     if(*s1 == *s2) {
-      return TRUE;
+        return TRUE;
     } else {
-      return FALSE;
+        return FALSE;
     }
 }
 U_CDECL_END
 
-static inline void U_CALLCONV
-uhash_freeBlockWrapper(void *obj) {
-  uhash_freeBlock(obj);
-}
+/*
+ * Debug messages used to pinpoint where a format error occurred.
+ * A better way is to include context-sensitive information in syntaxError() function.
+ *
+ * To turn this debugging on, either uncomment the following line, or pass -DDEBUG_FOR_FORMAT_ERROR
+ * on the compile line.
+ */
+/* #define DEBUG_FOR_FORMAT_ERROR 1 */
+
+#ifdef DEBUG_FOR_FORMAT_ERROR
+#define DBG_FORMAT_ERROR { printf("U_INVALID_FORMAT_ERROR at line %d", __LINE__);}
+#else
+#define DBG_FORMAT_ERROR
+#endif
+
+
+/*
+ * Controls debug messages so that the output can be compared before and after a
+ * big change.  Prints the information of every code point that comes out of the
+ * collation parser and its strength into a file.  When a big change in format
+ * happens, the files before and after the change should be identical.
+ *
+ * To turn this debugging on, either uncomment the following line, or pass -DDEBUG_FOR_CODE_POINTS
+ * on the compile line.
+ */
+// #define DEBUG_FOR_CODE_POINTS 1
+
+#ifdef DEBUG_FOR_CODE_POINTS
+    FILE* dfcp_fp = NULL;
+#endif
 
 
 typedef struct {
-  uint32_t startCE;
-  uint32_t startContCE;
-  uint32_t limitCE;
-  uint32_t limitContCE;
+    uint32_t startCE;
+    uint32_t startContCE;
+    uint32_t limitCE;
+    uint32_t limitContCE;
 } indirectBoundaries;
 
 /* these values are used for finding CE values for indirect positioning. */
@@ -121,44 +159,44 @@ typedef struct {
 static indirectBoundaries ucolIndirectBoundaries[15];
 /*
 static indirectBoundaries ucolIndirectBoundaries[11] = {
-  { UCOL_RESET_TOP_VALUE,               0,
-    UCOL_NEXT_TOP_VALUE,                0 },
-  { UCOL_FIRST_PRIMARY_IGNORABLE,       0,
-    0,                                  0 },
-  { UCOL_LAST_PRIMARY_IGNORABLE,        UCOL_LAST_PRIMARY_IGNORABLE_CONT,
-    0,                                  0 },
-  { UCOL_FIRST_SECONDARY_IGNORABLE,     0,
-    0,                                  0 },
-  { UCOL_LAST_SECONDARY_IGNORABLE,      0,
-    0,                                  0 },
-  { UCOL_FIRST_TERTIARY_IGNORABLE,      0,
-    0,                                  0 },
-  { UCOL_LAST_TERTIARY_IGNORABLE,       0,
-    0,                                  0 },
-  { UCOL_FIRST_VARIABLE,                0,
-    0,                                  0 },
-  { UCOL_LAST_VARIABLE,                 0,
-    0,                                  0 },
-  { UCOL_FIRST_NON_VARIABLE,            0,
-    0,                                  0 },
-  { UCOL_LAST_NON_VARIABLE,             0,
-    0,                                  0 },
+{ UCOL_RESET_TOP_VALUE,               0,
+UCOL_NEXT_TOP_VALUE,                0 },
+{ UCOL_FIRST_PRIMARY_IGNORABLE,       0,
+0,                                  0 },
+{ UCOL_LAST_PRIMARY_IGNORABLE,        UCOL_LAST_PRIMARY_IGNORABLE_CONT,
+0,                                  0 },
+{ UCOL_FIRST_SECONDARY_IGNORABLE,     0,
+0,                                  0 },
+{ UCOL_LAST_SECONDARY_IGNORABLE,      0,
+0,                                  0 },
+{ UCOL_FIRST_TERTIARY_IGNORABLE,      0,
+0,                                  0 },
+{ UCOL_LAST_TERTIARY_IGNORABLE,       0,
+0,                                  0 },
+{ UCOL_FIRST_VARIABLE,                0,
+0,                                  0 },
+{ UCOL_LAST_VARIABLE,                 0,
+0,                                  0 },
+{ UCOL_FIRST_NON_VARIABLE,            0,
+0,                                  0 },
+{ UCOL_LAST_NON_VARIABLE,             0,
+0,                                  0 },
 };
 */
 
 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
 
-  // Set values for the top - TODO: once we have values for all the indirects, we are going
-  // to initalize here.
-  ucolIndirectBoundaries[indexR].startCE = start[0];
-  ucolIndirectBoundaries[indexR].startContCE = start[1];
-  if(end) {
-    ucolIndirectBoundaries[indexR].limitCE = end[0];
-    ucolIndirectBoundaries[indexR].limitContCE = end[1];
-  } else {
-    ucolIndirectBoundaries[indexR].limitCE = 0;
-    ucolIndirectBoundaries[indexR].limitContCE = 0;
-  }
+    // Set values for the top - TODO: once we have values for all the indirects, we are going
+    // to initialize here.
+    ucolIndirectBoundaries[indexR].startCE = start[0];
+    ucolIndirectBoundaries[indexR].startContCE = start[1];
+    if(end) {
+        ucolIndirectBoundaries[indexR].limitCE = end[0];
+        ucolIndirectBoundaries[indexR].limitContCE = end[1];
+    } else {
+        ucolIndirectBoundaries[indexR].limitCE = 0;
+        ucolIndirectBoundaries[indexR].limitContCE = 0;
+    }
 }
 
 
@@ -166,12 +204,13 @@ static inline
 void syntaxError(const UChar* rules,
                  int32_t pos,
                  int32_t rulesLen,
-                 UParseError* parseError) {
+                 UParseError* parseError)
+{
     parseError->offset = pos;
     parseError->line = 0 ; /* we are not using line numbers */
 
     // for pre-context
-    int32_t start = (pos <=U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
+    int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
     int32_t stop  = pos;
 
     u_memcpy(parseError->preContext,rules+start,stop-start);
@@ -181,51 +220,51 @@ void syntaxError(const UChar* rules,
     //for post-context
     start = pos+1;
     stop  = ((pos+U_PARSE_CONTEXT_LEN)<= rulesLen )? (pos+(U_PARSE_CONTEXT_LEN-1)) :
-                                                            rulesLen;
+    rulesLen;
 
     if(start < stop) {
-      u_memcpy(parseError->postContext,rules+start,stop-start);
-      //null terminate the buffer
-      parseError->postContext[stop-start]= 0;
+        u_memcpy(parseError->postContext,rules+start,stop-start);
+        //null terminate the buffer
+        parseError->postContext[stop-start]= 0;
     } else {
-      parseError->postContext[0] = 0;
+        parseError->postContext[0] = 0;
     }
 }
 
 static
 void ucol_uprv_tok_setOptionInImage(UColOptionSet *opts, UColAttribute attrib, UColAttributeValue value) {
-  switch(attrib) {
-  case UCOL_HIRAGANA_QUATERNARY_MODE:
-    opts->hiraganaQ = value;
-    break;
-  case UCOL_FRENCH_COLLATION:
-    opts->frenchCollation = value;
-    break;
-  case UCOL_ALTERNATE_HANDLING:
-    opts->alternateHandling = value;
-    break;
-  case UCOL_CASE_FIRST:
-    opts->caseFirst = value;
-    break;
-  case UCOL_CASE_LEVEL:
-    opts->caseLevel = value;
-    break;
-  case UCOL_NORMALIZATION_MODE:
-    opts->normalizationMode = value;
-    break;
-  case UCOL_STRENGTH:
-    opts->strength = value;
-    break;
-  case UCOL_NUMERIC_COLLATION:
-   opts->numericCollation = value;
-   break;
-  case UCOL_ATTRIBUTE_COUNT:
-  default:
-    break;
-  }
+    switch(attrib) {
+    case UCOL_HIRAGANA_QUATERNARY_MODE:
+        opts->hiraganaQ = value;
+        break;
+    case UCOL_FRENCH_COLLATION:
+        opts->frenchCollation = value;
+        break;
+    case UCOL_ALTERNATE_HANDLING:
+        opts->alternateHandling = value;
+        break;
+    case UCOL_CASE_FIRST:
+        opts->caseFirst = value;
+        break;
+    case UCOL_CASE_LEVEL:
+        opts->caseLevel = value;
+        break;
+    case UCOL_NORMALIZATION_MODE:
+        opts->normalizationMode = value;
+        break;
+    case UCOL_STRENGTH:
+        opts->strength = value;
+        break;
+    case UCOL_NUMERIC_COLLATION:
+        opts->numericCollation = value;
+        break;
+    case UCOL_ATTRIBUTE_COUNT:
+    default:
+        break;
+    }
 }
 
-#define UTOK_OPTION_COUNT 20
+#define UTOK_OPTION_COUNT 22
 
 static UBool didInit = FALSE;
 /* we can be strict, or we can be lenient */
@@ -273,7 +312,8 @@ U_STRING_DECL(option_16,    "last",           4);
 U_STRING_DECL(option_17,    "optimize",       8);
 U_STRING_DECL(option_18,    "suppressContractions",         20);
 U_STRING_DECL(option_19,    "numericOrdering",              15);
-
+U_STRING_DECL(option_20,    "import",         6);
+U_STRING_DECL(option_21,    "reorder",         7);
 
 /*
 [last variable] last variable value
@@ -285,51 +325,51 @@ U_STRING_DECL(option_19,    "numericOrdering",              15);
 
 
 static const ucolTokSuboption alternateSub[2] = {
-  {suboption_00, 13, UCOL_NON_IGNORABLE},
-  {suboption_01,  7, UCOL_SHIFTED}
+    {suboption_00, 13, UCOL_NON_IGNORABLE},
+    {suboption_01,  7, UCOL_SHIFTED}
 };
 
 static const ucolTokSuboption caseFirstSub[3] = {
-  {suboption_02, 5, UCOL_LOWER_FIRST},
-  {suboption_03,  5, UCOL_UPPER_FIRST},
-  {suboption_04,  3, UCOL_OFF},
+    {suboption_02, 5, UCOL_LOWER_FIRST},
+    {suboption_03,  5, UCOL_UPPER_FIRST},
+    {suboption_04,  3, UCOL_OFF},
 };
 
 static const ucolTokSuboption onOffSub[2] = {
-  {suboption_04, 3, UCOL_OFF},
-  {suboption_05, 2, UCOL_ON}
+    {suboption_04, 3, UCOL_OFF},
+    {suboption_05, 2, UCOL_ON}
 };
 
 static const ucolTokSuboption frenchSub[1] = {
-  {suboption_07, 1, UCOL_ON}
+    {suboption_07, 1, UCOL_ON}
 };
 
 static const ucolTokSuboption beforeSub[3] = {
-  {suboption_06, 1, UCOL_PRIMARY},
-  {suboption_07, 1, UCOL_SECONDARY},
-  {suboption_08, 1, UCOL_TERTIARY}
+    {suboption_06, 1, UCOL_PRIMARY},
+    {suboption_07, 1, UCOL_SECONDARY},
+    {suboption_08, 1, UCOL_TERTIARY}
 };
 
 static const ucolTokSuboption strengthSub[5] = {
-  {suboption_06, 1, UCOL_PRIMARY},
-  {suboption_07, 1, UCOL_SECONDARY},
-  {suboption_08, 1, UCOL_TERTIARY},
-  {suboption_09, 1, UCOL_QUATERNARY},
-  {suboption_10, 1, UCOL_IDENTICAL},
+    {suboption_06, 1, UCOL_PRIMARY},
+    {suboption_07, 1, UCOL_SECONDARY},
+    {suboption_08, 1, UCOL_TERTIARY},
+    {suboption_09, 1, UCOL_QUATERNARY},
+    {suboption_10, 1, UCOL_IDENTICAL},
 };
 
 static const ucolTokSuboption firstLastSub[7] = {
-  {suboption_11, 7, UCOL_PRIMARY},
-  {suboption_12, 9, UCOL_PRIMARY},
-  {suboption_13, 8, UCOL_PRIMARY},
-  {suboption_14, 8, UCOL_PRIMARY},
-  {suboption_15, 7, UCOL_PRIMARY},
-  {suboption_16, 8, UCOL_PRIMARY},
-  {suboption_17, 8, UCOL_PRIMARY},
+    {suboption_11, 7, UCOL_PRIMARY},
+    {suboption_12, 9, UCOL_PRIMARY},
+    {suboption_13, 8, UCOL_PRIMARY},
+    {suboption_14, 8, UCOL_PRIMARY},
+    {suboption_15, 7, UCOL_PRIMARY},
+    {suboption_16, 8, UCOL_PRIMARY},
+    {suboption_17, 8, UCOL_PRIMARY},
 };
 
 enum OptionNumber {
-  OPTION_ALTERNATE_HANDLING = 0,
+    OPTION_ALTERNATE_HANDLING = 0,
     OPTION_FRENCH_COLLATION,
     OPTION_CASE_LEVEL,
     OPTION_CASE_FIRST,
@@ -349,36 +389,40 @@ enum OptionNumber {
     OPTION_UNDEFINED,
     OPTION_SCRIPT_ORDER,
     OPTION_CHARSET_NAME,
-    OPTION_CHARSET
+    OPTION_CHARSET,
+    OPTION_IMPORT,
+    OPTION_SCRIPTREORDER
 } ;
 
 static const ucolTokOption rulesOptions[UTOK_OPTION_COUNT] = {
- /*00*/ {option_02,  9, alternateSub, 2, UCOL_ALTERNATE_HANDLING}, /*"alternate" */
- /*01*/ {option_03,  9, frenchSub, 1, UCOL_FRENCH_COLLATION}, /*"backwards"      */
- /*02*/ {option_07,  9, onOffSub, 2, UCOL_CASE_LEVEL},  /*"caseLevel"      */
- /*03*/ {option_08,  9, caseFirstSub, 3, UCOL_CASE_FIRST}, /*"caseFirst"   */
- /*04*/ {option_06, 13, onOffSub, 2, UCOL_NORMALIZATION_MODE}, /*"normalization" */
- /*05*/ {option_13, 9, onOffSub, 2, UCOL_HIRAGANA_QUATERNARY_MODE}, /*"hiraganaQ" */
- /*06*/ {option_14, 8, strengthSub, 5, UCOL_STRENGTH}, /*"strength" */
- /*07*/ {option_19, 15, onOffSub, 2, UCOL_NUMERIC_COLLATION},  /*"numericOrdering"*/
- /*08*/ {option_04, 12, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"variable top"   */
- /*09*/ {option_01,  9, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"rearrange"      */
- /*10*/ {option_12,  6, beforeSub, 3, UCOL_ATTRIBUTE_COUNT}, /*"before"    */
- /*11*/ {option_05,  3, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"top"            */
- /*12*/ {option_15,  5, firstLastSub, 7, UCOL_ATTRIBUTE_COUNT}, /*"first" */
- /*13*/ {option_16,  4, firstLastSub, 7, UCOL_ATTRIBUTE_COUNT}, /*"last" */
- /*14*/ {option_17,  8, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"optimize"      */
- /*15*/ {option_18, 20, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"suppressContractions"      */
- /*16*/ {option_00,  9, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"undefined"      */
- /*17*/ {option_09, 11, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"scriptOrder"    */
- /*18*/ {option_10, 11, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"charsetname"    */
- /*19*/ {option_11,  7, NULL, 0, UCOL_ATTRIBUTE_COUNT}  /*"charset"        */
+    /*00*/ {option_02,  9, alternateSub, 2, UCOL_ALTERNATE_HANDLING}, /*"alternate" */
+    /*01*/ {option_03,  9, frenchSub, 1, UCOL_FRENCH_COLLATION}, /*"backwards"      */
+    /*02*/ {option_07,  9, onOffSub, 2, UCOL_CASE_LEVEL},  /*"caseLevel"      */
+    /*03*/ {option_08,  9, caseFirstSub, 3, UCOL_CASE_FIRST}, /*"caseFirst"   */
+    /*04*/ {option_06, 13, onOffSub, 2, UCOL_NORMALIZATION_MODE}, /*"normalization" */
+    /*05*/ {option_13, 9, onOffSub, 2, UCOL_HIRAGANA_QUATERNARY_MODE}, /*"hiraganaQ" */
+    /*06*/ {option_14, 8, strengthSub, 5, UCOL_STRENGTH}, /*"strength" */
+    /*07*/ {option_19, 15, onOffSub, 2, UCOL_NUMERIC_COLLATION},  /*"numericOrdering"*/
+    /*08*/ {option_04, 12, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"variable top"   */
+    /*09*/ {option_01,  9, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"rearrange"      */
+    /*10*/ {option_12,  6, beforeSub, 3, UCOL_ATTRIBUTE_COUNT}, /*"before"    */
+    /*11*/ {option_05,  3, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"top"            */
+    /*12*/ {option_15,  5, firstLastSub, 7, UCOL_ATTRIBUTE_COUNT}, /*"first" */
+    /*13*/ {option_16,  4, firstLastSub, 7, UCOL_ATTRIBUTE_COUNT}, /*"last" */
+    /*14*/ {option_17,  8, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"optimize"      */
+    /*15*/ {option_18, 20, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"suppressContractions"      */
+    /*16*/ {option_00,  9, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"undefined"      */
+    /*17*/ {option_09, 11, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"scriptOrder"    */
+    /*18*/ {option_10, 11, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"charsetname"    */
+    /*19*/ {option_11,  7, NULL, 0, UCOL_ATTRIBUTE_COUNT},  /*"charset"        */
+    /*20*/ {option_20,  6, NULL, 0, UCOL_ATTRIBUTE_COUNT},  /*"import"        */
+    /*21*/ {option_21,  7, NULL, 0, UCOL_ATTRIBUTE_COUNT}  /*"reorder"        */
 };
 
 static
 int32_t u_strncmpNoCase(const UChar     *s1,
-     const UChar     *s2,
-     int32_t     n)
+                        const UChar     *s2,
+                        int32_t     n)
 {
     if(n > 0) {
         int32_t rc;
@@ -396,52 +440,54 @@ int32_t u_strncmpNoCase(const UChar     *s1,
 
 static
 void ucol_uprv_tok_initData() {
-  if(!didInit) {
-    U_STRING_INIT(suboption_00, "non-ignorable", 13);
-    U_STRING_INIT(suboption_01, "shifted",        7);
-
-    U_STRING_INIT(suboption_02, "lower",          5);
-    U_STRING_INIT(suboption_03, "upper",          5);
-    U_STRING_INIT(suboption_04, "off",            3);
-    U_STRING_INIT(suboption_05, "on",             2);
-
-    U_STRING_INIT(suboption_06, "1",              1);
-    U_STRING_INIT(suboption_07, "2",              1);
-    U_STRING_INIT(suboption_08, "3",              1);
-    U_STRING_INIT(suboption_09, "4",              1);
-    U_STRING_INIT(suboption_10, "I",              1);
-
-    U_STRING_INIT(suboption_11, "primary",        7);
-    U_STRING_INIT(suboption_12, "secondary",      9);
-    U_STRING_INIT(suboption_13, "tertiary",       8);
-    U_STRING_INIT(suboption_14, "variable",       8);
-    U_STRING_INIT(suboption_15, "regular",        7);
-    U_STRING_INIT(suboption_16, "implicit",       8);
-    U_STRING_INIT(suboption_17, "trailing",       8);
-
-
-    U_STRING_INIT(option_00, "undefined",      9);
-    U_STRING_INIT(option_01, "rearrange",      9);
-    U_STRING_INIT(option_02, "alternate",      9);
-    U_STRING_INIT(option_03, "backwards",      9);
-    U_STRING_INIT(option_04, "variable top",  12);
-    U_STRING_INIT(option_05, "top",            3);
-    U_STRING_INIT(option_06, "normalization", 13);
-    U_STRING_INIT(option_07, "caseLevel",      9);
-    U_STRING_INIT(option_08, "caseFirst",      9);
-    U_STRING_INIT(option_09, "scriptOrder",   11);
-    U_STRING_INIT(option_10, "charsetname",   11);
-    U_STRING_INIT(option_11, "charset",        7);
-    U_STRING_INIT(option_12, "before",         6);
-    U_STRING_INIT(option_13, "hiraganaQ",      9);
-    U_STRING_INIT(option_14, "strength",       8);
-    U_STRING_INIT(option_15, "first",          5);
-    U_STRING_INIT(option_16, "last",           4);
-    U_STRING_INIT(option_17, "optimize",       8);
-    U_STRING_INIT(option_18, "suppressContractions",         20);
-    U_STRING_INIT(option_19, "numericOrdering",      15);
-    didInit = TRUE;
-  }
+    if(!didInit) {
+        U_STRING_INIT(suboption_00, "non-ignorable", 13);
+        U_STRING_INIT(suboption_01, "shifted",        7);
+
+        U_STRING_INIT(suboption_02, "lower",          5);
+        U_STRING_INIT(suboption_03, "upper",          5);
+        U_STRING_INIT(suboption_04, "off",            3);
+        U_STRING_INIT(suboption_05, "on",             2);
+
+        U_STRING_INIT(suboption_06, "1",              1);
+        U_STRING_INIT(suboption_07, "2",              1);
+        U_STRING_INIT(suboption_08, "3",              1);
+        U_STRING_INIT(suboption_09, "4",              1);
+        U_STRING_INIT(suboption_10, "I",              1);
+
+        U_STRING_INIT(suboption_11, "primary",        7);
+        U_STRING_INIT(suboption_12, "secondary",      9);
+        U_STRING_INIT(suboption_13, "tertiary",       8);
+        U_STRING_INIT(suboption_14, "variable",       8);
+        U_STRING_INIT(suboption_15, "regular",        7);
+        U_STRING_INIT(suboption_16, "implicit",       8);
+        U_STRING_INIT(suboption_17, "trailing",       8);
+
+
+        U_STRING_INIT(option_00, "undefined",      9);
+        U_STRING_INIT(option_01, "rearrange",      9);
+        U_STRING_INIT(option_02, "alternate",      9);
+        U_STRING_INIT(option_03, "backwards",      9);
+        U_STRING_INIT(option_04, "variable top",  12);
+        U_STRING_INIT(option_05, "top",            3);
+        U_STRING_INIT(option_06, "normalization", 13);
+        U_STRING_INIT(option_07, "caseLevel",      9);
+        U_STRING_INIT(option_08, "caseFirst",      9);
+        U_STRING_INIT(option_09, "scriptOrder",   11);
+        U_STRING_INIT(option_10, "charsetname",   11);
+        U_STRING_INIT(option_11, "charset",        7);
+        U_STRING_INIT(option_12, "before",         6);
+        U_STRING_INIT(option_13, "hiraganaQ",      9);
+        U_STRING_INIT(option_14, "strength",       8);
+        U_STRING_INIT(option_15, "first",          5);
+        U_STRING_INIT(option_16, "last",           4);
+        U_STRING_INIT(option_17, "optimize",       8);
+        U_STRING_INIT(option_18, "suppressContractions",         20);
+        U_STRING_INIT(option_19, "numericOrdering",      15);
+        U_STRING_INIT(option_20, "import ",        6);
+        U_STRING_INIT(option_21, "reorder",        7);
+        didInit = TRUE;
+    }
 }
 
 
@@ -449,125 +495,202 @@ void ucol_uprv_tok_initData() {
 // used by data driven tests. Should not support build time options
 U_CAPI const UChar * U_EXPORT2
 ucol_tok_getNextArgument(const UChar *start, const UChar *end,
-                               UColAttribute *attrib, UColAttributeValue *value,
-                               UErrorCode *status) {
-  uint32_t i = 0;
-  int32_t j=0;
-  UBool foundOption = FALSE;
-  const UChar *optionArg = NULL;
+                         UColAttribute *attrib, UColAttributeValue *value,
+                         UErrorCode *status)
+{
+    uint32_t i = 0;
+    int32_t j=0;
+    UBool foundOption = FALSE;
+    const UChar *optionArg = NULL;
 
-  ucol_uprv_tok_initData();
+    ucol_uprv_tok_initData();
 
-  while(start < end && u_isWhitespace(*start)) { /* eat whitespace */
-    start++;
-  }
-  if(start >= end) {
-    return NULL;
-  }
-  /* skip opening '[' */
-  if(*start == 0x005b) {
-    start++;
-  } else {
-    *status = U_ILLEGAL_ARGUMENT_ERROR; // no opening '['
-    return NULL;
-  }
+    while(start < end && PatternProps::isWhiteSpace(*start)) { /* eat whitespace */
+        start++;
+    }
+    if(start >= end) {
+        return NULL;
+    }
+    /* skip opening '[' */
+    if(*start == 0x005b) {
+        start++;
+    } else {
+        *status = U_ILLEGAL_ARGUMENT_ERROR; // no opening '['
+        return NULL;
+    }
 
-  while(i < UTOK_OPTION_COUNT) {
-    if(u_strncmpNoCase(start, rulesOptions[i].optionName, rulesOptions[i].optionLen) == 0) {
-      foundOption = TRUE;
-      if(end - start > rulesOptions[i].optionLen) {
-        optionArg = start+rulesOptions[i].optionLen+1; /* start of the options, skip space */
-        while(u_isWhitespace(*optionArg)) { /* eat whitespace */
-          optionArg++;
+    while(i < UTOK_OPTION_COUNT) {
+        if(u_strncmpNoCase(start, rulesOptions[i].optionName, rulesOptions[i].optionLen) == 0) {
+            foundOption = TRUE;
+            if(end - start > rulesOptions[i].optionLen) {
+                optionArg = start+rulesOptions[i].optionLen+1; /* start of the options, skip space */
+                while(PatternProps::isWhiteSpace(*optionArg)) { /* eat whitespace */
+                    optionArg++;
+                }
+            }
+            break;
         }
-      }
-      break;
+        i++;
     }
-    i++;
-  }
 
-  if(!foundOption) {
-    *status = U_ILLEGAL_ARGUMENT_ERROR;
-    return NULL;
-  }
+    if(!foundOption) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
 
-  if(optionArg) {
-    for(j = 0; j<rulesOptions[i].subSize; j++) {
-      if(u_strncmpNoCase(optionArg, rulesOptions[i].subopts[j].subName, rulesOptions[i].subopts[j].subLen) == 0) {
-        //ucol_uprv_tok_setOptionInImage(src->opts, rulesOptions[i].attr, rulesOptions[i].subopts[j].attrVal);
-        *attrib = rulesOptions[i].attr;
-        *value = rulesOptions[i].subopts[j].attrVal;
-        optionArg += rulesOptions[i].subopts[j].subLen;
-        while(u_isWhitespace(*optionArg)) { /* eat whitespace */
-          optionArg++;
-        }
-        if(*optionArg == 0x005d) {
-          optionArg++;
-          return optionArg;
-        } else {
-          *status = U_ILLEGAL_ARGUMENT_ERROR;
-          return NULL;
+    if(optionArg) {
+        for(j = 0; j<rulesOptions[i].subSize; j++) {
+            if(u_strncmpNoCase(optionArg, rulesOptions[i].subopts[j].subName, rulesOptions[i].subopts[j].subLen) == 0) {
+                //ucol_uprv_tok_setOptionInImage(src->opts, rulesOptions[i].attr, rulesOptions[i].subopts[j].attrVal);
+                *attrib = rulesOptions[i].attr;
+                *value = rulesOptions[i].subopts[j].attrVal;
+                optionArg += rulesOptions[i].subopts[j].subLen;
+                while(PatternProps::isWhiteSpace(*optionArg)) { /* eat whitespace */
+                    optionArg++;
+                }
+                if(*optionArg == 0x005d) {
+                    optionArg++;
+                    return optionArg;
+                } else {
+                    *status = U_ILLEGAL_ARGUMENT_ERROR;
+                    return NULL;
+                }
+            }
         }
-      }
     }
-  }
-  *status = U_ILLEGAL_ARGUMENT_ERROR;
-  return NULL;
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return NULL;
 }
 
 static
 USet *ucol_uprv_tok_readAndSetUnicodeSet(const UChar *start, const UChar *end, UErrorCode *status) {
-  while(*start != 0x005b) { /* advance while we find the first '[' */
-    start++;
-  }
-  // now we need to get a balanced set of '[]'. The problem is that a set can have
-  // many, and *end point to the first closing '['
-  int32_t noOpenBraces = 1;
-  int32_t current = 1; // skip the opening brace
-  while(start+current < end && noOpenBraces != 0) {
-    if(start[current] == 0x005b) {
-      noOpenBraces++;
-    } else if(start[current] == 0x005D) { // closing brace
-      noOpenBraces--;
-    }
-    current++;
-  }
-  UChar *nextBrace = NULL;
+    while(*start != 0x005b) { /* advance while we find the first '[' */
+        start++;
+    }
+    // now we need to get a balanced set of '[]'. The problem is that a set can have
+    // many, and *end points to the first closing ']'
+    int32_t noOpenBraces = 1;
+    int32_t current = 1; // skip the opening brace
+    while(start+current < end && noOpenBraces != 0) {
+        if(start[current] == 0x005b) {
+            noOpenBraces++;
+        } else if(start[current] == 0x005D) { // closing brace
+            noOpenBraces--;
+        }
+        current++;
+    }
 
-  if(noOpenBraces != 0 || (nextBrace = u_strchr(start+current, 0x005d /*']'*/)) == NULL) {
-    *status = U_ILLEGAL_ARGUMENT_ERROR;
-    return NULL;
-  }
-  return uset_openPattern(start, current, status);
+    if(noOpenBraces != 0 || u_strchr(start+current, 0x005d /*']'*/) == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    return uset_openPattern(start, current, status);
 }
 
+/**
+ * Reads an option and matches the option name with the predefined options. (Case-insensitive.)
+ * @param start Pointer to the start UChar.
+ * @param end Pointer to the last valid pointer beyond which the option will not extend.
+ * @param optionArg Address of the pointer at which the options start (after the option name)
+ * @return The index of the option, or -1 if the option is not valid.
+ */
 static
 int32_t ucol_uprv_tok_readOption(const UChar *start, const UChar *end, const UChar **optionArg) {
-  int32_t i = 0;
-  ucol_uprv_tok_initData();
+    int32_t i = 0;
+    ucol_uprv_tok_initData();
 
-  while(u_isWhitespace(*start)) { /* eat whitespace */
-    start++;
-  }
-  while(i < UTOK_OPTION_COUNT) {
-    if(u_strncmpNoCase(start, rulesOptions[i].optionName, rulesOptions[i].optionLen) == 0) {
-      if(end - start > rulesOptions[i].optionLen) {
-        *optionArg = start+rulesOptions[i].optionLen; /* start of the options*/
-        while(u_isWhitespace(**optionArg)) { /* eat whitespace */
-          (*optionArg)++;
+    while(PatternProps::isWhiteSpace(*start)) { /* eat whitespace */
+        start++;
+    }
+    while(i < UTOK_OPTION_COUNT) {
+        if(u_strncmpNoCase(start, rulesOptions[i].optionName, rulesOptions[i].optionLen) == 0) {
+            if(end - start > rulesOptions[i].optionLen) {
+                *optionArg = start+rulesOptions[i].optionLen; /* End of option name; start of the options */
+                while(PatternProps::isWhiteSpace(**optionArg)) { /* eat whitespace */
+                    (*optionArg)++;
+                }
+            }
+            break;
         }
-      }
-      break;
+        i++;
     }
-    i++;
-  }
-  if(i == UTOK_OPTION_COUNT) {
-    i = -1; // didn't find an option
-  }
-  return i;
+    if(i == UTOK_OPTION_COUNT) {
+        i = -1; // didn't find an option
+    }
+    return i;
 }
 
 
+static
+void ucol_tok_parseScriptReorder(UColTokenParser *src, UErrorCode *status) {
+    int32_t codeCount = 0;
+    int32_t codeIndex = 0;
+    char conversion[64];
+    int32_t tokenLength = 0;
+    const UChar* space;
+    
+    const UChar* current = src->current;
+    const UChar* end = u_memchr(src->current, 0x005d, src->end - src->current);
+
+    // eat leading whitespace
+    while(current < end && u_isWhitespace(*current)) {
+        current++;
+    }
+
+    while(current < end) {    
+        space = u_memchr(current, 0x0020, end - current);
+        space = space == 0 ? end : space;
+        tokenLength = space - current;
+        if (tokenLength < 4) {
+            *status = U_INVALID_FORMAT_ERROR;
+            return;
+        }
+        codeCount++;
+        current += tokenLength;
+        while(current < end && u_isWhitespace(*current)) { /* eat whitespace */
+            ++current;
+        }
+    }
+
+    if (codeCount == 0) {
+        *status = U_INVALID_FORMAT_ERROR;
+    }
+    
+    src->reorderCodesLength = codeCount;
+    src->reorderCodes = (int32_t*)uprv_malloc(codeCount * sizeof(int32_t));
+    current = src->current;
+    
+    // eat leading whitespace
+    while(current < end && u_isWhitespace(*current)) {
+        current++;
+    }
+
+    while(current < end) {    
+        space = u_memchr(current, 0x0020, end - current);
+        space = space == 0 ? end : space;
+        tokenLength = space - current;
+        if (tokenLength < 4) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        } else {
+            u_UCharsToChars(current, conversion, tokenLength);
+            conversion[tokenLength] = '\0';
+            src->reorderCodes[codeIndex] = ucol_findReorderingEntry(conversion);
+            if (src->reorderCodes[codeIndex] == USCRIPT_INVALID_CODE) {
+                src->reorderCodes[codeIndex] = u_getPropertyValueEnum(UCHAR_SCRIPT, conversion);
+            }
+            if (src->reorderCodes[codeIndex] == USCRIPT_INVALID_CODE) {
+                *status = U_ILLEGAL_ARGUMENT_ERROR;
+            }
+        }
+        codeIndex++;
+        current += tokenLength;
+        while(current < end && u_isWhitespace(*current)) { /* eat whitespace */
+            ++current;
+        }
+    }
+}
+
 // reads and conforms to various options in rules
 // end is the position of the first closing ']'
 // However, some of the options take an UnicodeSet definition
@@ -577,24 +700,24 @@ int32_t ucol_uprv_tok_readOption(const UChar *start, const UChar *end, const UCh
 // caller will set the current to it.
 static
 uint8_t ucol_uprv_tok_readAndSetOption(UColTokenParser *src, UErrorCode *status) {
-  const UChar* start = src->current;
-  int32_t i = 0;
-  int32_t j=0;
-  const UChar *optionArg = NULL;
+    const UChar* start = src->current;
+    int32_t i = 0;
+    int32_t j=0;
+    const UChar *optionArg = NULL;
 
-  uint8_t result = 0;
+    uint8_t result = 0;
 
-  start++; /*skip opening '['*/
-  i = ucol_uprv_tok_readOption(start, src->end, &optionArg);
-  if(optionArg) {
-    src->current = optionArg;
-  }
+    start++; /*skip opening '['*/
+    i = ucol_uprv_tok_readOption(start, src->end, &optionArg);
+    if(optionArg) {
+        src->current = optionArg;
+    }
 
-  if(i < 0) {
-    *status = U_ILLEGAL_ARGUMENT_ERROR;
-  } else {
-    int32_t noOpenBraces = 1;
-    switch(i) {
+    if(i < 0) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+    } else {
+        int32_t noOpenBraces = 1;
+        switch(i) {
     case OPTION_ALTERNATE_HANDLING:
     case OPTION_FRENCH_COLLATION:
     case OPTION_CASE_LEVEL:
@@ -603,123 +726,138 @@ uint8_t ucol_uprv_tok_readAndSetOption(UColTokenParser *src, UErrorCode *status)
     case OPTION_HIRAGANA_QUATERNARY:
     case OPTION_STRENGTH:
     case OPTION_NUMERIC_COLLATION:
-      if(optionArg) {
-        for(j = 0; j<rulesOptions[i].subSize; j++) {
-          if(u_strncmpNoCase(optionArg, rulesOptions[i].subopts[j].subName, rulesOptions[i].subopts[j].subLen) == 0) {
-            ucol_uprv_tok_setOptionInImage(src->opts, rulesOptions[i].attr, rulesOptions[i].subopts[j].attrVal);
-            result =  UCOL_TOK_SUCCESS;
-          }
+        if(optionArg) {
+            for(j = 0; j<rulesOptions[i].subSize; j++) {
+                if(u_strncmpNoCase(optionArg, rulesOptions[i].subopts[j].subName, rulesOptions[i].subopts[j].subLen) == 0) {
+                    ucol_uprv_tok_setOptionInImage(src->opts, rulesOptions[i].attr, rulesOptions[i].subopts[j].attrVal);
+                    result =  UCOL_TOK_SUCCESS;
+                }
+            }
         }
-      }
-      if(result == 0) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-      }
-      break;
+        if(result == 0) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        break;
     case OPTION_VARIABLE_TOP:
-      result = UCOL_TOK_SUCCESS | UCOL_TOK_VARIABLE_TOP;
-      break;
+        result = UCOL_TOK_SUCCESS | UCOL_TOK_VARIABLE_TOP;
+        break;
     case OPTION_REARRANGE:
-      result = UCOL_TOK_SUCCESS;
-      break;
+        result = UCOL_TOK_SUCCESS;
+        break;
     case OPTION_BEFORE:
-      if(optionArg) {
-        for(j = 0; j<rulesOptions[i].subSize; j++) {
-          if(u_strncmpNoCase(optionArg, rulesOptions[i].subopts[j].subName, rulesOptions[i].subopts[j].subLen) == 0) {
-          result = UCOL_TOK_SUCCESS | rulesOptions[i].subopts[j].attrVal + 1;
-          }
+        if(optionArg) {
+            for(j = 0; j<rulesOptions[i].subSize; j++) {
+                if(u_strncmpNoCase(optionArg, rulesOptions[i].subopts[j].subName, rulesOptions[i].subopts[j].subLen) == 0) {
+                    result = UCOL_TOK_SUCCESS | (rulesOptions[i].subopts[j].attrVal + 1);
+                }
+            }
         }
-      }
-      if(result == 0) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-      }
-      break;
+        if(result == 0) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        break;
     case OPTION_TOP: /* we are going to have an array with structures of limit CEs */
-      /* index to this array will be src->parsedToken.indirectIndex*/
-      src->parsedToken.indirectIndex = 0;
-      result = UCOL_TOK_SUCCESS | UCOL_TOK_TOP;
-      break;
+        /* index to this array will be src->parsedToken.indirectIndex*/
+        src->parsedToken.indirectIndex = 0;
+        result = UCOL_TOK_SUCCESS | UCOL_TOK_TOP;
+        break;
     case OPTION_FIRST:
     case OPTION_LAST: /* first, last */
-      for(j = 0; j<rulesOptions[i].subSize; j++) {
-        if(u_strncmpNoCase(optionArg, rulesOptions[i].subopts[j].subName, rulesOptions[i].subopts[j].subLen) == 0) {
-          // the calculation below assumes that OPTION_FIRST and OPTION_LAST are at i and i+1 and that the first
-          // element of indirect boundaries is reserved for top.
-          src->parsedToken.indirectIndex = (uint16_t)(i-OPTION_FIRST+1+j*2);
-          result =  UCOL_TOK_SUCCESS | UCOL_TOK_TOP;;
+        for(j = 0; j<rulesOptions[i].subSize; j++) {
+            if(u_strncmpNoCase(optionArg, rulesOptions[i].subopts[j].subName, rulesOptions[i].subopts[j].subLen) == 0) {
+                // the calculation below assumes that OPTION_FIRST and OPTION_LAST are at i and i+1 and that the first
+                // element of indirect boundaries is reserved for top.
+                src->parsedToken.indirectIndex = (uint16_t)(i-OPTION_FIRST+1+j*2);
+                result =  UCOL_TOK_SUCCESS | UCOL_TOK_TOP;;
+            }
         }
-      }
-      if(result == 0) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-      }
-      break;
+        if(result == 0) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        break;
     case OPTION_OPTIMIZE:
     case OPTION_SUPPRESS_CONTRACTIONS:  // copy and remove are handled before normalization
-      // we need to move end here
-      src->current++; // skip opening brace
-      while(src->current < src->end && noOpenBraces != 0) {
-        if(*src->current == 0x005b) {
-          noOpenBraces++;
-        } else if(*src->current == 0x005D) { // closing brace
-          noOpenBraces--;
+        // we need to move end here
+        src->current++; // skip opening brace
+        while(src->current < src->end && noOpenBraces != 0) {
+            if(*src->current == 0x005b) {
+                noOpenBraces++;
+            } else if(*src->current == 0x005D) { // closing brace
+                noOpenBraces--;
+            }
+            src->current++;
         }
-        src->current++;
-      }
-      result = UCOL_TOK_SUCCESS;
-      break;
+        result = UCOL_TOK_SUCCESS;
+        break;
+    case OPTION_SCRIPTREORDER:
+        ucol_tok_parseScriptReorder(src, status);
+        break;
     default:
-      *status = U_UNSUPPORTED_ERROR;
-      break;
+        *status = U_UNSUPPORTED_ERROR;
+        break;
+        }
     }
-  }
-  src->current = u_memchr(src->current, 0x005d, src->end-src->current);
-  return result;
+    src->current = u_memchr(src->current, 0x005d, (int32_t)(src->end-src->current));
+    return result;
 }
 
 
 inline void ucol_tok_addToExtraCurrent(UColTokenParser *src, const UChar *stuff, int32_t len, UErrorCode *status) {
-      if(src->extraCurrent+len >= src->extraEnd) {
+    if (stuff == NULL || len <= 0) {
+        return;
+    }
+    UnicodeString tempStuff(FALSE, stuff, len);
+    if(src->extraCurrent+len >= src->extraEnd) {
         /* reallocate */
+        if (stuff >= src->source && stuff <= src->end) {
+            // Copy the "stuff" contents into tempStuff's own buffer.
+            // UnicodeString is copy-on-write.
+            if (len > 0) {
+                tempStuff.setCharAt(0, tempStuff[0]);
+            } else {
+                tempStuff.remove();
+            }
+        }
         UChar *newSrc = (UChar *)uprv_realloc(src->source, (src->extraEnd-src->source)*2*sizeof(UChar));
         if(newSrc != NULL) {
-          src->current = newSrc + (src->current - src->source);
-          src->extraCurrent = newSrc + (src->extraCurrent - src->source);
-          src->end = newSrc + (src->end - src->source);
-          src->extraEnd = newSrc + (src->extraEnd-src->source)*2;
-          src->sourceCurrent = newSrc + (src->sourceCurrent-src->source);
-          src->source = newSrc;
+            src->current = newSrc + (src->current - src->source);
+            src->extraCurrent = newSrc + (src->extraCurrent - src->source);
+            src->end = newSrc + (src->end - src->source);
+            src->extraEnd = newSrc + (src->extraEnd-src->source)*2;
+            src->sourceCurrent = newSrc + (src->sourceCurrent-src->source);
+            src->source = newSrc;
         } else {
-          *status = U_MEMORY_ALLOCATION_ERROR;
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return;
         }
-      }
-      if(len == 1) {
-          *src->extraCurrent++ = *stuff;
-      } else {
-        uprv_memcpy(src->extraCurrent, stuff, len*sizeof(UChar));
+    }
+    if(len == 1) {
+        *src->extraCurrent++ = tempStuff[0];
+    } else {
+        u_memcpy(src->extraCurrent, tempStuff.getBuffer(), len);
         src->extraCurrent += len;
-      }
-
-
+    }
 }
 
 inline UBool ucol_tok_doSetTop(UColTokenParser *src, UErrorCode *status) {
-  /*
-  top = TRUE;
-  */
-  UChar buff[5];
-  src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
-  buff[0] = 0xFFFE;
-  buff[1] = (UChar)(ucolIndirectBoundaries[src->parsedToken.indirectIndex].startCE >> 16);
-  buff[2] = (UChar)(ucolIndirectBoundaries[src->parsedToken.indirectIndex].startCE & 0xFFFF);
-  if(ucolIndirectBoundaries[src->parsedToken.indirectIndex].startContCE == 0) {
-    src->parsedToken.charsLen = 3;
-    ucol_tok_addToExtraCurrent(src, buff, 3, status);
-  } else {
-    buff[3] = (UChar)(ucolIndirectBoundaries[src->parsedToken.indirectIndex].startContCE >> 16);
-    buff[4] = (UChar)(ucolIndirectBoundaries[src->parsedToken.indirectIndex].startContCE & 0xFFFF);
-    src->parsedToken.charsLen = 5;
-    ucol_tok_addToExtraCurrent(src, buff, 5, status);
-  }
-  return TRUE;
+    /*
+    top = TRUE;
+    */
+    UChar buff[5];
+    src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
+    buff[0] = 0xFFFE;
+    buff[1] = (UChar)(ucolIndirectBoundaries[src->parsedToken.indirectIndex].startCE >> 16);
+    buff[2] = (UChar)(ucolIndirectBoundaries[src->parsedToken.indirectIndex].startCE & 0xFFFF);
+    if(ucolIndirectBoundaries[src->parsedToken.indirectIndex].startContCE == 0) {
+        src->parsedToken.charsLen = 3;
+        ucol_tok_addToExtraCurrent(src, buff, 3, status);
+    } else {
+        buff[3] = (UChar)(ucolIndirectBoundaries[src->parsedToken.indirectIndex].startContCE >> 16);
+        buff[4] = (UChar)(ucolIndirectBoundaries[src->parsedToken.indirectIndex].startContCE & 0xFFFF);
+        src->parsedToken.charsLen = 5;
+        ucol_tok_addToExtraCurrent(src, buff, 5, status);
+    }
+    return TRUE;
 }
 
 static UBool isCharNewLine(UChar c){
@@ -736,596 +874,814 @@ static UBool isCharNewLine(UChar c){
     }
 }
 
-U_CAPI const UChar* U_EXPORT2
-ucol_tok_parseNextToken(UColTokenParser *src,
-                        UBool startOfRules,
-                        UParseError *parseError,
-                        UErrorCode *status) {
-/* parsing part */
-  UBool variableTop = FALSE;
-  UBool top = FALSE;
-  UBool inChars = TRUE;
-  UBool inQuote = FALSE;
-  UBool wasInQuote = FALSE;
-  UChar *optionEnd = NULL;
-  uint8_t before = 0;
-  UBool isEscaped = FALSE;
-  // TODO: replace these variables with src->parsedToken counterparts
-  // no need to use them anymore since we have src->parsedToken.
-  // Ideally, token parser would be a nice class... Once, when I have
-  // more time (around 2020 probably).
-  uint32_t newExtensionLen = 0;
-  uint32_t extensionOffset = 0;
-  uint32_t newStrength = UCOL_TOK_UNSET;
-  UChar buff[10];
-
-  src->parsedToken.charsOffset = 0;  src->parsedToken.charsLen = 0;
-  src->parsedToken.prefixOffset = 0; src->parsedToken.prefixLen = 0;
-  src->parsedToken.indirectIndex = 0;
-
-  while (src->current < src->end) {
-    UChar ch = *(src->current);
-
-    if (inQuote) {
-      if (ch == 0x0027/*'\''*/) {
-          inQuote = FALSE;
-      } else {
-        if ((src->parsedToken.charsLen == 0) || inChars) {
-          if(src->parsedToken.charsLen == 0) {
-            src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
-          }
-          src->parsedToken.charsLen++;
-        } else {
-          if(newExtensionLen == 0) {
-            extensionOffset = (uint32_t)(src->extraCurrent - src->source);
-          }
-          newExtensionLen++;
-        }
-      }
-    }else if(isEscaped){
-      isEscaped =FALSE;
-      if (newStrength == UCOL_TOK_UNSET) {
-        *status = U_INVALID_FORMAT_ERROR;
-        syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
-        return NULL;
-        // enabling rules to start with non-tokens a < b
-        // newStrength = UCOL_TOK_RESET;
-      }
-      if(ch != 0x0000  && src->current != src->end) {
-          if (inChars) {
-            if(src->parsedToken.charsLen == 0) {
-              src->parsedToken.charsOffset = (uint32_t)(src->current - src->source);
-            }
-            src->parsedToken.charsLen++;
-          } else {
-            if(newExtensionLen == 0) {
-              extensionOffset = (uint32_t)(src->current - src->source);
-            }
-            newExtensionLen++;
-          }
-      }
-    }else {
-      if(!uprv_isRuleWhiteSpace(ch)) {
-        /* Sets the strength for this entry */
-        switch (ch) {
-          case 0x003D/*'='*/ :
-            if (newStrength != UCOL_TOK_UNSET) {
-              goto EndOfLoop;
-            }
+/*
+ * This function is called several times when a range is processed.  Each time, the next code point
+ * is processed.
+ * The following variables must be set before calling this function:
+ *   src->currentRangeCp:  The current code point to process.
+ *   src->lastRangeCp: The last code point in the range.
+ * Pre-requisite: src->currentRangeCp <= src->lastRangeCp.
+ */
+static const UChar*
+ucol_tok_processNextCodePointInRange(UColTokenParser *src,
+                                     UErrorCode *status)
+{
+  // Append current code point to source
+  UChar buff[U16_MAX_LENGTH];
+  uint32_t i = 0;
 
-            /* if we start with strength, we'll reset to top */
-            if(startOfRules == TRUE) {
-              src->parsedToken.indirectIndex = 5;
-              top = ucol_tok_doSetTop(src, status);
-              newStrength = UCOL_TOK_RESET;
-              goto EndOfLoop;
-            }
-            newStrength = UCOL_IDENTICAL;
-            break;
+  uint32_t nChars = U16_LENGTH(src->currentRangeCp);
+  src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
+  src->parsedToken.charsLen = nChars;
 
-          case 0x002C/*','*/:
-            if (newStrength != UCOL_TOK_UNSET) {
-              goto EndOfLoop;
-            }
+  U16_APPEND_UNSAFE(buff, i, src->currentRangeCp);
+  ucol_tok_addToExtraCurrent(src, buff, nChars, status);
 
-            /* if we start with strength, we'll reset to top */
-            if(startOfRules == TRUE) {
-              src->parsedToken.indirectIndex = 5;
-              top = ucol_tok_doSetTop(src, status);
-              newStrength = UCOL_TOK_RESET;
-              goto EndOfLoop;
-            }
-            newStrength = UCOL_TERTIARY;
-            break;
+  ++src->currentRangeCp;
+  if (src->currentRangeCp > src->lastRangeCp) {
+    src->inRange = FALSE;
 
-          case  0x003B/*';'*/:
-            if (newStrength != UCOL_TOK_UNSET) {
-              goto EndOfLoop;
-            }
+    if (src->currentStarredCharIndex > src->lastStarredCharIndex) {
+      src->isStarred = FALSE;
+    }
+  } else {
+    src->previousCp = src->currentRangeCp;
+  }
+  return src->current;
+}
 
-            /* if we start with strength, we'll reset to top */
-            if(startOfRules == TRUE) {
-              src->parsedToken.indirectIndex = 5;
-              top = ucol_tok_doSetTop(src, status);
-              newStrength = UCOL_TOK_RESET;
-              goto EndOfLoop;
-            }
-            newStrength = UCOL_SECONDARY;
-            break;
+/*
+ * This function is called several times when a starred list is processed.  Each time, the next code point
+ * in the list is processed.
+ * The following variables must be set before calling this function:
+ *   src->currentStarredCharIndex:  Index (in src->source) of the first char of the current code point.
+ *   src->lastStarredCharIndex: Index to the last character in the list.
+ * Pre-requisite: src->currentStarredCharIndex <= src->lastStarredCharIndex.
+ */
+static const UChar*
+ucol_tok_processNextTokenInStarredList(UColTokenParser *src)
+{
+  // Extract the characters corresponding to the next code point.
+  UChar32 cp;
+  src->parsedToken.charsOffset = src->currentStarredCharIndex;
+  int32_t prev = src->currentStarredCharIndex;
+  U16_NEXT(src->source, src->currentStarredCharIndex, (uint32_t)(src->end - src->source), cp);
+  src->parsedToken.charsLen = src->currentStarredCharIndex - prev;
+
+  // When we are done parsing the starred string, turn the flag off so that
+  // the normal processing is restored.
+  if (src->currentStarredCharIndex > src->lastStarredCharIndex) {
+    src->isStarred = FALSE;
+  }
+  src->previousCp = cp;
+  return src->current;
+}
 
-          case 0x003C/*'<'*/:
-            if (newStrength != UCOL_TOK_UNSET) {
-              goto EndOfLoop;
+/*
+ * Partially parses the next token, keeps the indices in src->parsedToken, and updates the counters.
+ *
+ * This routine parses and separates almost all tokens. The following are the syntax characters recognized.
+ *  # : Comment character
+ *  & : Reset operator
+ *  = : Equality
+ *  < : Primary collation
+ *  << : Secondary collation
+ *  <<< : Tertiary collation
+ *  ; : Secondary collation
+ *  , : Tertiary collation
+ *  / : Expansions
+ *  | : Prefix
+ *  - : Range
+
+ *  ! : Java Thai modifier, ignored
+ *  @ : French only
+
+ * [] : Options
+ * '' : Quotes
+ *
+ *  Along with operators =, <, <<, <<<, the operator * is supported to indicate a list.  For example, &a<*bcdexyz
+ *  is equivalent to &a<b<c<d<e<x<y<z.  In lists, ranges can also be given, so &a<*b-ex-z is equivalent to the above.
+ *  This function does not separate the tokens in a list.  Instead, &a<*b-ex-z is parsed as four tokens - "&a",
+ *  "<*b", "-ex", "-z".  The strength (< in this case), whether in a list, whether in a range, and the previous
+ *  character are cached so that the calling program can do further splitting.
+ */
+static const UChar*
+ucol_tok_parseNextTokenInternal(UColTokenParser *src,
+                                UBool startOfRules,
+                                UParseError *parseError,
+                                UErrorCode *status)
+{
+    UBool variableTop = FALSE;
+    UBool top = FALSE;
+    UBool inChars = TRUE;
+    UBool inQuote = FALSE;
+    UBool wasInQuote = FALSE;
+    uint8_t before = 0;
+    UBool isEscaped = FALSE;
+
+    // TODO: replace these variables with src->parsedToken counterparts
+    // no need to use them anymore since we have src->parsedToken.
+    // Ideally, token parser would be a nice class... Once, when I have
+    // more time (around 2020 probably).
+    uint32_t newExtensionLen = 0;
+    uint32_t extensionOffset = 0;
+    uint32_t newStrength = UCOL_TOK_UNSET;
+    UChar buff[10];
+
+    src->parsedToken.charsOffset = 0;  src->parsedToken.charsLen = 0;
+    src->parsedToken.prefixOffset = 0; src->parsedToken.prefixLen = 0;
+    src->parsedToken.indirectIndex = 0;
+
+    while (src->current < src->end) {
+        UChar ch = *(src->current);
+
+        if (inQuote) {
+            if (ch == 0x0027/*'\''*/) {
+                inQuote = FALSE;
+            } else {
+                if ((src->parsedToken.charsLen == 0) || inChars) {
+                    if(src->parsedToken.charsLen == 0) {
+                        src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
+                    }
+                    src->parsedToken.charsLen++;
+                } else {
+                    if(newExtensionLen == 0) {
+                        extensionOffset = (uint32_t)(src->extraCurrent - src->source);
+                    }
+                    newExtensionLen++;
+                }
             }
-
-            /* if we start with strength, we'll reset to top */
-            if(startOfRules == TRUE) {
-              src->parsedToken.indirectIndex = 5;
-              top = ucol_tok_doSetTop(src, status);
-              newStrength = UCOL_TOK_RESET;
-              goto EndOfLoop;
+        }else if(isEscaped){
+            isEscaped =FALSE;
+            if (newStrength == UCOL_TOK_UNSET) {
+                *status = U_INVALID_FORMAT_ERROR;
+                syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
+                DBG_FORMAT_ERROR
+                return NULL;
+                // enabling rules to start with non-tokens a < b
+                // newStrength = UCOL_TOK_RESET;
             }
-            /* before this, do a scan to verify whether this is */
-            /* another strength */
-            if(*(src->current+1) == 0x003C) {
-              src->current++;
-              if(*(src->current+1) == 0x003C) {
-                src->current++; /* three in a row! */
-                newStrength = UCOL_TERTIARY;
-              } else { /* two in a row */
-                newStrength = UCOL_SECONDARY;
-              }
-            } else { /* just one */
-              newStrength = UCOL_PRIMARY;
+            if(ch != 0x0000  && src->current != src->end) {
+                if (inChars) {
+                    if(src->parsedToken.charsLen == 0) {
+                        src->parsedToken.charsOffset = (uint32_t)(src->current - src->source);
+                    }
+                    src->parsedToken.charsLen++;
+                } else {
+                    if(newExtensionLen == 0) {
+                        extensionOffset = (uint32_t)(src->current - src->source);
+                    }
+                    newExtensionLen++;
+                }
             }
-            break;
+        }else {
+            if(!PatternProps::isWhiteSpace(ch)) {
+                /* Sets the strength for this entry */
+                switch (ch) {
+                case 0x003D/*'='*/ :
+                    if (newStrength != UCOL_TOK_UNSET) {
+                        goto EndOfLoop;
+                    }
 
-          case 0x0026/*'&'*/:
-            if (newStrength != UCOL_TOK_UNSET) {
-              /**/
-              goto EndOfLoop;
-            }
+                    /* if we start with strength, we'll reset to top */
+                    if(startOfRules == TRUE) {
+                        src->parsedToken.indirectIndex = 5;
+                        top = ucol_tok_doSetTop(src, status);
+                        newStrength = UCOL_TOK_RESET;
+                        goto EndOfLoop;
+                    }
+                    newStrength = UCOL_IDENTICAL;
+                    if(*(src->current+1) == 0x002A) {/*'*'*/
+                        src->current++;
+                        src->isStarred = TRUE;
+                    }
+                    break;
 
-            newStrength = UCOL_TOK_RESET; /* PatternEntry::RESET = 0 */
-            break;
+                case 0x002C/*','*/:
+                    if (newStrength != UCOL_TOK_UNSET) {
+                        goto EndOfLoop;
+                    }
 
-          case 0x005b/*'['*/:
-            /* options - read an option, analyze it */
-            if((optionEnd = u_strchr(src->current, 0x005d /*']'*/)) != NULL) {
-              uint8_t result = ucol_uprv_tok_readAndSetOption(src, status);
-              //src->current = optionEnd;
-              if(U_SUCCESS(*status)) {
-                if(result & UCOL_TOK_TOP) {
-                  if(newStrength == UCOL_TOK_RESET) {
-                    top = ucol_tok_doSetTop(src, status);
-                    if(before) { // This is a combination of before and indirection like '&[before 2][first regular]<b'
-                      src->parsedToken.charsLen+=2;
-                      buff[0] = 0x002d;
-                      buff[1] = before;
-                      ucol_tok_addToExtraCurrent(src, buff, 2, status);
+                    /* if we start with strength, we'll reset to top */
+                    if(startOfRules == TRUE) {
+                        src->parsedToken.indirectIndex = 5;
+                        top = ucol_tok_doSetTop(src, status);
+                        newStrength = UCOL_TOK_RESET;
+                        goto EndOfLoop;
                     }
+                    newStrength = UCOL_TERTIARY;
+                    break;
 
-                    src->current++;
-                    goto EndOfLoop;
-                  } else {
-                    *status = U_INVALID_FORMAT_ERROR;
-                    syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
-                  }
-                } else if(result & UCOL_TOK_VARIABLE_TOP) {
-                  if(newStrength != UCOL_TOK_RESET && newStrength != UCOL_TOK_UNSET) {
-                    variableTop = TRUE;
-                    src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
-                    src->parsedToken.charsLen = 1;
-                    buff[0] = 0xFFFF;
-                    ucol_tok_addToExtraCurrent(src, buff, 1, status);
-                    src->current++;
-                    goto EndOfLoop;
-                  } else {
-                    *status = U_INVALID_FORMAT_ERROR;
-                    syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
-                  }
-                } else if (result & UCOL_TOK_BEFORE){
-                  if(newStrength == UCOL_TOK_RESET) {
-                    before = result & UCOL_TOK_BEFORE;
-                  } else {
-                    *status = U_INVALID_FORMAT_ERROR;
-                    syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
+                case  0x003B/*';'*/:
+                    if (newStrength != UCOL_TOK_UNSET) {
+                        goto EndOfLoop;
+                    }
 
-                  }
-                }
-              } else {
-                *status = U_INVALID_FORMAT_ERROR;
-                syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
-                return NULL;
-              }
-            }
-            break;
-          case 0x0021/*! skip java thai modifier reordering*/:
-              break;
-          case 0x002F/*'/'*/:
-            wasInQuote = FALSE; /* if we were copying source characters, we want to stop now */
-            inChars = FALSE; /* we're now processing expansion */
-            break;
-          case 0x005C /* back slash for escaped chars */:
-              isEscaped = TRUE;
-              break;
-          /* found a quote, we're gonna start copying */
-          case 0x0027/*'\''*/:
-            if (newStrength == UCOL_TOK_UNSET) { /* quote is illegal until we have a strength */
-              *status = U_INVALID_FORMAT_ERROR;
-              syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
-              return NULL;
-              // enabling rules to start with a non-token character a < b
-              // newStrength = UCOL_TOK_RESET;
-            }
+                    /* if we start with strength, we'll reset to top */
+                    if(startOfRules == TRUE) {
+                        src->parsedToken.indirectIndex = 5;
+                        top = ucol_tok_doSetTop(src, status);
+                        newStrength = UCOL_TOK_RESET;
+                        goto EndOfLoop;
+                    }
+                    newStrength = UCOL_SECONDARY;
+                    break;
 
-            inQuote = TRUE;
-
-            if(inChars) { /* we're doing characters */
-              if(wasInQuote == FALSE) {
-                src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
-              }
-              if (src->parsedToken.charsLen != 0) {
-                  ucol_tok_addToExtraCurrent(src, src->current - src->parsedToken.charsLen, src->parsedToken.charsLen, status);
-              }
-              src->parsedToken.charsLen++;
-            } else { /* we're doing an expansion */
-              if(wasInQuote == FALSE) {
-                extensionOffset = (uint32_t)(src->extraCurrent - src->source);
-              }
-              if (newExtensionLen != 0) {
-                ucol_tok_addToExtraCurrent(src, src->current - newExtensionLen, newExtensionLen, status);
-              }
-              newExtensionLen++;
-            }
+                case 0x003C/*'<'*/:
+                    if (newStrength != UCOL_TOK_UNSET) {
+                        goto EndOfLoop;
+                    }
 
-            wasInQuote = TRUE;
+                    /* if we start with strength, we'll reset to top */
+                    if(startOfRules == TRUE) {
+                        src->parsedToken.indirectIndex = 5;
+                        top = ucol_tok_doSetTop(src, status);
+                        newStrength = UCOL_TOK_RESET;
+                        goto EndOfLoop;
+                    }
+                    /* before this, do a scan to verify whether this is */
+                    /* another strength */
+                    if(*(src->current+1) == 0x003C) {
+                        src->current++;
+                        if(*(src->current+1) == 0x003C) {
+                            src->current++; /* three in a row! */
+                            newStrength = UCOL_TERTIARY;
+                        } else { /* two in a row */
+                            newStrength = UCOL_SECONDARY;
+                        }
+                    } else { /* just one */
+                        newStrength = UCOL_PRIMARY;
+                    }
+                    if(*(src->current+1) == 0x002A) {/*'*'*/
+                        src->current++;
+                        src->isStarred = TRUE;
+                    }
+                    break;
 
-            ch = *(++(src->current));
-            if(ch == 0x0027) { /* copy the double quote */
-              ucol_tok_addToExtraCurrent(src, &ch, 1, status);
-              inQuote = FALSE;
-            }
-            break;
+                case 0x0026/*'&'*/:
+                    if (newStrength != UCOL_TOK_UNSET) {
+                        /**/
+                        goto EndOfLoop;
+                    }
 
-          /* '@' is french only if the strength is not currently set */
-          /* if it is, it's just a regular character in collation rules */
-          case 0x0040/*'@'*/:
-            if (newStrength == UCOL_TOK_UNSET) {
-              src->opts->frenchCollation = UCOL_ON;
-              break;
-            }
+                    newStrength = UCOL_TOK_RESET; /* PatternEntry::RESET = 0 */
+                    break;
+
+                case 0x005b/*'['*/:
+                    /* options - read an option, analyze it */
+                    if(u_strchr(src->current, 0x005d /*']'*/) != NULL) {
+                        uint8_t result = ucol_uprv_tok_readAndSetOption(src, status);
+                        if(U_SUCCESS(*status)) {
+                            if(result & UCOL_TOK_TOP) {
+                                if(newStrength == UCOL_TOK_RESET) {
+                                    top = ucol_tok_doSetTop(src, status);
+                                    if(before) { // This is a combination of before and indirection like '&[before 2][first regular]<b'
+                                        src->parsedToken.charsLen+=2;
+                                        buff[0] = 0x002d;
+                                        buff[1] = before;
+                                        ucol_tok_addToExtraCurrent(src, buff, 2, status);
+                                    }
+
+                                    src->current++;
+                                    goto EndOfLoop;
+                                } else {
+                                    *status = U_INVALID_FORMAT_ERROR;
+                                    syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
+                                    DBG_FORMAT_ERROR
+                                }
+                            } else if(result & UCOL_TOK_VARIABLE_TOP) {
+                                if(newStrength != UCOL_TOK_RESET && newStrength != UCOL_TOK_UNSET) {
+                                    variableTop = TRUE;
+                                    src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
+                                    src->parsedToken.charsLen = 1;
+                                    buff[0] = 0xFFFF;
+                                    ucol_tok_addToExtraCurrent(src, buff, 1, status);
+                                    src->current++;
+                                    goto EndOfLoop;
+                                } else {
+                                    *status = U_INVALID_FORMAT_ERROR;
+                                    syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
+                                    DBG_FORMAT_ERROR
+                                }
+                            } else if (result & UCOL_TOK_BEFORE){
+                                if(newStrength == UCOL_TOK_RESET) {
+                                    before = result & UCOL_TOK_BEFORE;
+                                } else {
+                                    *status = U_INVALID_FORMAT_ERROR;
+                                    syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
+                                    DBG_FORMAT_ERROR
+                                }
+                            }
+                        } else {
+                            *status = U_INVALID_FORMAT_ERROR;
+                            syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
+                            DBG_FORMAT_ERROR
+                            return NULL;
+                        }
+                    }
+                    break;
+                case 0x0021/*! skip java thai modifier reordering*/:
+                    break;
+                case 0x002F/*'/'*/:
+                    wasInQuote = FALSE; /* if we were copying source characters, we want to stop now */
+                    inChars = FALSE; /* we're now processing expansion */
+                    break;
+                case 0x005C /* back slash for escaped chars */:
+                    isEscaped = TRUE;
+                    break;
+                    /* found a quote, we're gonna start copying */
+                case 0x0027/*'\''*/:
+                    if (newStrength == UCOL_TOK_UNSET) { /* quote is illegal until we have a strength */
+                      *status = U_INVALID_FORMAT_ERROR;
+                      syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
+                      DBG_FORMAT_ERROR
+                      return NULL;
+                      // enabling rules to start with a non-token character a < b
+                      // newStrength = UCOL_TOK_RESET;
+                    }
 
-          case 0x007C /*|*/: /* this means we have actually been reading prefix part */
-            // we want to store read characters to the prefix part and continue reading
-            // the characters (proper way would be to restart reading the chars, but in
-            // that case we would have to complicate the token hasher, which I do not
-            // intend to play with. Instead, we will do prefixes when prefixes are due
-            // (before adding the elements).
-            src->parsedToken.prefixOffset = src->parsedToken.charsOffset;
-            src->parsedToken.prefixLen = src->parsedToken.charsLen;
-
-            if(inChars) { /* we're doing characters */
-              if(wasInQuote == FALSE) {
-                src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
-              }
-              if (src->parsedToken.charsLen != 0) {
-                  ucol_tok_addToExtraCurrent(src, src->current - src->parsedToken.charsLen, src->parsedToken.charsLen, status);
-              }
-              src->parsedToken.charsLen++;
-            }
+                    inQuote = TRUE;
+
+                    if(inChars) { /* we're doing characters */
+                        if(wasInQuote == FALSE) {
+                            src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
+                        }
+                        if (src->parsedToken.charsLen != 0) {
+                            ucol_tok_addToExtraCurrent(src, src->current - src->parsedToken.charsLen, src->parsedToken.charsLen, status);
+                        }
+                        src->parsedToken.charsLen++;
+                    } else { /* we're doing an expansion */
+                        if(wasInQuote == FALSE) {
+                            extensionOffset = (uint32_t)(src->extraCurrent - src->source);
+                        }
+                        if (newExtensionLen != 0) {
+                            ucol_tok_addToExtraCurrent(src, src->current - newExtensionLen, newExtensionLen, status);
+                        }
+                        newExtensionLen++;
+                    }
 
-            wasInQuote = TRUE;
+                    wasInQuote = TRUE;
 
-            do {
-              ch = *(++(src->current));
-              // skip whitespace between '|' and the character
-            } while (uprv_isRuleWhiteSpace(ch));
-            break;
+                    ch = *(++(src->current));
+                    if(ch == 0x0027) { /* copy the double quote */
+                        ucol_tok_addToExtraCurrent(src, &ch, 1, status);
+                        inQuote = FALSE;
+                    }
+                    break;
+
+                    /* '@' is french only if the strength is not currently set */
+                    /* if it is, it's just a regular character in collation rules */
+                case 0x0040/*'@'*/:
+                    if (newStrength == UCOL_TOK_UNSET) {
+                        src->opts->frenchCollation = UCOL_ON;
+                        break;
+                    }
 
-            //charsOffset = 0;
-            //newCharsLen = 0;
-            //break; // We want to store the whole prefix/character sequence. If we break
-                     // the '|' is going to get lost.
-          case 0x0023 /*#*/: /* this is a comment, skip everything through the end of line */
-            do {
-                ch = *(++(src->current));
-            } while (!isCharNewLine(ch));
+                case 0x007C /*|*/: /* this means we have actually been reading prefix part */
+                    // we want to store read characters to the prefix part and continue reading
+                    // the characters (proper way would be to restart reading the chars, but in
+                    // that case we would have to complicate the token hasher, which I do not
+                    // intend to play with. Instead, we will do prefixes when prefixes are due
+                    // (before adding the elements).
+                    src->parsedToken.prefixOffset = src->parsedToken.charsOffset;
+                    src->parsedToken.prefixLen = src->parsedToken.charsLen;
+
+                    if(inChars) { /* we're doing characters */
+                        if(wasInQuote == FALSE) {
+                            src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
+                        }
+                        if (src->parsedToken.charsLen != 0) {
+                            ucol_tok_addToExtraCurrent(src, src->current - src->parsedToken.charsLen, src->parsedToken.charsLen, status);
+                        }
+                        src->parsedToken.charsLen++;
+                    }
 
-            break;
-          default:
-            if (newStrength == UCOL_TOK_UNSET) {
-              *status = U_INVALID_FORMAT_ERROR;
-              syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
-              return NULL;
-            }
+                    wasInQuote = TRUE;
+
+                    do {
+                        ch = *(++(src->current));
+                        // skip whitespace between '|' and the character
+                    } while (PatternProps::isWhiteSpace(ch));
+                    break;
+
+                    //charsOffset = 0;
+                    //newCharsLen = 0;
+                    //break; // We want to store the whole prefix/character sequence. If we break
+                    // the '|' is going to get lost.
+
+                case 0x002D /*-*/: /* A range. */
+                    if (newStrength != UCOL_TOK_UNSET) {
+                      // While processing the pending token, the isStarred field
+                      // is reset, so it needs to be saved for the next
+                      // invocation.
+                      src->savedIsStarred = src->isStarred;
+                      goto EndOfLoop;
+                   }
+                   src->isStarred = src->savedIsStarred;
+
+                   // Ranges are valid only in starred tokens.
+                   if (!src->isStarred) {
+                     *status = U_INVALID_FORMAT_ERROR;
+                     syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
+                     DBG_FORMAT_ERROR
+                     return NULL;
+                   }
+                   newStrength = src->parsedToken.strength;
+                   src->inRange = TRUE;
+                   break;
+
+                case 0x0023 /*#*/: /* this is a comment, skip everything through the end of line */
+                    do {
+                        ch = *(++(src->current));
+                    } while (!isCharNewLine(ch));
+
+                    break;
+                default:
+                    if (newStrength == UCOL_TOK_UNSET) {
+                      *status = U_INVALID_FORMAT_ERROR;
+                      syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
+                      DBG_FORMAT_ERROR
+                      return NULL;
+                    }
 
-            if (ucol_tok_isSpecialChar(ch) && (inQuote == FALSE)) {
-              *status = U_INVALID_FORMAT_ERROR;
-              syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
-              return NULL;
-            }
+                    if (ucol_tok_isSpecialChar(ch) && (inQuote == FALSE)) {
+                        *status = U_INVALID_FORMAT_ERROR;
+                        syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
+                        DBG_FORMAT_ERROR
+                        return NULL;
+                    }
+
+                    if(ch == 0x0000 && src->current+1 == src->end) {
+                        break;
+                    }
 
-            if(ch == 0x0000 && src->current+1 == src->end) {
-              break;
+                    if (inChars) {
+                        if(src->parsedToken.charsLen == 0) {
+                            src->parsedToken.charsOffset = (uint32_t)(src->current - src->source);
+                        }
+                        src->parsedToken.charsLen++;
+                    } else {
+                        if(newExtensionLen == 0) {
+                            extensionOffset = (uint32_t)(src->current - src->source);
+                        }
+                        newExtensionLen++;
+                    }
+
+                    break;
+                }
             }
+        }
 
-            if (inChars) {
-              if(src->parsedToken.charsLen == 0) {
-                src->parsedToken.charsOffset = (uint32_t)(src->current - src->source);
-              }
-              src->parsedToken.charsLen++;
-            } else {
-              if(newExtensionLen == 0) {
-                extensionOffset = (uint32_t)(src->current - src->source);
-              }
-              newExtensionLen++;
+        if(wasInQuote) {
+            if(ch != 0x27) {
+                if(inQuote || !PatternProps::isWhiteSpace(ch)) {
+                    ucol_tok_addToExtraCurrent(src, &ch, 1, status);
+                }
             }
+        }
 
-            break;
-          }
-       }
+        src->current++;
     }
 
-    if(wasInQuote) {
-      if(ch != 0x27) {
-          if(inQuote || !uprv_isRuleWhiteSpace(ch)) {
-            ucol_tok_addToExtraCurrent(src, &ch, 1, status);
-          }
-      }
+EndOfLoop:
+    wasInQuote = FALSE;
+    if (newStrength == UCOL_TOK_UNSET) {
+        return NULL;
     }
 
-      src->current++;
+    if (src->parsedToken.charsLen == 0 && top == FALSE) {
+        syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
+        *status = U_INVALID_FORMAT_ERROR;
+        DBG_FORMAT_ERROR
+        return NULL;
     }
 
- EndOfLoop:
-    wasInQuote = FALSE;
-  if (newStrength == UCOL_TOK_UNSET) {
-    return NULL;
+    src->parsedToken.strength = newStrength;
+    src->parsedToken.extensionOffset = extensionOffset;
+    src->parsedToken.extensionLen = newExtensionLen;
+    src->parsedToken.flags = (UCOL_TOK_VARIABLE_TOP * (variableTop?1:0)) | (UCOL_TOK_TOP * (top?1:0)) | before;
+
+    return src->current;
+}
+
+/*
+ * Parses the next token, keeps the indices in src->parsedToken, and updates the counters.
+ * @see ucol_tok_parseNextTokenInternal() for the description of what operators are supported.
+ *
+ * In addition to what ucol_tok_parseNextTokenInternal() does, this function does the following:
+ *  1) ucol_tok_parseNextTokenInternal() returns a range as a single token.  This function splits
+ *     it into separate tokens and returns them one by one.  In order to do that, the necessary
+ *     state is cached in fields of the token parser (src->inRange, src->isStarred,
+ *     src->previousCp, src->lastRangeCp, src->currentRangeCp, src->currentStarredCharIndex,
+ *     src->lastStarredCharIndex).
+ *  2) When encountering a range, ucol_tok_parseNextTokenInternal() processes characters up to the
+ *     starting character as a single list token (which is separated into individual characters here)
+ *     and as another list token starting with the last character in the range.  Before expanding it
+ *     as a list of tokens, this function expands the range by filling the intermediate characters and
+ *     returns them one by one as separate tokens.
+ * Necessary checks are done for invalid combinations.
+ *
+ * @param src          the token parser; while src->inRange or src->isStarred is set on entry, the
+ *                     call continues the pending range / starred list instead of parsing new text.
+ * @param startOfRules passed through unchanged to ucol_tok_parseNextTokenInternal().
+ * @param parseError   passed to ucol_tok_parseNextTokenInternal(), and filled in through
+ *                     syntaxError() when one of the range checks below fails.
+ * @param status       set to U_INVALID_FORMAT_ERROR when a range check fails; also handed to
+ *                     ucol_tok_parseNextTokenInternal() and ucol_tok_processNextCodePointInRange().
+ * @return NULL when ucol_tok_parseNextTokenInternal() returned NULL or a range check failed;
+ *         otherwise whatever ucol_tok_processNextCodePointInRange() or
+ *         ucol_tok_processNextTokenInStarredList() returns for range / starred tokens, or the
+ *         pointer returned by ucol_tok_parseNextTokenInternal() for an ordinary token.
+ */
+U_CAPI const UChar* U_EXPORT2
+ucol_tok_parseNextToken(UColTokenParser *src,
+                        UBool startOfRules,
+                        UParseError *parseError,
+                        UErrorCode *status)
+{
+  const UChar *nextToken;
+
+  if (src->inRange) {
+    // We are not done processing a range.  Continue it.
+    return ucol_tok_processNextCodePointInRange(src, status);
+  } else if (src->isStarred) {
+    // We are not done processing a starred token.  Continue it.
+    return ucol_tok_processNextTokenInStarredList(src);
   }
 
-  if (src->parsedToken.charsLen == 0 && top == FALSE) {
-    syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
-    *status = U_INVALID_FORMAT_ERROR;
+  // Get the next token.
+  nextToken = ucol_tok_parseNextTokenInternal(src, startOfRules, parseError, status);
+
+  if (nextToken == NULL) {
     return NULL;
   }
 
-  src->parsedToken.strength = newStrength;
-  src->parsedToken.extensionOffset = extensionOffset;
-  src->parsedToken.extensionLen = newExtensionLen;
-  src->parsedToken.flags = (UCOL_TOK_VARIABLE_TOP * (variableTop?1:0)) | (UCOL_TOK_TOP * (top?1:0)) | before;
+  if (src->inRange) {
+    // A new range has started.
+    // Check whether it is a chain of ranges with more than one hyphen.
+    if (src->lastRangeCp > 0 && src->lastRangeCp == src->previousCp) {
+        *status = U_INVALID_FORMAT_ERROR;
+        syntaxError(src->source,src->parsedToken.charsOffset-1,
+                    src->parsedToken.charsOffset+src->parsedToken.charsLen, parseError);
+        DBG_FORMAT_ERROR
+        return NULL;
+    }
 
-  return src->current;
+    // The current token indicates the second code point of the range.
+    // Process just that, and then proceed with the star.
+    src->currentStarredCharIndex = src->parsedToken.charsOffset;
+    U16_NEXT(src->source, src->currentStarredCharIndex, 
+             (uint32_t)(src->end - src->source), src->lastRangeCp);
+    if (src->lastRangeCp <= src->previousCp) {  // the end of the range must come after the previous code point
+        *status = U_INVALID_FORMAT_ERROR;
+        syntaxError(src->source,src->parsedToken.charsOffset-1,
+                    src->parsedToken.charsOffset+src->parsedToken.charsLen,parseError);
+        DBG_FORMAT_ERROR
+        return NULL;
+    }
+
+    // Set current range code point to process the range loop
+    src->currentRangeCp = src->previousCp + 1;
+
+    src->lastStarredCharIndex = src->parsedToken.charsOffset + src->parsedToken.charsLen - 1;
+
+    return ucol_tok_processNextCodePointInRange(src, status);
+ } else if (src->isStarred) {
+    // src->currentStarredCharIndex and src->lastStarredCharIndex are two offsets into src->source;
+    // the text in [currentStarredCharIndex .. lastStarredCharIndex], both inclusive, needs to be
+    // separated into several tokens and returned.
+    src->currentStarredCharIndex = src->parsedToken.charsOffset;
+    src->lastStarredCharIndex =  src->parsedToken.charsOffset + src->parsedToken.charsLen - 1;
+
+    return ucol_tok_processNextTokenInStarredList(src);
+  } else {
+    // Set previous codepoint: the one at the start of this token; a range parsed later is checked against it.
+    U16_GET(src->source, 0, src->parsedToken.charsOffset, (uint32_t)(src->end - src->source), src->previousCp);
+  }
+  return nextToken;
 }
 
+
 /*
 Processing Description
-  1 Build a ListList. Each list has a header, which contains two lists (positive
-  and negative), a reset token, a baseCE, nextCE, and previousCE. The lists and
-  reset may be null.
-  2 As you process, you keep a LAST pointer that points to the last token you
-  handled.
+1 Build a ListList. Each list has a header, which contains two lists (positive
+and negative), a reset token, a baseCE, nextCE, and previousCE. The lists and
+reset may be null.
+2 As you process, you keep a LAST pointer that points to the last token you
+handled.
+
 */
 
-static UColToken *ucol_tok_initAReset(UColTokenParser *src, UChar *expand, uint32_t *expandNext,
-                                      UParseError *parseError, UErrorCode *status) {
-  if(src->resultLen == src->listCapacity) {
-    // Unfortunately, this won't work, as we store addresses of lhs in token
-    src->listCapacity *= 2;
-    src->lh = (UColTokListHeader *)uprv_realloc(src->lh, src->listCapacity*sizeof(UColTokListHeader));
-    if(src->lh == NULL) {
-      *status = U_MEMORY_ALLOCATION_ERROR;
-      return NULL;
+static UColToken *ucol_tok_initAReset(UColTokenParser *src, const UChar *expand, uint32_t *expandNext,
+                                      UParseError *parseError, UErrorCode *status)
+{   /*
+     * Allocates the reset token for the text described by src->parsedToken, starts a new
+     * token list for it in src->lh[src->resultLen], and registers the token in src->tailored.
+     *
+     * src        - token parser; src->lh is grown when full and src->resultLen is incremented.
+     * expand     - when non-NULL and the parsed text is longer than one code unit, the position
+     *              in src->source at which that text is split: the reset token keeps the part
+     *              before it, and the remainder is reported through expandNext.
+     * expandNext - output; once the token has been set up it receives 0, or the remainder
+     *              packed as (length << 24) | offset.  Not written on the early error returns.
+     * parseError - filled in through syntaxError() when the parsed token carries a prefix.
+     * status     - set to U_MEMORY_ALLOCATION_ERROR when an allocation fails, or to
+     *              U_INVALID_FORMAT_ERROR when the parsed token carries a prefix; also passed
+     *              to uhash_put().
+     * Returns the new token, or NULL / 0 on the error paths above.
+     */
+    if(src->resultLen == src->listCapacity) {
+        /* Unfortunately, this won't work, as we store addresses of lhs in token:
+         * sourceToken->listHeader (set further down) points into src->lh, and uprv_realloc
+         * may hand back a different block.
+         * NOTE(review): src->lh is overwritten with the result, so when NULL comes back the
+         * previous pointer is no longer available here - confirm how the caller cleans up. */
+        src->listCapacity *= 2;
+        src->lh = (UColTokListHeader *)uprv_realloc(src->lh, src->listCapacity*sizeof(UColTokListHeader));
+        if(src->lh == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
     }
-  }
-  /* do the reset thing */
-  UColToken *sourceToken = (UColToken *)uprv_malloc(sizeof(UColToken));
-  /* test for NULL */
-  if (sourceToken == NULL) {
-      *status = U_MEMORY_ALLOCATION_ERROR;
-      return NULL;
-  }
-  sourceToken->rulesToParse = src->source;
-  sourceToken->source = src->parsedToken.charsLen << 24 | src->parsedToken.charsOffset;
-  sourceToken->expansion = src->parsedToken.extensionLen << 24 | src->parsedToken.extensionOffset;
+    /* do the reset thing: allocate the token and copy the parsed offsets into it;
+     * source and expansion are each packed as (length << 24) | offset */
+    UColToken *sourceToken = (UColToken *)uprv_malloc(sizeof(UColToken));
+    /* test for NULL */
+    if (sourceToken == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    sourceToken->rulesToParseHdl = &(src->source);
+    sourceToken->source = src->parsedToken.charsLen << 24 | src->parsedToken.charsOffset;
+    sourceToken->expansion = src->parsedToken.extensionLen << 24 | src->parsedToken.extensionOffset;
 
-  sourceToken->debugSource = *(src->source + src->parsedToken.charsOffset);
-  sourceToken->debugExpansion = *(src->source + src->parsedToken.extensionOffset);
+    sourceToken->debugSource = *(src->source + src->parsedToken.charsOffset);
+    sourceToken->debugExpansion = *(src->source + src->parsedToken.extensionOffset);
 
-  // keep the flags around so that we know about before
-  sourceToken->flags = src->parsedToken.flags;
+    // keep the flags around so that we know about before
+    sourceToken->flags = src->parsedToken.flags;
 
-  if(src->parsedToken.prefixOffset != 0) {
-    // this is a syntax error
-    *status = U_INVALID_FORMAT_ERROR;
-    syntaxError(src->source,src->parsedToken.charsOffset-1,src->parsedToken.charsOffset+src->parsedToken.charsLen,parseError);
-    return 0;
-  } else {
-    sourceToken->prefix = 0;
-  }
+    if(src->parsedToken.prefixOffset != 0) {
+        // this is a syntax error: a non-zero prefix offset is rejected for a reset token
+        *status = U_INVALID_FORMAT_ERROR;
+        syntaxError(src->source,src->parsedToken.charsOffset-1,src->parsedToken.charsOffset+src->parsedToken.charsLen,parseError);
+        DBG_FORMAT_ERROR
+        uprv_free(sourceToken);  // the token has not been stored anywhere yet, so release it here
+        return 0;
+    } else {
+        sourceToken->prefix = 0;
+    }
 
-  sourceToken->polarity = UCOL_TOK_POLARITY_POSITIVE; /* TODO: this should also handle reverse */
-  sourceToken->strength = UCOL_TOK_RESET;
-  sourceToken->next = NULL;
-  sourceToken->previous = NULL;
-  sourceToken->noOfCEs = 0;
-  sourceToken->noOfExpCEs = 0;
-  sourceToken->listHeader = &src->lh[src->resultLen];
+    sourceToken->polarity = UCOL_TOK_POLARITY_POSITIVE; /* TODO: this should also handle reverse */
+    sourceToken->strength = UCOL_TOK_RESET;
+    sourceToken->next = NULL;
+    sourceToken->previous = NULL;
+    sourceToken->noOfCEs = 0;
+    sourceToken->noOfExpCEs = 0;
+    sourceToken->listHeader = &src->lh[src->resultLen];
 
-  src->lh[src->resultLen].first = NULL;
-  src->lh[src->resultLen].last = NULL;
-  src->lh[src->resultLen].first = NULL;
-  src->lh[src->resultLen].last = NULL;
+    src->lh[src->resultLen].first = NULL;
+    src->lh[src->resultLen].last = NULL;
+    src->lh[src->resultLen].first = NULL;  /* NOTE(review): this line and the next repeat the two assignments above */
+    src->lh[src->resultLen].last = NULL;
 
-  src->lh[src->resultLen].reset = sourceToken;
+    src->lh[src->resultLen].reset = sourceToken;
 
-  /*
+    /*
     3 Consider each item: relation, source, and expansion: e.g. ...< x / y ...
-      First convert all expansions into normal form. Examples:
-        If "xy" doesn't occur earlier in the list or in the UCA, convert &xy * c *
-        d * ... into &x * c/y * d * ...
-        Note: reset values can never have expansions, although they can cause the
-        very next item to have one. They may be contractions, if they are found
-        earlier in the list.
-  */
-  if(expand != NULL) {
-    /* check to see if there is an expansion */
-    if(src->parsedToken.charsLen > 1) {
-      uint32_t resetCharsOffset;
-      resetCharsOffset = (uint32_t)(expand - src->source);
-      sourceToken->source = ((resetCharsOffset - src->parsedToken.charsOffset ) << 24) | src->parsedToken.charsOffset;
-      *expandNext = ((src->parsedToken.charsLen + src->parsedToken.charsOffset - resetCharsOffset)<<24) | (resetCharsOffset);
-    } else {
-      *expandNext = 0;
+    First convert all expansions into normal form. Examples:
+    If "xy" doesn't occur earlier in the list or in the UCA, convert &xy * c *
+    d * ... into &x * c/y * d * ...
+    Note: reset values can never have expansions, although they can cause the
+    very next item to have one. They may be contractions, if they are found
+    earlier in the list.
+    */
+    *expandNext = 0;
+    if(expand != NULL) {
+        /* check to see if there is an expansion */
+        if(src->parsedToken.charsLen > 1) {
+            uint32_t resetCharsOffset;
+            resetCharsOffset = (uint32_t)(expand - src->source);
+            sourceToken->source = ((resetCharsOffset - src->parsedToken.charsOffset ) << 24) | src->parsedToken.charsOffset;
+            *expandNext = ((src->parsedToken.charsLen + src->parsedToken.charsOffset - resetCharsOffset)<<24) | (resetCharsOffset);
+        }
     }
-  }
 
-  src->resultLen++;
+    src->resultLen++;
 
-  uhash_put(src->tailored, sourceToken, sourceToken, status);
+    uhash_put(src->tailored, sourceToken, sourceToken, status);  // the token is both key and value; status is not inspected before returning
 
-  return sourceToken;
+    return sourceToken;
 }
 
 static
 inline UColToken *getVirginBefore(UColTokenParser *src, UColToken *sourceToken, uint8_t strength, UParseError *parseError, UErrorCode *status) {
-  if(U_FAILURE(*status)) {
-    return NULL;
-  }
-      /* this is a virgin before - we need to fish the anchor from the UCA */
-  collIterate s;
-  uint32_t baseCE = UCOL_NOT_FOUND, baseContCE = UCOL_NOT_FOUND;
-  uint32_t CE, SecondCE;
-  uint32_t invPos;
-  if(sourceToken != NULL) {
-    uprv_init_collIterate(src->UCA, src->source+((sourceToken->source)&0xFFFFFF), 1, &s);
-  } else {
-    uprv_init_collIterate(src->UCA, src->source+src->parsedToken.charsOffset /**charsOffset*/, 1, &s);
-  }
+    if(U_FAILURE(*status)) {
+        return NULL;
+    }
+    /* this is a virgin before - we need to fish the anchor from the UCA */
+    collIterate s;
+    uint32_t baseCE = UCOL_NOT_FOUND, baseContCE = UCOL_NOT_FOUND;
+    uint32_t CE, SecondCE;
+    // uint32_t invPos;
+    if(sourceToken != NULL) {
+        uprv_init_collIterate(src->UCA, src->source+((sourceToken->source)&0xFFFFFF), 1, &s, status);
+    } else {
+        uprv_init_collIterate(src->UCA, src->source+src->parsedToken.charsOffset /**charsOffset*/, 1, &s, status);
+    }
+    if(U_FAILURE(*status)) {
+        return NULL;
+    }
 
-  baseCE = ucol_getNextCE(src->UCA, &s, status) & 0xFFFFFF3F;
-  baseContCE = ucol_getNextCE(src->UCA, &s, status);
-  if(baseContCE == UCOL_NO_MORE_CES) {
-    baseContCE = 0;
-  }
+    baseCE = ucol_getNextCE(src->UCA, &s, status) & 0xFFFFFF3F;
+    baseContCE = ucol_getNextCE(src->UCA, &s, status);
+    if(baseContCE == UCOL_NO_MORE_CES) {
+        baseContCE = 0;
+    }
 
 
-  UCAConstants *consts = (UCAConstants *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts);
-  uint32_t ch = 0;
-  uint32_t expandNext = 0;
-  UColToken key;
+    UCAConstants *consts = (UCAConstants *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts);
+    uint32_t ch = 0;
+    uint32_t expandNext = 0;
+    UColToken key;
 
-  if((baseCE & 0xFF000000) >= (consts->UCA_PRIMARY_IMPLICIT_MIN<<24) && (baseCE & 0xFF000000) <= (consts->UCA_PRIMARY_IMPLICIT_MAX<<24) ) { /* implicits - */
-      uint32_t primary = baseCE & UCOL_PRIMARYMASK | (baseContCE & UCOL_PRIMARYMASK) >> 16;
-      uint32_t raw = uprv_uca_getRawFromImplicit(primary);
-      ch = uprv_uca_getCodePointFromRaw(raw-1);
-      uint32_t primaryCE = uprv_uca_getImplicitFromRaw(raw-1);
-      CE = primaryCE & UCOL_PRIMARYMASK | 0x0505;
-      SecondCE = (primaryCE << 16) & UCOL_PRIMARYMASK | UCOL_CONTINUATION_MARKER;
+    if((baseCE & 0xFF000000) >= (consts->UCA_PRIMARY_IMPLICIT_MIN<<24) && (baseCE & 0xFF000000) <= (consts->UCA_PRIMARY_IMPLICIT_MAX<<24) ) { /* implicits - */
+        uint32_t primary = (baseCE & UCOL_PRIMARYMASK) | ((baseContCE & UCOL_PRIMARYMASK) >> 16);
+        uint32_t raw = uprv_uca_getRawFromImplicit(primary);
+        ch = uprv_uca_getCodePointFromRaw(raw-1);
+        uint32_t primaryCE = uprv_uca_getImplicitFromRaw(raw-1);
+        CE = (primaryCE & UCOL_PRIMARYMASK) | 0x0505;
+        SecondCE = ((primaryCE << 16) & UCOL_PRIMARYMASK) | UCOL_CONTINUATION_MARKER;
 
-      src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
-      *src->extraCurrent++ = 0xFFFE;
-      *src->extraCurrent++ = (UChar)ch;
-      src->parsedToken.charsLen++;
+        src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source);
+        *src->extraCurrent++ = 0xFFFE;
+        *src->extraCurrent++ = (UChar)ch;
+        src->parsedToken.charsLen++;
 
-      key.source = (src->parsedToken.charsLen/**newCharsLen*/ << 24) | src->parsedToken.charsOffset/**charsOffset*/;
-      key.rulesToParse = src->source;
+        key.source = (src->parsedToken.charsLen/**newCharsLen*/ << 24) | src->parsedToken.charsOffset/**charsOffset*/;
+        key.rulesToParseHdl = &(src->source);
 
-      //sourceToken = (UColToken *)uhash_iget(src->tailored, (int32_t)key);
-      sourceToken = (UColToken *)uhash_get(src->tailored, &key);
+        //sourceToken = (UColToken *)uhash_iget(src->tailored, (int32_t)key);
+        sourceToken = (UColToken *)uhash_get(src->tailored, &key);
 
-      if(sourceToken == NULL) {
-          src->lh[src->resultLen].baseCE = CE & 0xFFFFFF3F;
-          if(isContinuation(SecondCE)) {
-            src->lh[src->resultLen].baseContCE = SecondCE;
-          } else {
-            src->lh[src->resultLen].baseContCE = 0;
-          }
-          src->lh[src->resultLen].nextCE = 0;
-          src->lh[src->resultLen].nextContCE = 0;
-          src->lh[src->resultLen].previousCE = 0;
-          src->lh[src->resultLen].previousContCE = 0;
+        if(sourceToken == NULL) {
+            src->lh[src->resultLen].baseCE = CE & 0xFFFFFF3F;
+            if(isContinuation(SecondCE)) {
+                src->lh[src->resultLen].baseContCE = SecondCE;
+            } else {
+                src->lh[src->resultLen].baseContCE = 0;
+            }
+            src->lh[src->resultLen].nextCE = 0;
+            src->lh[src->resultLen].nextContCE = 0;
+            src->lh[src->resultLen].previousCE = 0;
+            src->lh[src->resultLen].previousContCE = 0;
 
-          src->lh[src->resultLen].indirect = FALSE;
+            src->lh[src->resultLen].indirect = FALSE;
 
-          sourceToken = ucol_tok_initAReset(src, 0, &expandNext, parseError, status);
-      }
+            sourceToken = ucol_tok_initAReset(src, 0, &expandNext, parseError, status);
+        }
 
-  } else {
-      invPos = ucol_inv_getPrevCE(src, baseCE, baseContCE, &CE, &SecondCE, strength);
-
-      // we got the previous CE. Now we need to see if the difference between
-      // the two CEs is really of the requested strength.
-      // if it's a bigger difference (we asked for secondary and got primary), we
-      // need to modify the CE.
-      if(ucol_getCEStrengthDifference(baseCE, baseContCE, CE, SecondCE) < strength) {
-          // adjust the strength
-          // now we are in the situation where our baseCE should actually be modified in
-          // order to get the CE in the right position.
-          if(strength == UCOL_SECONDARY) {
-              CE = baseCE - 0x0200;
-          } else { // strength == UCOL_TERTIARY
-              CE = baseCE - 0x02;
-          }
-          if(baseContCE) {
+    } else {
+        /* invPos = */ ucol_inv_getPrevCE(src, baseCE, baseContCE, &CE, &SecondCE, strength);
+
+        // we got the previous CE. Now we need to see if the difference between
+        // the two CEs is really of the requested strength.
+        // if it's a bigger difference (we asked for secondary and got primary), we
+        // need to modify the CE.
+        if(ucol_getCEStrengthDifference(baseCE, baseContCE, CE, SecondCE) < strength) {
+            // adjust the strength
+            // now we are in the situation where our baseCE should actually be modified in
+            // order to get the CE in the right position.
             if(strength == UCOL_SECONDARY) {
-                SecondCE = baseContCE - 0x0200;
+                CE = baseCE - 0x0200;
             } else { // strength == UCOL_TERTIARY
-                SecondCE = baseContCE - 0x02;
+                CE = baseCE - 0x02;
+            }
+            if(baseContCE) {
+                if(strength == UCOL_SECONDARY) {
+                    SecondCE = baseContCE - 0x0200;
+                } else { // strength == UCOL_TERTIARY
+                    SecondCE = baseContCE - 0x02;
+                }
             }
-          }
-      }
+        }
 
 #if 0
-      // the code below relies on getting a code point from the inverse table, in order to be
-      // able to merge the situations like &x < 9 &[before 1]a < d. This won't work:
-      // 1. There are many code points that have the same CE
-      // 2. The CE to codepoint table (things pointed to by CETable[3*invPos+2] are broken.
-      // Also, in case when there is no equivalent strength before an element, we have to actually
-      // construct one. For example, &[before 2]a << x won't result in x << a, because the element
-      // before a is a primary difference.
+        // the code below relies on getting a code point from the inverse table, in order to be
+        // able to merge the situations like &x < 9 &[before 1]a < d. This won't work:
+        // 1. There are many code points that have the same CE
+        // 2. The CE to codepoint table (things pointed to by CETable[3*invPos+2]) is broken.
+        // Also, in case when there is no equivalent strength before an element, we have to actually
+        // construct one. For example, &[before 2]a << x won't result in x << a, because the element
+        // before a is a primary difference.
 
-      //uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
+        //uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
 
 
-      ch = CETable[3*invPos+2];
+        ch = CETable[3*invPos+2];
 
-      if((ch &  UCOL_INV_SIZEMASK) != 0) {
-        uint16_t *conts = (uint16_t *)((uint8_t *)src->invUCA+src->invUCA->conts);
-        uint32_t offset = (ch & UCOL_INV_OFFSETMASK);
-        ch = conts[offset];
-      }
+        if((ch &  UCOL_INV_SIZEMASK) != 0) {
+            uint16_t *conts = (uint16_t *)((uint8_t *)src->invUCA+src->invUCA->conts);
+            uint32_t offset = (ch & UCOL_INV_OFFSETMASK);
+            ch = conts[offset];
+        }
 
-      *src->extraCurrent++ = (UChar)ch;
-      src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source - 1);
-      src->parsedToken.charsLen = 1;
+        *src->extraCurrent++ = (UChar)ch;
+        src->parsedToken.charsOffset = (uint32_t)(src->extraCurrent - src->source - 1);
+        src->parsedToken.charsLen = 1;
 
-      // We got an UCA before. However, this might have been tailored.
-      // example:
-      // &\u30ca = \u306a
-      // &[before 3]\u306a<<<\u306a|\u309d
+        // We got an UCA before. However, this might have been tailored.
+        // example:
+        // &\u30ca = \u306a
+        // &[before 3]\u306a<<<\u306a|\u309d
 
 
-      // uint32_t key = (*newCharsLen << 24) | *charsOffset;
-      key.source = (src->parsedToken.charsLen/**newCharsLen*/ << 24) | src->parsedToken.charsOffset/**charsOffset*/;
-      key.rulesToParse = src->source;
+        // uint32_t key = (*newCharsLen << 24) | *charsOffset;
+        key.source = (src->parsedToken.charsLen/**newCharsLen*/ << 24) | src->parsedToken.charsOffset/**charsOffset*/;
+        key.rulesToParseHdl = &(src->source);
 
-      //sourceToken = (UColToken *)uhash_iget(src->tailored, (int32_t)key);
-      sourceToken = (UColToken *)uhash_get(src->tailored, &key);
+        //sourceToken = (UColToken *)uhash_iget(src->tailored, (int32_t)key);
+        sourceToken = (UColToken *)uhash_get(src->tailored, &key);
 #endif
 
-      // here is how it should be. The situation such as &[before 1]a < x, should be
-      // resolved exactly as if we wrote &a > x.
-      // therefore, I don't really care if the UCA value before a has been changed.
-      // However, I do care if the strength between my element and the previous element
-      // is bigger then I wanted. So, if CE < baseCE and I wanted &[before 2], then i'll
-      // have to construct the base CE.
+        // here is how it should be. The situation such as &[before 1]a < x, should be
+        // resolved exactly as if we wrote &a > x.
+        // therefore, I don't really care if the UCA value before a has been changed.
+        // However, I do care if the strength between my element and the previous element
+        // is bigger than I wanted. So, if CE < baseCE and I wanted &[before 2], then I'll
+        // have to construct the base CE.
 
 
 
-      // if we found a tailored thing, we have to use the UCA value and construct
-      // a new reset token with constructed name
-      //if(sourceToken != NULL && sourceToken->strength != UCOL_TOK_RESET) {
+        // if we found a tailored thing, we have to use the UCA value and construct
+        // a new reset token with constructed name
+        //if(sourceToken != NULL && sourceToken->strength != UCOL_TOK_RESET) {
         // character to which we want to anchor is already tailored.
         // We need to construct a new token which will be the anchor
         // point
@@ -1336,9 +1692,9 @@ inline UColToken *getVirginBefore(UColTokenParser *src, UColToken *sourceToken,
         src->parsedToken.charsLen += 10;
         src->lh[src->resultLen].baseCE = CE & 0xFFFFFF3F;
         if(isContinuation(SecondCE)) {
-          src->lh[src->resultLen].baseContCE = SecondCE;
+            src->lh[src->resultLen].baseContCE = SecondCE;
         } else {
-          src->lh[src->resultLen].baseContCE = 0;
+            src->lh[src->resultLen].baseContCE = 0;
         }
         src->lh[src->resultLen].nextCE = 0;
         src->lh[src->resultLen].nextContCE = 0;
@@ -1348,561 +1704,743 @@ inline UColToken *getVirginBefore(UColTokenParser *src, UColToken *sourceToken,
         src->lh[src->resultLen].indirect = FALSE;
 
         sourceToken = ucol_tok_initAReset(src, 0, &expandNext, parseError, status);
-      //}
-  }
+        //}
+    }
 
-  return sourceToken;
+    return sourceToken;
 
 }
 
 uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, UParseError *parseError, UErrorCode *status) {
-  UColToken *lastToken = NULL;
-  const UChar *parseEnd = NULL;
-  uint32_t expandNext = 0;
-  UBool variableTop = FALSE;
-  UBool top = FALSE;
-  uint16_t specs = 0;
-  UColTokListHeader *ListList = NULL;
+    UColToken *lastToken = NULL;
+    const UChar *parseEnd = NULL;
+    uint32_t expandNext = 0;
+    UBool variableTop = FALSE;
+    UBool top = FALSE;
+    uint16_t specs = 0;
+    UColTokListHeader *ListList = NULL;
 
-  src->parsedToken.strength = UCOL_TOK_UNSET;
+    src->parsedToken.strength = UCOL_TOK_UNSET;
 
-  ListList = src->lh;
+    ListList = src->lh;
 
-  if(U_FAILURE(*status)) {
-      return 0;
-  }
+    if(U_FAILURE(*status)) {
+        return 0;
+    }
+#ifdef DEBUG_FOR_CODE_POINTS
+    char filename[35];
+    sprintf(filename, "/tmp/debug_for_cp_%09d.txt", getpid());
+    dfcp_fp = fopen(filename, "a");
+    fprintf(stdout, "Output is in the file %s.\n", filename);
+#endif
 
-  while(src->current < src->end) {
-    src->parsedToken.prefixOffset = 0;
+#ifdef DEBUG_FOR_COLL_RULES
+    std::string s3;
+    UnicodeString(src->source).toUTF8String(s3);
+    std::cout << "src->source = " << s3 << std::endl;
+#endif
 
-    parseEnd = ucol_tok_parseNextToken(src,
-                        (UBool)(lastToken == NULL),
-                        parseError,
-                        status);
+    while(src->current < src->end || src->isStarred) {
+        src->parsedToken.prefixOffset = 0;
 
-    specs = src->parsedToken.flags;
+        parseEnd = ucol_tok_parseNextToken(src,
+            (UBool)(lastToken == NULL),
+            parseError,
+            status);
 
+        specs = src->parsedToken.flags;
 
-    variableTop = ((specs & UCOL_TOK_VARIABLE_TOP) != 0);
-    top = ((specs & UCOL_TOK_TOP) != 0);
 
-    if(U_SUCCESS(*status) && parseEnd != NULL) {
-      UColToken *sourceToken = NULL;
-      //uint32_t key = 0;
-      uint32_t lastStrength = UCOL_TOK_UNSET;
+        variableTop = ((specs & UCOL_TOK_VARIABLE_TOP) != 0);
+        top = ((specs & UCOL_TOK_TOP) != 0);
 
-      if(lastToken != NULL ) {
-        lastStrength = lastToken->strength;
-      }
+        if(U_SUCCESS(*status) && parseEnd != NULL) {
+            UColToken *sourceToken = NULL;
+            //uint32_t key = 0;
+            uint32_t lastStrength = UCOL_TOK_UNSET;
 
-      //key = newCharsLen << 24 | charsOffset;
-      UColToken key;
-      key.source = src->parsedToken.charsLen << 24 | src->parsedToken.charsOffset;
-      key.rulesToParse = src->source;
+            if(lastToken != NULL ) {
+                lastStrength = lastToken->strength;
+            }
 
-      /*  4 Lookup each source in the CharsToToken map, and find a sourceToken */
-      sourceToken = (UColToken *)uhash_get(src->tailored, &key);
+#ifdef DEBUG_FOR_CODE_POINTS
+            UChar32 cp;
+            U16_GET(src->source, 0, src->parsedToken.charsOffset, (uint32_t)(src->extraEnd - src->source), cp);
+            fprintf(dfcp_fp, "Code point = %x, Strength = %x\n", cp, src->parsedToken.strength);
+#endif
+            //key = newCharsLen << 24 | charsOffset;
+            UColToken key;
+            key.source = src->parsedToken.charsLen << 24 | src->parsedToken.charsOffset;
+            key.rulesToParseHdl = &(src->source);
 
-      if(src->parsedToken.strength != UCOL_TOK_RESET) {
-        if(lastToken == NULL) { /* this means that rules haven't started properly */
-          *status = U_INVALID_FORMAT_ERROR;
-          syntaxError(src->source,0,(int32_t)(src->end-src->source),parseError);
-          return 0;
-        }
-      /*  6 Otherwise (when relation != reset) */
-        if(sourceToken == NULL) {
-          /* If sourceToken is null, create new one, */
-          sourceToken = (UColToken *)uprv_malloc(sizeof(UColToken));
-          /* test for NULL */
-          if (sourceToken == NULL) {
-              *status = U_MEMORY_ALLOCATION_ERROR;
-              return 0;
-          }
-          sourceToken->rulesToParse = src->source;
-          sourceToken->source = src->parsedToken.charsLen << 24 | src->parsedToken.charsOffset;
-
-          sourceToken->debugSource = *(src->source + src->parsedToken.charsOffset);
-
-          sourceToken->prefix = src->parsedToken.prefixLen << 24 | src->parsedToken.prefixOffset;
-          sourceToken->debugPrefix = *(src->source + src->parsedToken.prefixOffset);
-
-          sourceToken->polarity = UCOL_TOK_POLARITY_POSITIVE; /* TODO: this should also handle reverse */
-          sourceToken->next = NULL;
-          sourceToken->previous = NULL;
-          sourceToken->noOfCEs = 0;
-          sourceToken->noOfExpCEs = 0;
-          // keep the flags around so that we know about before
-          sourceToken->flags = src->parsedToken.flags;
-          uhash_put(src->tailored, sourceToken, sourceToken, status);
-        } else {
-          /* we could have fished out a reset here */
-          if(sourceToken->strength != UCOL_TOK_RESET && lastToken != sourceToken) {
-            /* otherwise remove sourceToken from where it was. */
-            if(sourceToken->next != NULL) {
-              if(sourceToken->next->strength > sourceToken->strength) {
-                sourceToken->next->strength = sourceToken->strength;
-              }
-              sourceToken->next->previous = sourceToken->previous;
-            } else {
-              sourceToken->listHeader->last = sourceToken->previous;
-            }
+            /*  4 Lookup each source in the CharsToToken map, and find a sourceToken */
+            sourceToken = (UColToken *)uhash_get(src->tailored, &key);
 
-            if(sourceToken->previous != NULL) {
-              sourceToken->previous->next = sourceToken->next;
-            } else {
-              sourceToken->listHeader->first = sourceToken->next;
-            }
-            sourceToken->next = NULL;
-            sourceToken->previous = NULL;
-          }
-        }
+            if(src->parsedToken.strength != UCOL_TOK_RESET) {
+                if(lastToken == NULL) { /* this means that rules haven't started properly */
+                    *status = U_INVALID_FORMAT_ERROR;
+                    syntaxError(src->source,0,(int32_t)(src->end-src->source),parseError);
+                    DBG_FORMAT_ERROR
+                    return 0;
+                }
+                /*  6 Otherwise (when relation != reset) */
+                if(sourceToken == NULL) {
+                    /* If sourceToken is null, create new one, */
+                    sourceToken = (UColToken *)uprv_malloc(sizeof(UColToken));
+                    /* test for NULL */
+                    if (sourceToken == NULL) {
+                        *status = U_MEMORY_ALLOCATION_ERROR;
+                        return 0;
+                    }
+                    sourceToken->rulesToParseHdl = &(src->source);
+                    sourceToken->source = src->parsedToken.charsLen << 24 | src->parsedToken.charsOffset;
+
+                    sourceToken->debugSource = *(src->source + src->parsedToken.charsOffset);
+
+                    sourceToken->prefix = src->parsedToken.prefixLen << 24 | src->parsedToken.prefixOffset;
+                    sourceToken->debugPrefix = *(src->source + src->parsedToken.prefixOffset);
+
+                    sourceToken->polarity = UCOL_TOK_POLARITY_POSITIVE; /* TODO: this should also handle reverse */
+                    sourceToken->next = NULL;
+                    sourceToken->previous = NULL;
+                    sourceToken->noOfCEs = 0;
+                    sourceToken->noOfExpCEs = 0;
+                    // keep the flags around so that we know about before
+                    sourceToken->flags = src->parsedToken.flags;
+                    uhash_put(src->tailored, sourceToken, sourceToken, status);
+                    if(U_FAILURE(*status)) {
+                        return 0;
+                    }
+                } else {
+                    /* we could have fished out a reset here */
+                    if(sourceToken->strength != UCOL_TOK_RESET && lastToken != sourceToken) {
+                        /* otherwise remove sourceToken from where it was. */
+                        if(sourceToken->next != NULL) {
+                            if(sourceToken->next->strength > sourceToken->strength) {
+                                sourceToken->next->strength = sourceToken->strength;
+                            }
+                            sourceToken->next->previous = sourceToken->previous;
+                        } else {
+                            sourceToken->listHeader->last = sourceToken->previous;
+                        }
+
+                        if(sourceToken->previous != NULL) {
+                            sourceToken->previous->next = sourceToken->next;
+                        } else {
+                            sourceToken->listHeader->first = sourceToken->next;
+                        }
+                        sourceToken->next = NULL;
+                        sourceToken->previous = NULL;
+                    }
+                }
 
-        sourceToken->strength = src->parsedToken.strength;
-        sourceToken->listHeader = lastToken->listHeader;
-
-        /*
-        1.  Find the strongest strength in each list, and set strongestP and strongestN
-        accordingly in the headers.
-        */
-        if(lastStrength == UCOL_TOK_RESET
-          || sourceToken->listHeader->first == 0) {
-        /* If LAST is a reset
-              insert sourceToken in the list. */
-          if(sourceToken->listHeader->first == 0) {
-            sourceToken->listHeader->first = sourceToken;
-            sourceToken->listHeader->last = sourceToken;
-          } else { /* we need to find a place for us */
-            /* and we'll get in front of the same strength */
-            if(sourceToken->listHeader->first->strength <= sourceToken->strength) {
-              sourceToken->next = sourceToken->listHeader->first;
-              sourceToken->next->previous = sourceToken;
-              sourceToken->listHeader->first = sourceToken;
-              sourceToken->previous = NULL;
-            } else {
-              lastToken = sourceToken->listHeader->first;
-              while(lastToken->next != NULL && lastToken->next->strength > sourceToken->strength) {
-                lastToken = lastToken->next;
-              }
-              if(lastToken->next != NULL) {
-                lastToken->next->previous = sourceToken;
-              } else {
-                sourceToken->listHeader->last = sourceToken;
-              }
-              sourceToken->previous = lastToken;
-              sourceToken->next = lastToken->next;
-              lastToken->next = sourceToken;
-            }
-          }
-        } else {
-        /* Otherwise (when LAST is not a reset)
-              if polarity (LAST) == polarity(relation), insert sourceToken after LAST,
-              otherwise insert before.
-              when inserting after or before, search to the next position with the same
-              strength in that direction. (This is called postpone insertion).         */
-          if(sourceToken != lastToken) {
-            if(lastToken->polarity == sourceToken->polarity) {
-              while(lastToken->next != NULL && lastToken->next->strength > sourceToken->strength) {
-                lastToken = lastToken->next;
-              }
-              sourceToken->previous = lastToken;
-              if(lastToken->next != NULL) {
-                lastToken->next->previous = sourceToken;
-              } else {
-                sourceToken->listHeader->last = sourceToken;
-              }
-
-              sourceToken->next = lastToken->next;
-              lastToken->next = sourceToken;
+                sourceToken->strength = src->parsedToken.strength;
+                sourceToken->listHeader = lastToken->listHeader;
+
+                /*
+                1.  Find the strongest strength in each list, and set strongestP and strongestN
+                accordingly in the headers.
+                */
+                if(lastStrength == UCOL_TOK_RESET
+                    || sourceToken->listHeader->first == 0) {
+                        /* If LAST is a reset
+                        insert sourceToken in the list. */
+                        if(sourceToken->listHeader->first == 0) {
+                            sourceToken->listHeader->first = sourceToken;
+                            sourceToken->listHeader->last = sourceToken;
+                        } else { /* we need to find a place for us */
+                            /* and we'll get in front of the same strength */
+                            if(sourceToken->listHeader->first->strength <= sourceToken->strength) {
+                                sourceToken->next = sourceToken->listHeader->first;
+                                sourceToken->next->previous = sourceToken;
+                                sourceToken->listHeader->first = sourceToken;
+                                sourceToken->previous = NULL;
+                            } else {
+                                lastToken = sourceToken->listHeader->first;
+                                while(lastToken->next != NULL && lastToken->next->strength > sourceToken->strength) {
+                                    lastToken = lastToken->next;
+                                }
+                                if(lastToken->next != NULL) {
+                                    lastToken->next->previous = sourceToken;
+                                } else {
+                                    sourceToken->listHeader->last = sourceToken;
+                                }
+                                sourceToken->previous = lastToken;
+                                sourceToken->next = lastToken->next;
+                                lastToken->next = sourceToken;
+                            }
+                        }
+                    } else {
+                        /* Otherwise (when LAST is not a reset)
+                        if polarity (LAST) == polarity(relation), insert sourceToken after LAST,
+                        otherwise insert before.
+                        when inserting after or before, search to the next position with the same
+                        strength in that direction. (This is called postpone insertion).         */
+                        if(sourceToken != lastToken) {
+                            if(lastToken->polarity == sourceToken->polarity) {
+                                while(lastToken->next != NULL && lastToken->next->strength > sourceToken->strength) {
+                                    lastToken = lastToken->next;
+                                }
+                                sourceToken->previous = lastToken;
+                                if(lastToken->next != NULL) {
+                                    lastToken->next->previous = sourceToken;
+                                } else {
+                                    sourceToken->listHeader->last = sourceToken;
+                                }
+
+                                sourceToken->next = lastToken->next;
+                                lastToken->next = sourceToken;
+                            } else {
+                                while(lastToken->previous != NULL && lastToken->previous->strength > sourceToken->strength) {
+                                    lastToken = lastToken->previous;
+                                }
+                                sourceToken->next = lastToken;
+                                if(lastToken->previous != NULL) {
+                                    lastToken->previous->next = sourceToken;
+                                } else {
+                                    sourceToken->listHeader->first = sourceToken;
+                                }
+                                sourceToken->previous = lastToken->previous;
+                                lastToken->previous = sourceToken;
+                            }
+                        } else { /* repeated one thing twice in rules, stay with the stronger strength */
+                            if(lastStrength < sourceToken->strength) {
+                                sourceToken->strength = lastStrength;
+                            }
+                        }
+                    }
+
+                    /* if the token was a variable top, we're gonna put it in */
+                    if(variableTop == TRUE && src->varTop == NULL) {
+                        variableTop = FALSE;
+                        src->varTop = sourceToken;
+                    }
+
+                    // Treat the expansions.
+                    // There are two types of expansions: explicit (x / y) and reset based propagating expansions
+                    // (&abc * d * e <=> &ab * d / c * e / c)
+                    // if both of them are in effect for a token, they are combined.
+
+                    sourceToken->expansion = src->parsedToken.extensionLen << 24 | src->parsedToken.extensionOffset;
+
+                    if(expandNext != 0) {
+                        if(sourceToken->strength == UCOL_PRIMARY) { /* primary strength kills off the implicit expansion */
+                            expandNext = 0;
+                        } else if(sourceToken->expansion == 0) { /* if there is no expansion, implicit is just added to the token */
+                            sourceToken->expansion = expandNext;
+                        } else { /* there is both explicit and implicit expansion. We need to make a combination */
+                            uprv_memcpy(src->extraCurrent, src->source + (expandNext & 0xFFFFFF), (expandNext >> 24)*sizeof(UChar));
+                            uprv_memcpy(src->extraCurrent+(expandNext >> 24), src->source + src->parsedToken.extensionOffset, src->parsedToken.extensionLen*sizeof(UChar));
+                            sourceToken->expansion = (uint32_t)(((expandNext >> 24) + src->parsedToken.extensionLen)<<24 | (uint32_t)(src->extraCurrent - src->source));
+                            src->extraCurrent += (expandNext >> 24) + src->parsedToken.extensionLen;
+                        }
+                    }
+
+                    // This is just for debugging purposes
+                    if(sourceToken->expansion != 0) {
+                        sourceToken->debugExpansion = *(src->source + src->parsedToken.extensionOffset);
+                    } else {
+                        sourceToken->debugExpansion = 0;
+                    }
+                    // if the previous token was a reset before, the strength of this
+                    // token must match the strength of before. Otherwise we have an
+                    // undefined situation.
+                    // In other words, we currently have a kludge which we use to
+                    // represent &a >> x. This is written as &[before 2]a << x.
+                    if((lastToken->flags & UCOL_TOK_BEFORE) != 0) {
+                        uint8_t beforeStrength = (lastToken->flags & UCOL_TOK_BEFORE) - 1;
+                        if(beforeStrength != sourceToken->strength) {
+                            *status = U_INVALID_FORMAT_ERROR;
+                            syntaxError(src->source,0,(int32_t)(src->end-src->source),parseError);
+                            DBG_FORMAT_ERROR
+                            return 0;
+                        }
+                    }
             } else {
-              while(lastToken->previous != NULL && lastToken->previous->strength > sourceToken->strength) {
-                lastToken = lastToken->previous;
-              }
-              sourceToken->next = lastToken;
-              if(lastToken->previous != NULL) {
-                lastToken->previous->next = sourceToken;
-              } else {
-                sourceToken->listHeader->first = sourceToken;
-              }
-              sourceToken->previous = lastToken->previous;
-              lastToken->previous = sourceToken;
-            }
-          } else { /* repeated one thing twice in rules, stay with the stronger strength */
-            if(lastStrength < sourceToken->strength) {
-              sourceToken->strength = lastStrength;
-            }
-          }
-        }
+                if(lastToken != NULL && lastStrength == UCOL_TOK_RESET) {
+                    /* if the previous token was also a reset, */
+                    /*this means that we have two consecutive resets */
+                    /* and we want to remove the previous one if empty*/
+                    if(src->resultLen > 0 && ListList[src->resultLen-1].first == NULL) {
+                        src->resultLen--;
+                    }
+                }
 
-        /* if the token was a variable top, we're gonna put it in */
-        if(variableTop == TRUE && src->varTop == NULL) {
-          variableTop = FALSE;
-          src->varTop = sourceToken;
-        }
+                if(sourceToken == NULL) { /* this is a reset, but it might still be somewhere in the tailoring, in shorter form */
+                    uint32_t searchCharsLen = src->parsedToken.charsLen;
+                    while(searchCharsLen > 1 && sourceToken == NULL) {
+                        searchCharsLen--;
+                        //key = searchCharsLen << 24 | charsOffset;
+                        UColToken key;
+                        key.source = searchCharsLen << 24 | src->parsedToken.charsOffset;
+                        key.rulesToParseHdl = &(src->source);
+                        sourceToken = (UColToken *)uhash_get(src->tailored, &key);
+                    }
+                    if(sourceToken != NULL) {
+                        expandNext = (src->parsedToken.charsLen - searchCharsLen) << 24 | (src->parsedToken.charsOffset + searchCharsLen);
+                    }
+                }
 
-       // Treat the expansions.
-       // There are two types of expansions: explicit (x / y) and reset based propagating expansions
-       // (&abc * d * e <=> &ab * d / c * e / c)
-       // if both of them are in effect for a token, they are combined.
-
-        sourceToken->expansion = src->parsedToken.extensionLen << 24 | src->parsedToken.extensionOffset;
-
-        if(expandNext != 0) {
-          if(sourceToken->strength == UCOL_PRIMARY) { /* primary strength kills off the implicit expansion */
-            expandNext = 0;
-          } else if(sourceToken->expansion == 0) { /* if there is no expansion, implicit is just added to the token */
-            sourceToken->expansion = expandNext;
-          } else { /* there is both explicit and implicit expansion. We need to make a combination */
-            uprv_memcpy(src->extraCurrent, src->source + (expandNext & 0xFFFFFF), (expandNext >> 24)*sizeof(UChar));
-            uprv_memcpy(src->extraCurrent+(expandNext >> 24), src->source + src->parsedToken.extensionOffset, src->parsedToken.extensionLen*sizeof(UChar));
-            sourceToken->expansion = (uint32_t)(((expandNext >> 24) + src->parsedToken.extensionLen)<<24 | (src->extraCurrent - src->source));
-            src->extraCurrent += (expandNext >> 24) + src->parsedToken.extensionLen;
-          }
-        }
+                if((specs & UCOL_TOK_BEFORE) != 0) { /* we're doing before */
+                    if(top == FALSE) { /* there is no indirection */
+                        uint8_t strength = (specs & UCOL_TOK_BEFORE) - 1;
+                        if(sourceToken != NULL && sourceToken->strength != UCOL_TOK_RESET) {
+                            /* this is a before that is already ordered in the UCA - so we need to get the previous with good strength */
+                            while(sourceToken->strength > strength && sourceToken->previous != NULL) {
+                                sourceToken = sourceToken->previous;
+                            }
+                            /* here, either we hit the strength or NULL */
+                            if(sourceToken->strength == strength) {
+                                if(sourceToken->previous != NULL) {
+                                    sourceToken = sourceToken->previous;
+                                } else { /* start of list */
+                                    sourceToken = sourceToken->listHeader->reset;
+                                }
+                            } else { /* we hit NULL */
+                                /* we should be doing the else part */
+                                sourceToken = sourceToken->listHeader->reset;
+                                sourceToken = getVirginBefore(src, sourceToken, strength, parseError, status);
+                            }
+                        } else {
+                            sourceToken = getVirginBefore(src, sourceToken, strength, parseError, status);
+                        }
+                    } else { /* this is both before and indirection */
+                        top = FALSE;
+                        ListList[src->resultLen].previousCE = 0;
+                        ListList[src->resultLen].previousContCE = 0;
+                        ListList[src->resultLen].indirect = TRUE;
+                        /* we need to do slightly more work. we need to get the baseCE using the */
+                        /* inverse UCA & getPrevious. The next bound is not set, and will be decided */
+                        /* in ucol_bld */
+                        uint8_t strength = (specs & UCOL_TOK_BEFORE) - 1;
+                        uint32_t baseCE = ucolIndirectBoundaries[src->parsedToken.indirectIndex].startCE;
+                        uint32_t baseContCE = ucolIndirectBoundaries[src->parsedToken.indirectIndex].startContCE;//&0xFFFFFF3F;
+                        uint32_t CE = UCOL_NOT_FOUND, SecondCE = UCOL_NOT_FOUND;
+
+                        UCAConstants *consts = (UCAConstants *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts);
+                        if((baseCE & 0xFF000000) >= (consts->UCA_PRIMARY_IMPLICIT_MIN<<24) && 
+                           (baseCE & 0xFF000000) <= (consts->UCA_PRIMARY_IMPLICIT_MAX<<24) ) { /* implicits - */
+                            uint32_t primary = (baseCE & UCOL_PRIMARYMASK) | ((baseContCE & UCOL_PRIMARYMASK) >> 16);
+                            uint32_t raw = uprv_uca_getRawFromImplicit(primary);
+                            uint32_t primaryCE = uprv_uca_getImplicitFromRaw(raw-1);
+                            CE = (primaryCE & UCOL_PRIMARYMASK) | 0x0505;
+                            SecondCE = ((primaryCE << 16) & UCOL_PRIMARYMASK) | UCOL_CONTINUATION_MARKER;
+                        } else {
+                            /*int32_t invPos = ucol_inv_getPrevCE(baseCE, baseContCE, &CE, &SecondCE, strength);*/
+                            ucol_inv_getPrevCE(src, baseCE, baseContCE, &CE, &SecondCE, strength);
+                        }
+
+                        ListList[src->resultLen].baseCE = CE;
+                        ListList[src->resultLen].baseContCE = SecondCE;
+                        ListList[src->resultLen].nextCE = 0;
+                        ListList[src->resultLen].nextContCE = 0;
+
+                        sourceToken = ucol_tok_initAReset(src, 0, &expandNext, parseError, status);
+                    }
+                }
+
+
+                /*  5 If the relation is a reset:
+                If sourceToken is null
+                Create new list, create new sourceToken, make the baseCE from source, put
+                the sourceToken in ListHeader of the new list */
+                if(sourceToken == NULL) {
+                    /*
+                    3 Consider each item: relation, source, and expansion: e.g. ...< x / y ...
+                    First convert all expansions into normal form. Examples:
+                    If "xy" doesn't occur earlier in the list or in the UCA, convert &xy * c *
+                    d * ... into &x * c/y * d * ...
+                    Note: reset values can never have expansions, although they can cause the
+                    very next item to have one. They may be contractions, if they are found
+                    earlier in the list.
+                    */
+                    if(top == FALSE) {
+                        collIterate s;
+                        uint32_t CE = UCOL_NOT_FOUND, SecondCE = UCOL_NOT_FOUND;
+
+                        uprv_init_collIterate(src->UCA, src->source+src->parsedToken.charsOffset, src->parsedToken.charsLen, &s, status);
+
+                        CE = ucol_getNextCE(src->UCA, &s, status);
+                        const UChar *expand = s.pos;
+                        SecondCE = ucol_getNextCE(src->UCA, &s, status);
+
+                        ListList[src->resultLen].baseCE = CE & 0xFFFFFF3F;
+                        if(isContinuation(SecondCE)) {
+                            ListList[src->resultLen].baseContCE = SecondCE;
+                        } else {
+                            ListList[src->resultLen].baseContCE = 0;
+                        }
+                        ListList[src->resultLen].nextCE = 0;
+                        ListList[src->resultLen].nextContCE = 0;
+                        ListList[src->resultLen].previousCE = 0;
+                        ListList[src->resultLen].previousContCE = 0;
+                        ListList[src->resultLen].indirect = FALSE;
+                        sourceToken = ucol_tok_initAReset(src, expand, &expandNext, parseError, status);
+                    } else { /* top == TRUE */
+                        /* just use the supplied values */
+                        top = FALSE;
+                        ListList[src->resultLen].previousCE = 0;
+                        ListList[src->resultLen].previousContCE = 0;
+                        ListList[src->resultLen].indirect = TRUE;
+                        ListList[src->resultLen].baseCE = ucolIndirectBoundaries[src->parsedToken.indirectIndex].startCE;
+                        ListList[src->resultLen].baseContCE = ucolIndirectBoundaries[src->parsedToken.indirectIndex].startContCE;
+                        ListList[src->resultLen].nextCE = ucolIndirectBoundaries[src->parsedToken.indirectIndex].limitCE;
+                        ListList[src->resultLen].nextContCE = ucolIndirectBoundaries[src->parsedToken.indirectIndex].limitContCE;
+
+                        sourceToken = ucol_tok_initAReset(src, 0, &expandNext, parseError, status);
 
-        // This is just for debugging purposes
-        if(sourceToken->expansion != 0) {
-          sourceToken->debugExpansion = *(src->source + src->parsedToken.extensionOffset);
+                    }
+                } else { /* reset to something already in rules */
+                    top = FALSE;
+                }
+            }
+            /*  7 After all this, set LAST to point to sourceToken, and goto step 3. */
+            lastToken = sourceToken;
         } else {
-          sourceToken->debugExpansion = 0;
-        }
-        // if the previous token was a reset before, the strength of this
-        // token must match the strength of before. Otherwise we have an
-        // undefined situation.
-        // In other words, we currently have a cludge which we use to
-        // represent &a >> x. This is written as &[before 2]a << x.
-        if((lastToken->flags & UCOL_TOK_BEFORE) != 0) {
-            uint8_t beforeStrength = (lastToken->flags & UCOL_TOK_BEFORE) - 1;
-            if(beforeStrength != sourceToken->strength) {
-                *status = U_INVALID_FORMAT_ERROR;
-                syntaxError(src->source,0,(int32_t)(src->end-src->source),parseError);
+            if(U_FAILURE(*status)) {
                 return 0;
             }
         }
-      } else {
-        if(lastToken != NULL && lastStrength == UCOL_TOK_RESET) {
-          /* if the previous token was also a reset, */
-          /*this means that we have two consecutive resets */
-          /* and we want to remove the previous one if empty*/
-          if(ListList[src->resultLen-1].first == NULL) {
-            src->resultLen--;
-          }
-        }
+    }
+#ifdef DEBUG_FOR_CODE_POINTS
+    fclose(dfcp_fp);
+#endif
 
-        if(sourceToken == NULL) { /* this is a reset, but it might still be somewhere in the tailoring, in shorter form */
-          uint32_t searchCharsLen = src->parsedToken.charsLen;
-          while(searchCharsLen > 1 && sourceToken == NULL) {
-            searchCharsLen--;
-            //key = searchCharsLen << 24 | charsOffset;
-            UColToken key;
-            key.source = searchCharsLen << 24 | src->parsedToken.charsOffset;
-            key.rulesToParse = src->source;
-            sourceToken = (UColToken *)uhash_get(src->tailored, &key);
-          }
-          if(sourceToken != NULL) {
-            expandNext = (src->parsedToken.charsLen - searchCharsLen) << 24 | (src->parsedToken.charsOffset + searchCharsLen);
-          }
-        }
 
-        if((specs & UCOL_TOK_BEFORE) != 0) { /* we're doing before */
-          if(top == FALSE) { /* there is no indirection */
-            uint8_t strength = (specs & UCOL_TOK_BEFORE) - 1;
-            if(sourceToken != NULL && sourceToken->strength != UCOL_TOK_RESET) {
-              /* this is a before that is already ordered in the UCA - so we need to get the previous with good strength */
-              while(sourceToken->strength > strength && sourceToken->previous != NULL) {
-                sourceToken = sourceToken->previous;
-              }
-              /* here, either we hit the strength or NULL */
-              if(sourceToken->strength == strength) {
-                if(sourceToken->previous != NULL) {
-                  sourceToken = sourceToken->previous;
-                } else { /* start of list */
-                  sourceToken = sourceToken->listHeader->reset;
+    if(src->resultLen > 0 && ListList[src->resultLen-1].first == NULL) {
+        src->resultLen--;
+    }
+    return src->resultLen;
+}
+
+const UChar* ucol_tok_getRulesFromBundle( // Fetches the "Sequence" string stored under collations/<type> in the U_ICUDATA_COLL bundle opened for locale.
+    void* /*context*/, // left unnamed: not referenced anywhere in this function
+    const char* locale,
+    const char* type,
+    int32_t* pLength, // out: length reported by ures_getStringByKey; left at 0 on every failure path
+    UErrorCode* status) // not tested on entry; it is passed straight to ures_open
+{
+    const UChar* rules = NULL; // stays NULL unless all three lookups below succeed
+    UResourceBundle* bundle;
+    UResourceBundle* collations;
+    UResourceBundle* collation;
+
+    *pLength = 0; // default result length until a string is actually found
+
+    bundle = ures_open(U_ICUDATA_COLL, locale, status);
+    if(U_SUCCESS(*status)){ // each nested step runs only if the previous one succeeded
+        collations = ures_getByKey(bundle, "collations", NULL, status);
+        if(U_SUCCESS(*status)){
+            collation = ures_getByKey(collations, type, NULL, status);
+            if(U_SUCCESS(*status)){
+                rules = ures_getStringByKey(collation, "Sequence", pLength, status);
+                if(U_FAILURE(*status)){ // do not hand back a partial result when the string lookup failed
+                    *pLength = 0;
+                    rules = NULL;
                 }
-              } else { /* we hit NULL */
-                /* we should be doing the else part */
-                sourceToken = sourceToken->listHeader->reset;
-                sourceToken = getVirginBefore(src, sourceToken, strength, parseError, status);
-              }
-            } else {
-              sourceToken = getVirginBefore(src, sourceToken, strength, parseError, status);
-            }
-          } else { /* this is both before and indirection */
-            top = FALSE;
-            ListList[src->resultLen].previousCE = 0;
-            ListList[src->resultLen].previousContCE = 0;
-            ListList[src->resultLen].indirect = TRUE;
-            /* we need to do slightly more work. we need to get the baseCE using the */
-            /* inverse UCA & getPrevious. The next bound is not set, and will be decided */
-            /* in ucol_bld */
-            uint8_t strength = (specs & UCOL_TOK_BEFORE) - 1;
-            uint32_t baseCE = ucolIndirectBoundaries[src->parsedToken.indirectIndex].startCE;
-            uint32_t baseContCE = ucolIndirectBoundaries[src->parsedToken.indirectIndex].startContCE;//&0xFFFFFF3F;
-            uint32_t CE = UCOL_NOT_FOUND, SecondCE = UCOL_NOT_FOUND;
-
-            UCAConstants *consts = (UCAConstants *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts);
-            if((baseCE & 0xFF000000) >= (consts->UCA_PRIMARY_IMPLICIT_MIN<<24) && (baseCE & 0xFF000000) <= (consts->UCA_PRIMARY_IMPLICIT_MAX<<24) ) { /* implicits - */
-              uint32_t primary = baseCE & UCOL_PRIMARYMASK | (baseContCE & UCOL_PRIMARYMASK) >> 16;
-              uint32_t raw = uprv_uca_getRawFromImplicit(primary);
-              uint32_t primaryCE = uprv_uca_getImplicitFromRaw(raw-1);
-              CE = primaryCE & UCOL_PRIMARYMASK | 0x0505;
-              SecondCE = (primaryCE << 16) & UCOL_PRIMARYMASK | UCOL_CONTINUATION_MARKER;
-            } else {
-                /*int32_t invPos = ucol_inv_getPrevCE(baseCE, baseContCE, &CE, &SecondCE, strength);*/
-                ucol_inv_getPrevCE(src, baseCE, baseContCE, &CE, &SecondCE, strength);
+                ures_close(collation); // closed only on the path where it was obtained successfully
             }
+            ures_close(collations); // likewise closed only inside its own success branch
+        }
+    }
 
-            ListList[src->resultLen].baseCE = CE;
-            ListList[src->resultLen].baseContCE = SecondCE;
-            ListList[src->resultLen].nextCE = 0;
-            ListList[src->resultLen].nextContCE = 0;
+    ures_close(bundle); // runs even when ures_open failed; NOTE(review): presumably ures_close tolerates that result - confirm
 
-            sourceToken = ucol_tok_initAReset(src, 0, &expandNext, parseError, status);
-          }
-        }
+    return rules; // NOTE(review): returned after every bundle it was read from has been closed - confirm the string's lifetime does not depend on them
+}
 
+void ucol_tok_initTokenList( // Resets *src, splices [import] rules into the rule string, NFD-normalizes it into src->source and allocates the parser tables.
+    UColTokenParser *src,
+    const UChar *rules,
+    uint32_t rulesLength, // number of UChars in rules; updated locally whenever an import is spliced in
+    const UCollator *UCA,
+    GetCollationRulesFunction importFunc, // invoked for each [import ...] option; not NULL-checked in this function
+    void* context, // handed unchanged to importFunc
+    UErrorCode *status) {
+    U_NAMESPACE_USE
 
-      /*  5 If the relation is a reset:
-          If sourceToken is null
-            Create new list, create new sourceToken, make the baseCE from source, put
-            the sourceToken in ListHeader of the new list */
-        if(sourceToken == NULL) {
-          /*
-            3 Consider each item: relation, source, and expansion: e.g. ...< x / y ...
-              First convert all expansions into normal form. Examples:
-                If "xy" doesn't occur earlier in the list or in the UCA, convert &xy * c *
-                d * ... into &x * c/y * d * ...
-                Note: reset values can never have expansions, although they can cause the
-                very next item to have one. They may be contractions, if they are found
-                earlier in the list.
-          */
-          if(top == FALSE) {
-            collIterate s;
-            uint32_t CE = UCOL_NOT_FOUND, SecondCE = UCOL_NOT_FOUND;
-
-            uprv_init_collIterate(src->UCA, src->source+src->parsedToken.charsOffset, src->parsedToken.charsLen, &s);
-
-            CE = ucol_getNextCE(src->UCA, &s, status);
-            UChar *expand = s.pos;
-            SecondCE = ucol_getNextCE(src->UCA, &s, status);
-
-            ListList[src->resultLen].baseCE = CE & 0xFFFFFF3F;
-            if(isContinuation(SecondCE)) {
-              ListList[src->resultLen].baseContCE = SecondCE;
-            } else {
-              ListList[src->resultLen].baseContCE = 0;
-            }
-            ListList[src->resultLen].nextCE = 0;
-            ListList[src->resultLen].nextContCE = 0;
-            ListList[src->resultLen].previousCE = 0;
-            ListList[src->resultLen].previousContCE = 0;
-            ListList[src->resultLen].indirect = FALSE;
-            sourceToken = ucol_tok_initAReset(src, expand, &expandNext, parseError, status);
-          } else { /* top == TRUE */
-            /* just use the supplied values */
-            top = FALSE;
-            ListList[src->resultLen].previousCE = 0;
-            ListList[src->resultLen].previousContCE = 0;
-            ListList[src->resultLen].indirect = TRUE;
-            ListList[src->resultLen].baseCE = ucolIndirectBoundaries[src->parsedToken.indirectIndex].startCE;
-            ListList[src->resultLen].baseContCE = ucolIndirectBoundaries[src->parsedToken.indirectIndex].startContCE;
-            ListList[src->resultLen].nextCE = ucolIndirectBoundaries[src->parsedToken.indirectIndex].limitCE;
-            ListList[src->resultLen].nextContCE = ucolIndirectBoundaries[src->parsedToken.indirectIndex].limitContCE;
+    uint32_t nSize = 0;
+    uint32_t estimatedSize = (2*rulesLength+UCOL_TOK_EXTRA_RULE_SPACE_SIZE); // starting capacity, in UChars, for the normalized copy
 
-            sourceToken = ucol_tok_initAReset(src, 0, &expandNext, parseError, status);
+    bool needToDeallocRules = false; // becomes true once rules points at a buffer allocated by the import branch below
 
-          }
-        } else { /* reset to something already in rules */
-          top = FALSE;
-        }
-      }
-      /*  7 After all this, set LAST to point to sourceToken, and goto step 3. */
-      lastToken = sourceToken;
-    } else {
-      if(U_FAILURE(*status)) {
-        return 0;
-      }
+    if(U_FAILURE(*status)) { // on an incoming failure *src is left untouched (the memset below has not run yet)
+        return;
     }
-  }
 
-  if(src->resultLen > 0 && ListList[src->resultLen-1].first == NULL) {
-    src->resultLen--;
-  }
-  return src->resultLen;
-}
+    // set everything to zero, so that we can clean up gracefully
+    uprv_memset(src, 0, sizeof(UColTokenParser));
+
+    // first we need to find options that don't like to be normalized,
+    // like copy and remove...
+    //const UChar *openBrace = rules;
+    int32_t optionNumber = -1;
+    const UChar *setStart = NULL;
+    uint32_t i = 0;
+    while(i < rulesLength) { // rulesLength and rules may both change inside the loop when an import is expanded
+        if(rules[i] == 0x005B) {    // '[': start of an option
+            /* Gets the following:
+               optionNumber: The index of the option.
+               setStart: The pointer at which the option arguments start.
+             */
+            optionNumber = ucol_uprv_tok_readOption(rules+i+1, rules+rulesLength, &setStart);
+
+            if(optionNumber == OPTION_OPTIMIZE) { /* copy - parts of UCA to tailoring */
+                // [optimize]
+                USet *newSet = ucol_uprv_tok_readAndSetUnicodeSet(setStart, rules+rulesLength, status);
+                if(U_SUCCESS(*status)) {
+                    if(src->copySet == NULL) {
+                        src->copySet = newSet; // first set found: src takes it over as-is
+                    } else {
+                        uset_addAll(src->copySet, newSet); // later sets are merged into the existing one, then released
+                        uset_close(newSet);
+                    }
+                } else {
+                    return; // NOTE(review): a rules buffer allocated by an earlier [import] (needToDeallocRules) is not freed on this path
+                }
+            } else if(optionNumber == OPTION_SUPPRESS_CONTRACTIONS) {
+                USet *newSet = ucol_uprv_tok_readAndSetUnicodeSet(setStart, rules+rulesLength, status);
+                if(U_SUCCESS(*status)) {
+                    if(src->removeSet == NULL) {
+                        src->removeSet = newSet; // first set found: src takes it over as-is
+                    } else {
+                        uset_addAll(src->removeSet, newSet); // later sets are merged into the existing one, then released
+                        uset_close(newSet);
+                    }
+                } else {
+                    return; // NOTE(review): a rules buffer allocated by an earlier [import] (needToDeallocRules) is not freed on this path
+                }
+            } else if(optionNumber == OPTION_IMPORT){
+                // [import <collation-name>]
+
+                // Find the address of the closing ].
+                UChar* import_end = u_strchr(setStart, 0x005D); // NOTE(review): return value is not checked for NULL before the arithmetic and dereference below
+                int32_t optionEndOffset = (int32_t)(import_end + 1 - rules); // index in rules of the first UChar after the closing ']'
+                // Ignore trailing whitespace.
+                while(PatternProps::isWhiteSpace(*(import_end-1))) { // NOTE(review): loop has no lower bound at setStart
+                    --import_end;
+                }
 
-void ucol_tok_initTokenList(UColTokenParser *src, const UChar *rules, const uint32_t rulesLength, const UCollator *UCA, UErrorCode *status) {
-  uint32_t nSize = 0;
-  uint32_t estimatedSize = (2*rulesLength+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
-  if(U_FAILURE(*status)) {
-    return;
-  }
+                int32_t optionLength = (int32_t)(import_end - setStart); // number of UChars between setStart and the trimmed end
+                char option[50];
+                if(optionLength >= (int32_t)sizeof(option)) { // >= leaves room for the terminating 0 written below
+                    *status = U_ILLEGAL_ARGUMENT_ERROR;
+                    return; // NOTE(review): a rules buffer allocated by an earlier [import] (needToDeallocRules) is not freed on this path
+                }
+                u_UCharsToChars(setStart, option, optionLength);
+                option[optionLength] = 0;
+
+                *status = U_ZERO_ERROR; // NOTE(review): overwrites *status unconditionally; *status is not checked after importFunc, so a failure from a previous import iteration would be cleared here
+                char locale[50];
+                int32_t templ;
+                uloc_forLanguageTag(option, locale, (int32_t)sizeof(locale), &templ, status); // the option text is interpreted as a language tag; templ is not read afterwards
+                if(U_FAILURE(*status)) {
+                    *status = U_ILLEGAL_ARGUMENT_ERROR; // any conversion failure is reported as an illegal argument
+                    return; // NOTE(review): a rules buffer allocated by an earlier [import] (needToDeallocRules) is not freed on this path
+                }
+
+                char type[50];
+                if (uloc_getKeywordValue(locale, "collation", type, (int32_t)sizeof(type), status) <= 0 ||
+                    U_FAILURE(*status)
+                ) {
+                    *status = U_ZERO_ERROR; // a missing or unreadable keyword is not treated as an error
+                    uprv_strcpy(type, "standard"); // fallback collation type
+                }
 
-  // set everything to zero, so that we can clean up gracefully
-  uprv_memset(src, 0, sizeof(UColTokenParser));
+                // TODO: Use public functions when available, see ticket #8134.
+                char *keywords = (char *)locale_getKeywordsStart(locale);
+                if(keywords != NULL) {
+                    *keywords = 0; // truncate locale in place where its keywords begin
+                }
 
-  // first we need to find options that don't like to be normalized,
-  // like copy and remove...
-  //const UChar *openBrace = rules;
-  int32_t optionNumber = -1;
-  const UChar *setStart;
-  uint32_t i = 0;
-  while(i < rulesLength) {
-    if(rules[i] == 0x005B) {
-      // while((openBrace = u_strchr(openBrace, 0x005B)) != NULL) { // find open braces
-      //optionNumber = ucol_uprv_tok_readOption(openBrace+1, rules+rulesLength, &setStart);
-      optionNumber = ucol_uprv_tok_readOption(rules+i+1, rules+rulesLength, &setStart);
-      if(optionNumber == OPTION_OPTIMIZE) { /* copy - parts of UCA to tailoring */
-        USet *newSet = ucol_uprv_tok_readAndSetUnicodeSet(setStart, rules+rulesLength, status);
-        if(U_SUCCESS(*status)) {
-          if(src->copySet == NULL) {
-            src->copySet = newSet;
-          } else {
-            ((UnicodeSet *)src->copySet)->addAll(*((UnicodeSet *)newSet));
-            uset_close(newSet);
-          }
-        } else {
-          return;
-        }
-      } else if(optionNumber == OPTION_SUPPRESS_CONTRACTIONS) {
-        USet *newSet = ucol_uprv_tok_readAndSetUnicodeSet(setStart, rules+rulesLength, status);
-        if(U_SUCCESS(*status)) {
-          if(src->removeSet == NULL) {
-            src->removeSet = newSet;
-          } else {
-            ((UnicodeSet *)src->removeSet)->addAll(*((UnicodeSet *)newSet));
-            uset_close(newSet);
-          }
-        } else {
-          return;
+                int32_t importRulesLength = 0;
+                const UChar* importRules = importFunc(context, locale, type, &importRulesLength, status); // NOTE(review): neither *status nor importRules is checked before the copies below
+
+#ifdef DEBUG_FOR_COLL_RULES
+                std::string s;
+                UnicodeString(importRules).toUTF8String(s);
+                std::cout << "Import rules = " << s << std::endl;
+#endif
+
+                // Add the length of the imported rules to length of the original rules,
+                // and subtract the length of the import option.
+                uint32_t newRulesLength = rulesLength + importRulesLength - (optionEndOffset - i);
+
+                UChar* newRules = (UChar*)uprv_malloc(newRulesLength*sizeof(UChar)); // NOTE(review): result is not checked for NULL before the uprv_memcpy calls below
+
+#ifdef DEBUG_FOR_COLL_RULES
+                std::string s1;
+                UnicodeString(rules).toUTF8String(s1);
+                std::cout << "Original rules = " << s1 << std::endl;
+#endif
+
+
+                // Copy the section of the original rules leading up to the import
+                uprv_memcpy(newRules, rules, i*sizeof(UChar));
+                // Copy the imported rules
+                uprv_memcpy(newRules+i, importRules, importRulesLength*sizeof(UChar));
+                // Copy the rest of the original rules (minus the import option itself)
+                uprv_memcpy(newRules+i+importRulesLength,
+                            rules+optionEndOffset,
+                            (rulesLength-optionEndOffset)*sizeof(UChar));
+
+#ifdef DEBUG_FOR_COLL_RULES
+                std::string s2;
+                UnicodeString(newRules).toUTF8String(s2);
+                std::cout << "Resulting rules = " << s2 << std::endl;
+#endif
+
+                if(needToDeallocRules){
+                    // if needToDeallocRules is set, then we allocated rules, so it's safe to cast and free
+                    uprv_free((void*)rules);
+                }
+                needToDeallocRules = true;
+                rules = newRules; // from here on the loop scans the spliced buffer, not the caller's string
+                rulesLength = newRulesLength;
+
+                estimatedSize += importRulesLength*2; // grows the estimate by twice the imported length, matching the 2*rulesLength term of the initial estimate
+
+                // First character of the new rules needs to be processed
+                i--; // cancels the i++ at the bottom of the loop so index i is examined again; i is uint32_t, so for i == 0 this wraps and the i++ wraps it back
+            }
         }
-      }
+        //openBrace++;
+        i++;
     }
-    //openBrace++;
-    i++;
-  }
 
-  src->source = (UChar *)uprv_malloc(estimatedSize*sizeof(UChar));
-  /* test for NULL */
-  if (src->source == NULL) {
-      *status = U_MEMORY_ALLOCATION_ERROR;
-      return;
-  }
-  uprv_memset(src->source, 0, estimatedSize*sizeof(UChar));
-  nSize = unorm_normalize(rules, rulesLength, UNORM_NFD, 0, src->source, estimatedSize, status);
-  if(nSize > estimatedSize || *status == U_BUFFER_OVERFLOW_ERROR) {
-    *status = U_ZERO_ERROR;
-    src->source = (UChar *)uprv_realloc(src->source, (nSize+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
+    src->source = (UChar *)uprv_malloc(estimatedSize*sizeof(UChar)); // buffer that will hold the NFD-normalized rules
     /* test for NULL */
     if (src->source == NULL) {
         *status = U_MEMORY_ALLOCATION_ERROR;
         return;
     }
-    nSize = unorm_normalize(rules, rulesLength, UNORM_NFD, 0, src->source, nSize+UCOL_TOK_EXTRA_RULE_SPACE_SIZE, status);
-  }
-  src->current = src->source;
-  src->end = src->source+nSize;
-  src->sourceCurrent = src->source;
-  src->extraCurrent = src->end+1; // Preserve terminating zero in the rule string so that option scanning works correctly
-  src->extraEnd = src->source+estimatedSize; //src->end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
-  src->varTop = NULL;
-  src->UCA = UCA;
-  src->invUCA = ucol_initInverseUCA(status);
-  src->parsedToken.charsLen = 0;
-  src->parsedToken.charsOffset = 0;
-  src->parsedToken.extensionLen = 0;
-  src->parsedToken.extensionOffset = 0;
-  src->parsedToken.prefixLen = 0;
-  src->parsedToken.prefixOffset = 0;
-  src->parsedToken.flags = 0;
-  src->parsedToken.strength = UCOL_TOK_UNSET;
-
-
-  if(U_FAILURE(*status)) {
-    return;
-  }
-  src->tailored = uhash_open(uhash_hashTokens, uhash_compareTokens, status);
-  if(U_FAILURE(*status)) {
-    return;
-  }
-  uhash_setValueDeleter(src->tailored, uhash_freeBlock);
+    uprv_memset(src->source, 0, estimatedSize*sizeof(UChar));
+    nSize = unorm_normalize(rules, rulesLength, UNORM_NFD, 0, src->source, estimatedSize, status); // first attempt, into the estimated capacity
+    if(nSize > estimatedSize || *status == U_BUFFER_OVERFLOW_ERROR) { // estimate was too small: enlarge and normalize again
+        *status = U_ZERO_ERROR;
+        src->source = (UChar *)uprv_realloc(src->source, (nSize+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); // NOTE(review): direct assignment drops the only pointer to the old block if uprv_realloc returns NULL without freeing it - confirm uprv_realloc's contract
+        /* test for NULL */
+        if (src->source == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return; // NOTE(review): rules is not freed here when needToDeallocRules is true
+        }
+        nSize = unorm_normalize(rules, rulesLength, UNORM_NFD, 0, src->source, nSize+UCOL_TOK_EXTRA_RULE_SPACE_SIZE, status);
+    }
+    if(needToDeallocRules){
+        // if needToDeallocRules is set, then we allocated rules, so it's safe to cast and free
+        uprv_free((void*)rules);
+    }
 
-  src->opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
-  /* test for NULL */
-  if (src->opts == NULL) {
-      *status = U_MEMORY_ALLOCATION_ERROR;
-      return;
-  }
 
-  uprv_memcpy(src->opts, UCA->options, sizeof(UColOptionSet));
+    src->current = src->source;
+    src->end = src->source+nSize; // one past the last normalized UChar
+    src->sourceCurrent = src->source;
+    src->extraCurrent = src->end+1; // Preserve terminating zero in the rule string so that option scanning works correctly
+    src->extraEnd = src->source+estimatedSize; //src->end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; NOTE(review): still based on estimatedSize even when the buffer was reallocated to nSize+UCOL_TOK_EXTRA_RULE_SPACE_SIZE above
+    src->varTop = NULL;
+    src->UCA = UCA;
+    src->invUCA = ucol_initInverseUCA(status);
+    src->parsedToken.charsLen = 0;
+    src->parsedToken.charsOffset = 0;
+    src->parsedToken.extensionLen = 0;
+    src->parsedToken.extensionOffset = 0;
+    src->parsedToken.prefixLen = 0;
+    src->parsedToken.prefixOffset = 0;
+    src->parsedToken.flags = 0;
+    src->parsedToken.strength = UCOL_TOK_UNSET;
+    src->buildCCTabFlag = FALSE;
+    src->isStarred = FALSE;
+    src->inRange = FALSE;
+    src->lastRangeCp = 0;
+    src->previousCp = 0;
+
+    if(U_FAILURE(*status)) { // catches failures reported by unorm_normalize or ucol_initInverseUCA above
+        return;
+    }
+    src->tailored = uhash_open(uhash_hashTokens, uhash_compareTokens, NULL, status);
+    if(U_FAILURE(*status)) {
+        return;
+    }
+    uhash_setValueDeleter(src->tailored, uprv_free); // registers uprv_free as the table's value deleter
 
-  // rulesToParse = src->source;
-  src->lh = 0;
-  src->listCapacity = 1024;
-  src->lh = (UColTokListHeader *)uprv_malloc(src->listCapacity*sizeof(UColTokListHeader));
-  //Test for NULL
-  if (src->lh == NULL) {
-      *status = U_MEMORY_ALLOCATION_ERROR;
-      return;
-  }
-  uprv_memset(src->lh, 0, src->listCapacity*sizeof(UColTokListHeader));
-  src->resultLen = 0;
-
-  UCAConstants *consts = (UCAConstants *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts);
-
-  // UCOL_RESET_TOP_VALUE
-  setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
-  // UCOL_FIRST_PRIMARY_IGNORABLE
-  setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
-  // UCOL_LAST_PRIMARY_IGNORABLE
-  setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
-  // UCOL_FIRST_SECONDARY_IGNORABLE
-  setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
-  // UCOL_LAST_SECONDARY_IGNORABLE
-  setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
-  // UCOL_FIRST_TERTIARY_IGNORABLE
-  setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
-  // UCOL_LAST_TERTIARY_IGNORABLE
-  setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
-  // UCOL_FIRST_VARIABLE
-  setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
-  // UCOL_LAST_VARIABLE
-  setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
-  // UCOL_FIRST_NON_VARIABLE
-  setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
-  // UCOL_LAST_NON_VARIABLE
-  setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
-  // UCOL_FIRST_IMPLICIT
-  setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
-  // UCOL_LAST_IMPLICIT
-  setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
-  // UCOL_FIRST_TRAILING
-  setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
-  // UCOL_LAST_TRAILING
-  setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
-  ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
+    src->opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
+    /* test for NULL */
+    if (src->opts == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    uprv_memcpy(src->opts, UCA->options, sizeof(UColOptionSet)); // start from a copy of the UCA collator's option set
+
+    src->lh = 0; // overwritten by the allocation two lines below
+    src->listCapacity = 1024;
+    src->lh = (UColTokListHeader *)uprv_malloc(src->listCapacity*sizeof(UColTokListHeader));
+    //Test for NULL
+    if (src->lh == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    uprv_memset(src->lh, 0, src->listCapacity*sizeof(UColTokListHeader));
+    src->resultLen = 0;
+
+    UCAConstants *consts = (UCAConstants *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts); // UCAConsts is used as a byte offset from the start of the image
+
+    // UCOL_RESET_TOP_VALUE
+    setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT); // NOTE(review): takes no src argument, so these calls presumably fill the file-level ucolIndirectBoundaries table - confirm
+    // UCOL_FIRST_PRIMARY_IGNORABLE
+    setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
+    // UCOL_LAST_PRIMARY_IGNORABLE
+    setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
+    // UCOL_FIRST_SECONDARY_IGNORABLE
+    setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
+    // UCOL_LAST_SECONDARY_IGNORABLE
+    setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
+    // UCOL_FIRST_TERTIARY_IGNORABLE
+    setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
+    // UCOL_LAST_TERTIARY_IGNORABLE
+    setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
+    // UCOL_FIRST_VARIABLE
+    setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
+    // UCOL_LAST_VARIABLE
+    setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
+    // UCOL_FIRST_NON_VARIABLE
+    setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
+    // UCOL_LAST_NON_VARIABLE
+    setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
+    // UCOL_FIRST_IMPLICIT
+    setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
+    // UCOL_LAST_IMPLICIT
+    setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
+    // UCOL_FIRST_TRAILING
+    setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
+    // UCOL_LAST_TRAILING
+    setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
+    ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24); // entry 14 gets an explicit limit: UCA_PRIMARY_SPECIAL_MIN shifted into the top byte
 }
 
 
 void ucol_tok_closeTokenList(UColTokenParser *src) {
-  if(src->copySet != NULL) {
-    uset_close(src->copySet);
-  }
-  if(src->removeSet != NULL) {
-    uset_close(src->removeSet);
-  }
-  if(src->tailored != NULL) {
-    uhash_close(src->tailored);
-  }
-  if(src->lh != NULL) {
-    uprv_free(src->lh);
-  }
-  if(src->source != NULL) {
-    uprv_free(src->source);
-  }
-  if(src->opts != NULL) {
-    uprv_free(src->opts);
-  }
+    if(src->copySet != NULL) { // Releases each non-NULL member of src in turn; src itself is not freed and the members are not reset to NULL.
+        uset_close(src->copySet); // set accumulated from [optimize] options in ucol_tok_initTokenList
+    }
+    if(src->removeSet != NULL) {
+        uset_close(src->removeSet); // set accumulated from the suppress-contractions option in ucol_tok_initTokenList
+    }
+    if(src->tailored != NULL) {
+        uhash_close(src->tailored); // table opened with uhash_open in ucol_tok_initTokenList, where uprv_free is registered as its value deleter
+    }
+    if(src->lh != NULL) {
+        uprv_free(src->lh); // list-header array allocated with uprv_malloc in ucol_tok_initTokenList
+    }
+    if(src->source != NULL) {
+        uprv_free(src->source); // normalized rule buffer allocated in ucol_tok_initTokenList
+    }
+    if(src->opts != NULL) {
+        uprv_free(src->opts); // option-set copy allocated in ucol_tok_initTokenList
+    }
+    if (src->reorderCodes != NULL) {
+        uprv_free(src->reorderCodes); // NOTE(review): reorderCodes is only zeroed by the memset in ucol_tok_initTokenList; its allocation site is elsewhere - confirm it comes from uprv_malloc
+    }
 }
 
 #endif /* #if !UCONFIG_NO_COLLATION */
-