X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/374ca955a76ecab1204ca8bfa63ff9238d998416..340931cb2e044a2141d11567dd0f782524e32994:/icuSources/test/cintltst/usrchdat.c diff --git a/icuSources/test/cintltst/usrchdat.c b/icuSources/test/cintltst/usrchdat.c index 3ebbbe63..301f188f 100644 --- a/icuSources/test/cintltst/usrchdat.c +++ b/icuSources/test/cintltst/usrchdat.c @@ -1,5 +1,7 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** - * Copyright (c) 2001-2003 International Business Machines + * Copyright (c) 2001-2011,2015 International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************** * File USRCHDAT.H @@ -8,24 +10,32 @@ * synwee July 31 2001 creation ********************************************************************/ + +/* +Note: This file is included by other C and C++ files. This file should not be directly compiled. +*/ #ifndef USRCHDAT_C #define USRCHDAT_C -#include "unicode/utypes.h" +#include "unicode/ucol.h" #if !UCONFIG_NO_COLLATION -#include "unicode/ucol.h" +/* Set to 1 if matches must be on grapheme boundaries */ +#define GRAPHEME_BOUNDARIES 1 +U_CDECL_BEGIN struct SearchData { const char *text; const char *pattern; - const char *collator; + const char *collator; /* currently supported "fr" "es" "de", plus NULL/other => "en" */ UCollationStrength strength; - const char *breaker; - int32_t offset[32]; - uint32_t size[32]; + USearchAttributeValue elemCompare; /* value for the USEARCH_ELEMENT_COMPARISON attribute */ + const char *breaker; /* currently supported "wordbreaker" for EN_WORDBREAKER_, plus NULL/other => EN_CHARACTERBREAKER_ */ + int8_t offset[32]; + uint8_t size[32]; }; +U_CDECL_END typedef struct SearchData SearchData; @@ -34,189 +44,300 @@ static const char *TESTCOLLATORRULE = "& o,O ; p,P"; static const char *EXTRACOLLATIONRULE = " & ae ; \\u00e4 & AE ; \\u00c4 & oe ; \\u00f6 & OE ; \\u00d6 & ue ; \\u00fc & UE ; \\u00dc"; static const SearchData BASIC[] = { - {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"silly spring string", "string", NULL, UCOL_TERTIARY, NULL, {13, -1}, + {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"silly spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {13, -1}, {6}}, - {"silly spring string string", "string", NULL, UCOL_TERTIARY, NULL, + {"silly spring string string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {13, 20, -1}, {6, 6}}, - {"silly string spring string", "string", NULL, UCOL_TERTIARY, NULL, + {"silly string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {6, 20, -1}, {6, 6}}, - {"string spring string", "string", NULL, UCOL_TERTIARY, NULL, {0, 14, -1}, + {"string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 14, -1}, {6, 6}}, - {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, - {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, NULL, {5, -1}, {1}}, - {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, - {2}}, - {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, - {"\\u00c9", "e", NULL, UCOL_PRIMARY, NULL, {0, -1}, {1}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, + {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {5, -1}, {1}}, + {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + +#if GRAPHEME_BOUNDARIES + {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, + {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, +#endif + + {"\\u00c9", "e", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"x a\\u0301", "a\\u0301", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {2}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData BREAKITERATOREXACT[] = { - {"foxy fox", "fox", NULL, UCOL_TERTIARY, "characterbreaker", {0, 5, -1}, + {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {0, 5, -1}, {3, 3}}, - {"foxy fox", "fox", NULL, UCOL_TERTIARY, "wordbreaker", {5, -1}, {3}}, - {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, + {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {5, -1}, {3}}, + {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {10, 14, -1}, {3, 2}}, - {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, "wordbreaker", + {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {10, -1}, {3}}, {"Channel, another channel, more channels, and one last Channel", - "Channel", "es", UCOL_TERTIARY, "wordbreaker", {0, 54, -1}, {7, 7}}, + "Channel", "es", UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {0, 54, -1}, {7, 7}}, /* jitterbug 1745 */ - {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, + {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, {"testing that string ab\\u00e9cd does not match e", "e", NULL, - UCOL_TERTIARY, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}}, - {"\\u00c9", "e", "fr", UCOL_PRIMARY, "characterbreaker", {0, -1}, {1}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}}, + {"\\u00c9", "e", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {0, -1}, {1}}, +#if 0 + /* Problem reported by Dave Bertoni, same as ticket 4279? */ + {"\\u0043\\u004F\\u0302\\u0054\\u00C9", "\\u004F", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, -1}, {2}}, +#endif + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; +#define PECHE_WITH_ACCENTS "un p\\u00E9ch\\u00E9, " \ + "\\u00E7a p\\u00E8che par, " \ + "p\\u00E9cher, " \ + "une p\\u00EAche, " \ + "un p\\u00EAcher, " \ + "j\\u2019ai p\\u00EAch\\u00E9, " \ + "un p\\u00E9cheur, " \ + "\\u201Cp\\u00E9che\\u201D, " \ + "decomp peche\\u0301, " \ + "base peche" +/* in the above, the interesting words and their offsets are: + 3 pe<301>che<301> + 13 pe<300>che + 24 pe<301>cher + 36 pe<302>che + 46 pe<302>cher + 59 pe<302>che<301> + 69 pe<301>cheur + 79 pe<301>che + 94 peche<+301> +107 peche +*/ + static const SearchData STRENGTH[] = { - /*012345678901234567890123456789012345678901234567890123456789*/ - {"The quick brown fox jumps over the lazy foxes", "fox", "en", - UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}}, - {"The quick brown fox jumps over the lazy foxes", "fox", "en", - UCOL_PRIMARY, "wordbreaker", {16, -1}, {3}}, - {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", - "peche", "fr", UCOL_PRIMARY, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, - {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, NULL, - {10, 14, -1}, {3, 2}}, - {"A channel, another CHANNEL, more Channels, and one last channel...", - "channel", "es", UCOL_PRIMARY, NULL, {2, 19, 33, 56, -1}, - {7, 7, 7, 7}}, - {"\\u00c0 should match but not A", "A\\u0300", "en", UCOL_IDENTICAL, - NULL, {0, -1}, {1, 0}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + /*012345678901234567890123456789012345678901234567890123456789*/ + /*00*/{"The quick brown fox jumps over the lazy foxes", "fox", "en", + UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {3, 3}}, + /*01*/{"The quick brown fox jumps over the lazy foxes", "fox", "en", + UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {16, -1}, {3}}, + /*02*/{"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", + "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, + /*03*/{"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, + {10, 14, -1}, {3, 2}}, + /*04*/{"A channel, another CHANNEL, more Channels, and one last channel...", + "channel", "es", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, 19, 33, 56, -1}, {7, 7, 7, 7}}, + /*05*/{"\\u00c0 should match but not A", "A\\u0300", "en", UCOL_IDENTICAL, USEARCH_STANDARD_ELEMENT_COMPARISON, + NULL, {0, -1}, {1, 0}}, + /* some tests for modified element comparison, ticket #7093 */ + /*06*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, + /*07*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, + /*08*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {107, -1}, {5}}, + /*09*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, + /*10*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, + /*11*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {24, 69, 79, -1}, {5, 5, 5}}, + /*12*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {79, -1}, {5}}, + /*13*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, -1}, {5, 5, 5, 5}}, + /*14*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, -1}, {5, 5}}, + /*15*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 6, 5}}, + /*16*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, 94, 107, -1}, {5, 5, 6, 5}}, + /*17*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, + /*18*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, + /*19*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, + /*20*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, + /*21*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, + /*22*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, + /*23*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, + /*24*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, + /* more tests for modified element comparison (with fr), ticket #7093 */ + /*25*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, + /*26*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, + /*27*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {107, -1}, {5}}, + /*28*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, + /*29*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, + /*30*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {24, 69, 79, -1}, {5, 5, 5}}, + /*31*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {79, -1}, {5}}, + /*32*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, -1}, {5, 5, 5, 5}}, + /*33*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, -1}, {5, 5}}, + /*34*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 6, 5}}, + /*35*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, 94, 107, -1}, {5, 5, 6, 5}}, + /*36*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, + /*37*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, + /*38*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, + /*39*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, + /*40*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, + /*41*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, + /*42*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, + /*43*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, + +#if 0 + /* Ticket 5382 */ + {"12\\u0171", "\\u0170", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {2}}, +#endif + + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData VARIABLE[] = { /*012345678901234567890123456789012345678901234567890123456789*/ {"blackbirds black blackbirds blackbird black-bird", - "blackbird", NULL, UCOL_TERTIARY, NULL, {0, 17, 28, 38, -1}, + "blackbird", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 17, 28, 38, -1}, {9, 9, 9, 10}}, /* to see that it doesn't go into an infinite loop if the start of text is a ignorable character */ - {" on", "go", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, NULL, + {" on", "go", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, /* testing tightest match */ - {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, + {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {3}}, /*012345678901234567890123456789012345678901234567890123456789 */ - {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, + {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, /* totally ignorable text */ - {" ---------------", "abc", NULL, UCOL_SECONDARY, + {" ---------------", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData NORMEXACT[] = { - {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, - {2}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {"a\\u0300\\u0325", "a\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, + +#if GRAPHEME_BOUNDARIES + {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, +#endif + + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData NONNORMEXACT[] = { - {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, - {0}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData OVERLAP[] = { - {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 2, 4, -1}, + {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 2, 4, -1}, {4, 4, 4}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData NONOVERLAP[] = { - {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 4, -1}, {4, 4}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 4, -1}, {4, 4}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData COLLATOR[] = { /* english */ - {"fox fpx", "fox", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}}, + {"fox fpx", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, /* tailored */ - {"fox fpx", "fox", NULL, UCOL_PRIMARY, NULL, {0, 4, -1}, {3, 3}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {"fox fpx", "fox", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 4, -1}, {3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData PATTERN[] = { {"The quick brown fox jumps over the lazy foxes", "the", NULL, - UCOL_PRIMARY, NULL, {0, 31, -1}, {3, 3}}, + UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 31, -1}, {3, 3}}, {"The quick brown fox jumps over the lazy foxes", "fox", NULL, - UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData TEXT[] = { - {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {4, 15, -1}, + {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {4, 15, -1}, {3, 3}}, - {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {16, -1}, + {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, -1}, {3}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData COMPOSITEBOUNDARIES[] = { - {"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}}, - {"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}}, - {"B\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, - {"\\u00C0B", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, +#if GRAPHEME_BOUNDARIES + {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, + {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, +#else + {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}}, + {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}}, + {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, + {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}}, - {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, +#endif + + {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, /* A + 030A + 0301 */ - {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + +#if GRAPHEME_BOUNDARIES + {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, +#endif + + {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + + /* Ticket 5024 */ + {"a\\u00e1", "a\\u00e1", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, + + /* Ticket 5420 */ + {"fu\\u00dfball", "fu\\u00df", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, + {"fu\\u00dfball", "fuss", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, + {"fu\\u00dfball", "uss", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, + + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData MATCH[] = { - {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, NULL, + {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, -1}, {3, 3}}, /* 012345678901234567890123456789012345678901234567890 */ {"a busy bee is a very busy beeee with no bee life", "bee", NULL, - UCOL_TERTIARY, NULL, {7, 26, 40, -1}, {3, 3, 3}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, 40, -1}, {3, 3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData SUPPLEMENTARY[] = { /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */ {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", - "\\uD800\\uDC00", NULL, UCOL_TERTIARY, NULL, {4, 13, 22, 26, 29, -1}, + "\\uD800\\uDC00", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {4, 13, 22, 26, 29, -1}, {2, 2, 2, 2, 2}}, {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL, - UCOL_TERTIARY, NULL, {3, -1}, {2}}, + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {2}}, {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL, - UCOL_TERTIARY, NULL, {3, -1}, {4}}, + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL, - UCOL_TERTIARY, NULL, {3, -1}, {4}}, + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL, - UCOL_TERTIARY, NULL, {3, -1}, {4}}, + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL, - UCOL_TERTIARY, NULL, {3, -1}, {4}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const char *CONTRACTIONRULE = @@ -224,261 +345,425 @@ static const char *CONTRACTIONRULE = static const SearchData CONTRACTION[] = { /* common discontiguous */ - {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1}, - {2}}, + {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + +#if GRAPHEME_BOUNDARIES + {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, +#endif + /* contraction prefix */ - {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, - {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {2, -1}, {1}}, + {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + +#if GRAPHEME_BOUNDARIES + {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, + {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {1}}, +#endif + /* discontiguous problem here for backwards iteration. accents not found because discontiguous stores all information */ - {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {-1}, + {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, /* ends not with a contraction character */ - {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1}, + {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, - {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, + {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, - {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, + {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, /* blocked discontiguous */ - {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL, + {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, - {"ab", "z", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + +#if GRAPHEME_BOUNDARIES + /* + * "ab" generates a contraction that's an expansion. The "z" matches the + * first CE of the expansion but the match fails because it ends in the + * middle of an expansion... + */ + {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, +#endif + + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const char *IGNORABLERULE = "&a = \\u0300"; static const SearchData IGNORABLE[] = { - {"\\u0315\\u0300 \\u0315\\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, NULL, - {0, 3, -1}, {2, 3}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +#if GRAPHEME_BOUNDARIES + /* + * This isn't much of a test when matches have to be on + * grapheme boundiaries. The match at 0 only works because + * it's at the start of the text. + */ + {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, + {0, -1}, {2}}, +#else + {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, + {0, 3, -1}, {2, 2}}, +#endif + + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData BASICCANONICAL[] = { - {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"silly spring string", "string", NULL, UCOL_TERTIARY, NULL, {13, -1}, + {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"silly spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {13, -1}, {6}}, - {"silly spring string string", "string", NULL, UCOL_TERTIARY, NULL, + {"silly spring string string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {13, 20, -1}, {6, 6}}, - {"silly string spring string", "string", NULL, UCOL_TERTIARY, NULL, + {"silly string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {6, 20, -1}, {6, 6}}, - {"string spring string", "string", NULL, UCOL_TERTIARY, NULL, {0, 14, -1}, + {"string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 14, -1}, {6, 6}}, - {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, - {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, NULL, {5, -1}, {1}}, - {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, - {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, - {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, + {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, + {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {5, -1}, {1}}, + +#if GRAPHEME_BOUNDARIES + {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, + NULL, {-1}, {0}}, + {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, + NULL, {-1}, {0}}, + {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", + "\\u0300b\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, + {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, + {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, - {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, - {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, NULL, {1, -1}, {3}}, - {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, + {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, + {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {3}}, + {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {5}}, - {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, + {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {5}}, {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", - "\\u0300b\\u0325", NULL, UCOL_TERTIARY, NULL, {1, 12, -1}, {5, 3}}, - {"\\u00c4\\u0323", "A\\u0323\\u0308", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, - {"\\u0308\\u0323", "\\u0323\\u0308", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + "\\u0300b\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, 12, -1}, {5, 3}}, +#endif + + {"\\u00c4\\u0323", "A\\u0323\\u0308", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, + {"\\u0308\\u0323", "\\u0323\\u0308", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; + static const SearchData NORMCANONICAL[] = { - {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, - {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, - {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, +#if GRAPHEME_BOUNDARIES + /* + * These tests don't really mean anything. With matches restricted to grapheme + * boundaries, isCanonicalMatch doesn't mean anything unless normalization is + * also turned on... + */ + {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, + {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, + {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, - {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, + {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, - {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, - {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, + {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, +#endif + + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData BREAKITERATORCANONICAL[] = { - {"foxy fox", "fox", NULL, UCOL_TERTIARY, "characterbreaker", {0, 5, -1}, + {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {0, 5, -1}, {3, 3}}, - {"foxy fox", "fox", NULL, UCOL_TERTIARY, "wordbreaker", {5, -1}, {3}}, - {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, + {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {5, -1}, {3}}, + {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {10, 14, -1}, {3, 2}}, - {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, "wordbreaker", + {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {10, -1}, {3}}, {"Channel, another channel, more channels, and one last Channel", - "Channel", "es", UCOL_TERTIARY, "wordbreaker", {0, 54, -1}, {7, 7}}, + "Channel", "es", UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {0, 54, -1}, {7, 7}}, /* jitterbug 1745 */ - {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, + {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, {"testing that string ab\\u00e9cd does not match e", "e", NULL, - UCOL_TERTIARY, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}}, - {"\\u00c9", "e", "fr", UCOL_PRIMARY, "characterbreaker", {0, -1}, {1}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}}, + {"\\u00c9", "e", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {0, -1}, {1}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData STRENGTHCANONICAL[] = { /*012345678901234567890123456789012345678901234567890123456789 */ {"The quick brown fox jumps over the lazy foxes", "fox", "en", - UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}}, + UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {3, 3}}, {"The quick brown fox jumps over the lazy foxes", "fox", "en", - UCOL_PRIMARY, "wordbreaker", {16, -1}, {3}}, + UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {16, -1}, {3}}, {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", - "peche", "fr", UCOL_PRIMARY, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, - {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, NULL, + "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, + {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {10, 14, -1}, {3, 2}}, {"A channel, another CHANNEL, more Channels, and one last channel...", - "channel", "es", UCOL_PRIMARY, NULL, {2, 19, 33, 56, -1}, + "channel", "es", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, 19, 33, 56, -1}, {7, 7, 7, 7}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData VARIABLECANONICAL[] = { /*012345678901234567890123456789012345678901234567890123456789 */ {"blackbirds black blackbirds blackbird black-bird", - "blackbird", NULL, UCOL_TERTIARY, NULL, {0, 17, 28, 38, -1}, + "blackbird", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 17, 28, 38, -1}, {9, 9, 9, 10}}, /* to see that it doesn't go into an infinite loop if the start of text is a ignorable character */ - {" on", "go", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, NULL, + {" on", "go", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, /* testing tightest match */ - {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, + {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {3}}, /*012345678901234567890123456789012345678901234567890123456789 */ - {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, + {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, /* totally ignorable text */ - {" ---------------", "abc", NULL, UCOL_SECONDARY, + {" ---------------", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData OVERLAPCANONICAL[] = { - {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 2, 4, -1}, + {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 2, 4, -1}, {4, 4, 4}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData NONOVERLAPCANONICAL[] = { - {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 4, -1}, {4, 4}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 4, -1}, {4, 4}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData COLLATORCANONICAL[] = { /* english */ - {"fox fpx", "fox", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}}, + {"fox fpx", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, /* tailored */ - {"fox fpx", "fox", NULL, UCOL_PRIMARY, NULL, {0, 4, -1}, {3, 3}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {"fox fpx", "fox", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 4, -1}, {3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData PATTERNCANONICAL[] = { {"The quick brown fox jumps over the lazy foxes", "the", NULL, - UCOL_PRIMARY, NULL, {0, 31, -1}, {3, 3}}, + UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 31, -1}, {3, 3}}, {"The quick brown fox jumps over the lazy foxes", "fox", NULL, - UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData TEXTCANONICAL[] = { - {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {4, 15, -1}, + {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {4, 15, -1}, {3, 3}}, - {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {16, -1}, + {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, -1}, {3}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData COMPOSITEBOUNDARIESCANONICAL[] = { - {"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}}, - {"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}}, - {"B\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, - {"\\u00C0B", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, +#if GRAPHEME_BOUNDARIES + {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, + {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + + /* first one matches only because it's at the start of the text */ + {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + + /* \\u0300 blocked by \\u0300 */ + {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}}, + {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}}, + {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, + {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}}, /* \\u0300 blocked by \\u0300 */ - {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, + {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, +#endif + /* A + 030A + 0301 */ - {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + +#if GRAPHEME_BOUNDARIES + {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, +#endif + + {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + +#if GRAPHEME_BOUNDARIES + {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, +#endif + /* blocked accent */ - {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, - {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, - {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, - {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, - {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, + {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + +#if GRAPHEME_BOUNDARIES + {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, + {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, +#endif + + {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + +#if GRAPHEME_BOUNDARIES + {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, +#endif + + {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, + +#if GRAPHEME_BOUNDARIES + {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A", + NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {10, -1}, {2}}, +#else + {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, + {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A", - NULL, UCOL_TERTIARY, NULL, {0, 6, 10, 13, -1}, {1, 3, 2, 1}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 6, 10, 13, -1}, {1, 3, 2, 1}}, +#endif + + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData MATCHCANONICAL[] = { - {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, NULL, + {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, -1}, {3, 3}}, /*012345678901234567890123456789012345678901234567890 */ {"a busy bee is a very busy beeee with no bee life", "bee", NULL, - UCOL_TERTIARY, NULL, {7, 26, 40, -1}, {3, 3, 3}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, 40, -1}, {3, 3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData SUPPLEMENTARYCANONICAL[] = { /*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */ {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", - "\\uD800\\uDC00", NULL, UCOL_TERTIARY, NULL, {4, 13, 22, 26, 29, -1}, + "\\uD800\\uDC00", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {4, 13, 22, 26, 29, -1}, {2, 2, 2, 2, 2}}, {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL, - UCOL_TERTIARY, NULL, {3, -1}, {2}}, + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {2}}, {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL, - UCOL_TERTIARY, NULL, {3, -1}, {4}}, + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL, - UCOL_TERTIARY, NULL, {3, -1}, {4}}, + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL, - UCOL_TERTIARY, NULL, {3, -1}, {4}}, + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL, - UCOL_TERTIARY, NULL, {3, -1}, {4}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; static const SearchData CONTRACTIONCANONICAL[] = { /* common discontiguous */ - {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, - {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1}, - {2}}, +#if GRAPHEME_BOUNDARIES + {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, + {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, +#endif + /* contraction prefix */ - {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, - {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, - {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {2, -1}, {1}}, + {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + +#if GRAPHEME_BOUNDARIES + {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, +#else + {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, + {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {1}}, +#endif + /* discontiguous problem here for backwards iteration. forwards gives 0, 4 but backwards give 1, 3 */ - /* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {0, -1}, + /* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {4}}, */ /* ends not with a contraction character */ - {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1}, - {0}}, - {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, - {0, -1}, {3}}, - {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL, - {0, -1}, {4}}, + {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, + +#if GRAPHEME_BOUNDARIES + {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + /* blocked discontiguous */ - {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL, - {1, -1}, {4}}, - {"ab", "z", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, - {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} + {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, + + /* + * "ab" generates a contraction that's an expansion. The "z" matches the + * first CE of the expansion but the match fails because it ends in the + * middle of an expansion... + */ + {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {2}}, +#else + {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {4}}, + + /* blocked discontiguous */ + {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {4}}, + + {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, +#endif + + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} +}; + +static const SearchData DIACRITICMATCH[] = { + {"\\u03BA\\u03B1\\u03B9\\u0300\\u0020\\u03BA\\u03B1\\u1F76", "\\u03BA\\u03B1\\u03B9", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 5,-1}, {4, 3}}, + {"\\u0061\\u0061\\u00E1", "\\u0061\\u00E1", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, + {"\\u0020\\u00C2\\u0303\\u0020\\u0041\\u0061\\u1EAA\\u0041\\u0302\\u0303\\u00C2\\u0303\\u1EAB\\u0061\\u0302\\u0303\\u00E2\\u0303\\uD806\\uDC01\\u0300\\u0020", + "\\u00C2\\u0303", "LDE_AN_CX_EX_FX_HX_NX_S1", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, {2, 1, 1, 1, 3, 2, 1, 3, 2}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} +}; + +static const SearchData INDICPREFIXMATCH[] = { + {"\\u0915\\u0020\\u0915\\u0901\\u0020\\u0915\\u0902\\u0020\\u0915\\u0903\\u0020\\u0915\\u0940\\u0020\\u0915\\u093F\\u0020\\u0915\\u0943\\u0020\\u0915\\u093C\\u0020\\u0958", + "\\u0915", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 2, 5, 8, 11, 14, 17, 20, 23,-1}, {1, 2, 2, 2, 1, 1, 1, 2, 1}}, + {"\\u0915\\u0924\\u0020\\u0915\\u0924\\u0940\\u0020\\u0915\\u0924\\u093F\\u0020\\u0915\\u0924\\u0947\\u0020\\u0915\\u0943\\u0924\\u0020\\u0915\\u0943\\u0924\\u0947", + "\\u0915\\u0924", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 3, 7, 11, -1}, {2, 2, 2, 2}}, + {"\\u0915\\u0924\\u0020\\u0915\\u0924\\u0940\\u0020\\u0915\\u0924\\u093F\\u0020\\u0915\\u0924\\u0947\\u0020\\u0915\\u0943\\u0924\\u0020\\u0915\\u0943\\u0924\\u0947", + "\\u0915\\u0943\\u0924", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {15, 19, -1}, {3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} }; #endif /* #if !UCONFIG_NO_COLLATION */