ICU-400.39.tar.gz

[apple/icu.git] / icuSources / i18n / usearch.cpp
diff --git a/icuSources/i18n/usearch.cpp b/icuSources/i18n/usearch.cpp

index 9c06182b3551a6d2cd1d1ad4d9e2ef67ac1c839c..4e1e0e49916df2b6639360159d97b9ce19b47ba5 100644 (file)
--- a/icuSources/i18n/usearch.cpp
+++ b/icuSources/i18n/usearch.cpp
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 2001-2003 IBM and others. All rights reserved.
+*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
  **********************************************************************
  *   Date        Name        Description
  *  07/02/2001   synwee      Creation.
@@ -9,7 +9,7 @@
  
  #include "unicode/utypes.h"
  
-#if !UCONFIG_NO_COLLATION
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
  
  #include "unicode/usearch.h"
  #include "unicode/ustring.h"
@@ -18,6 +18,15 @@
  #include "ucol_imp.h"
  #include "usrchimp.h"
  #include "cmemory.h"
+#include "ucln_in.h"
+#include "uassert.h"
+
+U_NAMESPACE_USE
+
+// don't use Boyer-Moore
+#define BOYER_MOORE 0
+
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
  
  // internal definition ---------------------------------------------------
  
@@ -39,13 +48,17 @@ static
  inline void setColEIterOffset(UCollationElements *elems,
                        int32_t             offset)
  {
-       collIterate *ci = &(elems->iteratordata_);
-       ci->pos         = ci->string + offset;
-       ci->CEpos       = ci->toReturn = ci->CEs;
-       if (ci->flags & UCOL_ITER_INNORMBUF) {
-               ci->flags = ci->origFlags;
-       }
-       ci->fcdPosition = NULL;
+    collIterate *ci = &(elems->iteratordata_);
+    ci->pos         = ci->string + offset;
+    ci->CEpos       = ci->toReturn = ci->extendCEs ? ci->extendCEs : ci->CEs;
+    if (ci->flags & UCOL_ITER_INNORMBUF) {
+        ci->flags = ci->origFlags;
+    }
+    ci->fcdPosition = NULL;
+
+       ci->offsetReturn = NULL;
+    ci->offsetStore  = ci->offsetBuffer;
+       ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
  }
  
  /**
@@ -83,6 +96,14 @@ inline int hash(uint32_t ce)
      return UCOL_PRIMARYORDER(ce) % MAX_TABLE_SIZE_;
  }
  
+U_CDECL_BEGIN
+static UBool U_CALLCONV
+usearch_cleanup(void) {   // cleanup callback; initializeFCD() registers it under UCLN_I18N_USEARCH
+    FCD_ = NULL;          // forget the cached FCD trie; initializeFCD() fetches it again while this is NULL
+    return TRUE;          // unconditionally reports success
+}
+U_CDECL_END
+
  /**
  * Initializing the fcd tables.
  * Internal method, status assumed to be a success.
@@ -94,6 +115,7 @@ inline void initializeFCD(UErrorCode *status)
  {
      if (FCD_ == NULL) {
          FCD_ = unorm_getFCDTrie(status);
+        ucln_i18n_registerCleanup(UCLN_I18N_USEARCH, usearch_cleanup);
      }
  }
  
@@ -108,7 +130,7 @@ inline void initializeFCD(UErrorCode *status)
  * @return fcd value
  */
  static
-inline uint16_t getFCD(const UChar   *str, int32_t *offset, 
+uint16_t getFCD(const UChar   *str, int32_t *offset, 
                               int32_t  strlength)
  {
      int32_t temp = *offset;
@@ -138,7 +160,7 @@ inline uint16_t getFCD(const UChar   *str, int32_t *offset,
  * @return the modified collation element
  */
  static
-inline uint32_t getCE(const UStringSearch *strsrch, uint32_t sourcece)
+inline int32_t getCE(const UStringSearch *strsrch, uint32_t sourcece)
  {
      // note for tertiary we can't use the collator->tertiaryMask, that
      // is a preprocessed mask that takes into account case options. since
@@ -199,26 +221,70 @@ inline void * allocateMemory(uint32_t size, UErrorCode *status)
  * @return new destination array, destination if there was no new allocation
  */
  static
-inline uint32_t * addTouint32_tArray(uint32_t   *destination,       
-                                     uint32_t    offset, 
-                                     uint32_t   *destinationlength, 
-                                     uint32_t    value,
-                                     uint32_t    increments, 
-                                     UErrorCode *status) 
+inline int32_t * addTouint32_tArray(int32_t    *destination,       
+                                    uint32_t    offset, 
+                                    uint32_t   *destinationlength, 
+                                    uint32_t    value,
+                                    uint32_t    increments, 
+                                    UErrorCode *status) 
+{
+    uint32_t newlength = *destinationlength;
+    if (offset + 1 == newlength) {
+        newlength += increments;
+        int32_t *temp = (int32_t *)allocateMemory(
+                                         sizeof(int32_t) * newlength, status);
+        if (U_FAILURE(*status)) {
+            return NULL;
+        }
+        uprv_memcpy(temp, destination, sizeof(int32_t) * offset);
+        *destinationlength = newlength;
+        destination        = temp;
+    }
+    destination[offset] = value;
+    return destination;
+}
+
+/**
+* Adds a uint64_t value to a destination array.
+* Creates a new array if we run out of space. The caller will have to 
+* manually deallocate the newly allocated array.
+* Internal method, status assumed to be success, caller has to check status 
+* before calling this method. destination not to be NULL and has at least 
+* size destinationlength.
+* @param destination target array
+* @param offset destination offset to add value
+* @param destinationlength target array size, return value for the new size
+* @param value to be added
+* @param increments incremental size expected
+* @param status output error if any, caller to check status before calling 
+*               method, status assumed to be success when passed in.
+* @return new destination array, destination if there was no new allocation
+*/
+static
+inline int64_t * addTouint64_tArray(int64_t    *destination,       
+                                    uint32_t    offset, 
+                                    uint32_t   *destinationlength, 
+                                    uint64_t    value,
+                                    uint32_t    increments, 
+                                    UErrorCode *status) 
  {
      uint32_t newlength = *destinationlength;
      if (offset + 1 == newlength) {
          newlength += increments;
-        uint32_t *temp = (uint32_t *)allocateMemory(
-                                         sizeof(uint32_t) * newlength, status);
+        int64_t *temp = (int64_t *)allocateMemory(
+                                         sizeof(int64_t) * newlength, status);
+ 
          if (U_FAILURE(*status)) {
              return NULL;
          }
-        uprv_memcpy(temp, destination, sizeof(uint32_t) * offset);
+
+        uprv_memcpy(temp, destination, sizeof(int64_t) * offset);
          *destinationlength = newlength;
          destination        = temp;
      }
+
      destination[offset] = value;
+
      return destination;
  }
  
@@ -240,7 +306,7 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch,
  {
      UPattern *pattern            = &(strsrch->pattern);
      uint32_t  cetablesize        = INITIAL_ARRAY_SIZE_;
-    uint32_t *cetable            = pattern->CEBuffer;
+    int32_t  *cetable            = pattern->CEBuffer;
      uint32_t  patternlength      = pattern->textLength;
      UCollationElements *coleiter = strsrch->utilIter;
              
@@ -264,13 +330,13 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch,
          
      uint16_t  offset      = 0;
      uint16_t  result      = 0;
-    uint32_t  ce;
+    int32_t   ce;
  
      while ((ce = ucol_next(coleiter, status)) != UCOL_NULLORDER &&
             U_SUCCESS(*status)) {
          uint32_t newce = getCE(strsrch, ce);
          if (newce) {
-            uint32_t *temp = addTouint32_tArray(cetable, offset, &cetablesize, 
+            int32_t *temp = addTouint32_tArray(cetable, offset, &cetablesize, 
                                    newce,
                                    patternlength - ucol_getOffset(coleiter) + 1, 
                                    status);
@@ -293,6 +359,82 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch,
      return result;
  }
  
+/**
+* Builds the table of processed collation elements (PCEs) for the pattern.
+* (Re)positions strsrch->utilIter on the pattern text, opening the iterator
+* first if it does not exist yet, and appends every value returned by 
+* ucol_nextProcessed() to the table; unlike initializePatternCETable there is
+* no filtering step in this routine.
+* The table starts out as pattern->PCEBuffer, taken to hold 
+* INITIAL_ARRAY_SIZE_ entries. Table expansion is performed as we go along 
+* through addTouint64_tArray, by 
+* patternlength - ucol_getOffset(coleiter) + 1 entries; adding 1 ensures that
+* the table size definitely increases.
+* On success the table is terminated with 0 and published as pattern->PCE, 
+* with the number of entries (terminator excluded) in pattern->PCELength.
+* Internal method, status assumed to be a success.
+* NOTE(review): on an allocation failure the function returns early without 
+* updating pattern->PCE, although a heap table stored there may already have 
+* been freed, and a table grown earlier in the loop is not released. Confirm 
+* that callers cope with this.
+* NOTE(review): offset is a uint16_t, so it would wrap for a pattern that
+* yields more than 65535 entries. Confirm this cannot happen.
+* @param strsrch string search data
+* @param status output error if any, caller to check status before calling 
+*               method, status assumed to be success when passed in.
+* @return currently always 0: result is never accumulated because the 
+*         ucol_getMaxExpansion line inside the loop is commented out.
+*/
+static
+inline uint16_t initializePatternPCETable(UStringSearch *strsrch, 
+                                          UErrorCode    *status)
+{
+    UPattern *pattern            = &(strsrch->pattern);
+    uint32_t  pcetablesize       = INITIAL_ARRAY_SIZE_;  // capacity assumed for PCEBuffer
+    int64_t  *pcetable           = pattern->PCEBuffer;   // start with the buffer embedded in the pattern
+    uint32_t  patternlength      = pattern->textLength;
+    UCollationElements *coleiter = strsrch->utilIter;
+            
+    if (coleiter == NULL) {
+        coleiter = ucol_openElements(strsrch->collator, pattern->text, 
+                                     patternlength, status);
+        // status is not checked here; the loop condition below tests it 
+        // right after the first ucol_nextProcessed(..) call, and on error 
+        // the loop body never runs and result (0) is returned.
+        strsrch->utilIter = coleiter;   // keep the iterator for later reuse
+    } else {
+        uprv_init_collIterate(strsrch->collator, pattern->text,
+                              pattern->textLength,
+                              &coleiter->iteratordata_);  // re-point the existing iterator at the pattern
+    }
+        
+    if (pattern->PCE != pcetable && pattern->PCE != NULL) {  // old table is neither PCEBuffer nor NULL
+        uprv_free(pattern->PCE);   // pattern->PCE is only reassigned at the end of this function
+    }
+        
+    uint16_t  offset = 0;   // number of PCEs stored so far
+    uint16_t  result = 0;   // never changed below, see @return
+    int64_t   pce;
+
+    uprv_init_pce(coleiter);
+
+    // ** Should processed CEs be signed or unsigned?
+    // ** (the rest of the code in this file seems to play fast-and-loose with 
+    // **  whether a CE is signed or unsigned. For example, look at routine above this one.)
+    while ((pce = ucol_nextProcessed(coleiter, NULL, NULL, status)) != UCOL_PROCESSED_NULLORDER &&
+           U_SUCCESS(*status)) {
+        int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize, 
+                              pce,
+                              patternlength - ucol_getOffset(coleiter) + 1, 
+                              status);  // returns pcetable itself, or a newly allocated larger copy
+
+        if (U_FAILURE(*status)) {
+            return 0;   // temp is NULL here; pcetable and pattern->PCE are left as they are
+        }
+
+        offset += 1;
+
+        if (pcetable != temp && pcetable != pattern->PCEBuffer) {  // table was replaced and the old one is not PCEBuffer
+            uprv_free(pcetable);
+        }
+
+        pcetable = temp;
+        //result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1);
+    }
+
+    pcetable[offset]   = 0;         // terminator; addTouint64_tArray grows when offset + 1 reaches the size, leaving a spare slot
+    pattern->PCE       = pcetable;
+    pattern->PCELength = offset;    // terminator not counted
+
+    return result;
+}
+
  /**
  * Initializes the pattern struct.
  * Internal method, status assumed to be success.
@@ -308,13 +450,29 @@ inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
      const UChar      *patterntext = pattern->text;
            int32_t     length      = pattern->textLength;
            int32_t index       = 0;
+    
+    // Since the strength is primary, accents are ignored in the pattern.
+    if (strsrch->strength == UCOL_PRIMARY) {
+       pattern->hasPrefixAccents = 0;
+       pattern->hasSuffixAccents = 0;
+    } else {
+           pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >> 
+                                                            SECOND_LAST_BYTE_SHIFT_;
+           index = length;
+           UTF_BACK_1(patterntext, 0, index);
+           pattern->hasSuffixAccents = getFCD(patterntext, &index, length) & 
+                                                                    LAST_BYTE_MASK_;
+    }
+
+    // ** HACK **
+    if (strsrch->pattern.PCE != NULL) {
+        if (strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
+            uprv_free(strsrch->pattern.PCE);
+        }
+
+        strsrch->pattern.PCE = NULL;
+    }
  
-    pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >> 
-                                                     SECOND_LAST_BYTE_SHIFT_;
-    index = length;
-    UTF_BACK_1(patterntext, 0, index);
-    pattern->hasSuffixAccents = getFCD(patterntext, &index, length) & 
-                                                             LAST_BYTE_MASK_;
      // since intializePattern is an internal method status is a success.
      return initializePatternCETable(strsrch, status);   
  }
@@ -332,7 +490,7 @@ inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
  */
  static
  inline void setShiftTable(int16_t   shift[], int16_t backshift[], 
-                          uint32_t *cetable, int32_t cesize, 
+                          int32_t  *cetable, int32_t cesize, 
                            int16_t   expansionsize,
                            int16_t   defaultforward,
                            int16_t   defaultbackward)
@@ -404,7 +562,7 @@ inline void initialize(UStringSearch *strsrch, UErrorCode *status)
          int32_t   cesize  = pattern->CELength;
  
          int16_t minlength = cesize > expandlength 
-                                       ? (int16_t)cesize - expandlength : 1;
+                            ? (int16_t)cesize - expandlength : 1;
          pattern->defaultShiftSize    = minlength;
          setShiftTable(pattern->shift, pattern->backShift, pattern->CE,
                        cesize, expandlength, minlength, minlength);
@@ -413,6 +571,37 @@ inline void initialize(UStringSearch *strsrch, UErrorCode *status)
      strsrch->pattern.defaultShiftSize = 0;
  }
  
+#if BOYER_MOORE
+/**
+* Makes sure that a match ends on a boundary of the internal break iterator.
+* If strsrch->search->internalBreakIter is set and *end is not one of its 
+* boundaries, *end is replaced by the result of ubrk_following() for that 
+* offset. The start offset is never read or modified: its parameter name and 
+* the code that would adjust it are commented out.
+* This function sits inside the #if BOYER_MOORE section, and BOYER_MOORE is 
+* defined as 0 near the top of this file.
+* NOTE(review): the ubrk_following() result is stored unchecked; confirm that
+* it cannot be UBRK_DONE here.
+* @param strsrch string search data 
+* @param start target text start offset, currently unused
+* @param end target text end offset, updated in place when it is not on a 
+*            boundary
+*/
+static
+void checkBreakBoundary(const UStringSearch *strsrch, int32_t * /*start*/, 
+                               int32_t *end)
+{
+#if !UCONFIG_NO_BREAK_ITERATION
+    UBreakIterator *breakiterator = strsrch->search->internalBreakIter;
+    if (breakiterator) {   // nothing to do when there is no internal break iterator
+           int32_t matchend = *end;
+           //int32_t matchstart = *start;
+           
+           if (!ubrk_isBoundary(breakiterator, matchend)) {
+               *end = ubrk_following(breakiterator, matchend);   // extend the match to the next boundary
+        }
+           
+           /* Check the start of the matched text to make sure it doesn't have any accents 
+            * before it.  This code may not be necessary and so it is commented out */
+           /*if (!ubrk_isBoundary(breakiterator, matchstart) && !ubrk_isBoundary(breakiterator, matchstart-1)) {
+               *start = ubrk_preceding(breakiterator, matchstart);
+           }*/
+    }
+#endif
+}
+
  /**
  * Determine whether the target text in UStringSearch bounded by the offset 
  * start and end is one or more whole units of text as 
@@ -422,11 +611,12 @@ inline void initialize(UStringSearch *strsrch, UErrorCode *status)
  * @param end target text end offset
  */
  static
-inline UBool isBreakUnit(const UStringSearch *strsrch, int32_t start, 
+UBool isBreakUnit(const UStringSearch *strsrch, int32_t start, 
                                 int32_t    end)
  {
  #if !UCONFIG_NO_BREAK_ITERATION
      UBreakIterator *breakiterator = strsrch->search->breakIter;
+    //TODO: Add here.
      if (breakiterator) {
          int32_t startindex = ubrk_first(breakiterator);
          int32_t endindex   = ubrk_last(breakiterator);
@@ -452,7 +642,7 @@ inline UBool isBreakUnit(const UStringSearch *strsrch, int32_t start,
              ucol_setText(coleiter, text, end - start, &status);
              for (int32_t count = 0; count < strsrch->pattern.CELength;
                   count ++) {
-                uint32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
+                int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
                  if (ce == UCOL_IGNORABLE) {
                      count --;
                      continue;
@@ -461,7 +651,7 @@ inline UBool isBreakUnit(const UStringSearch *strsrch, int32_t start,
                      return FALSE;
                  }
              }
-            uint32_t nextce = ucol_next(coleiter, &status);
+            int32_t nextce = ucol_next(coleiter, &status);
              while (ucol_getOffset(coleiter) == (end - start)
                     && getCE(strsrch, nextce) == UCOL_IGNORABLE) {
                  nextce = ucol_next(coleiter, &status);
@@ -522,7 +712,7 @@ static
  inline int32_t getNextUStringSearchBaseOffset(UStringSearch *strsrch, 
                                                    int32_t    textoffset)
  {
-       int32_t textlength = strsrch->search->textLength;
+    int32_t textlength = strsrch->search->textLength;
      if (strsrch->pattern.hasSuffixAccents && 
          textoffset < textlength) {
                int32_t  temp       = textoffset;
@@ -550,10 +740,10 @@ inline int32_t getNextUStringSearchBaseOffset(UStringSearch *strsrch,
  static
  inline int32_t shiftForward(UStringSearch *strsrch,
                                  int32_t    textoffset,
-                                uint32_t       ce,
+                                int32_t       ce,
                                  int32_t        patternceindex)
  {
-       UPattern *pattern = &(strsrch->pattern);
+    UPattern *pattern = &(strsrch->pattern);
      if (ce != UCOL_NULLORDER) {
          int32_t shift = pattern->shift[hash(ce)];
          // this is to adjust for characters in the middle of the 
@@ -577,6 +767,7 @@ inline int32_t shiftForward(UStringSearch *strsrch,
      // * next character is a accent: shift to the next base character
      return textoffset;
  }
+#endif // #if BOYER_MOORE
  
  /**
  * sets match not found 
@@ -596,6 +787,7 @@ inline void setMatchNotFound(UStringSearch *strsrch)
      }
  }
  
+#if BOYER_MOORE
  /**
  * Gets the offset to the next safe point in text.
  * ie. not the middle of a contraction, swappable characters or supplementary
@@ -693,7 +885,7 @@ UBool checkExtraMatchAccents(const UStringSearch *strsrch, int32_t start,
              uint32_t            firstce   = strsrch->pattern.CE[0];
              UBool               ignorable = TRUE;
              uint32_t            ce        = UCOL_IGNORABLE;
-            while (U_SUCCESS(*status) && ce != firstce) {
+            while (U_SUCCESS(*status) && ce != firstce && ce != (uint32_t)UCOL_NULLORDER) {
                  offset = ucol_getOffset(coleiter);
                  if (ce != firstce && ce != UCOL_IGNORABLE) {
                      ignorable = FALSE;
@@ -743,11 +935,11 @@ UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start,
          UErrorCode          status    = U_ZERO_ERROR;
          // we have been iterating forwards previously
          uint32_t            ignorable = TRUE;
-        uint32_t            firstce   = strsrch->pattern.CE[0];
+        int32_t             firstce   = strsrch->pattern.CE[0];
  
-               setColEIterOffset(coleiter, start);
-        uint32_t ce  = getCE(strsrch, ucol_next(coleiter, &status));
-               if (U_FAILURE(status)) {
+        setColEIterOffset(coleiter, start);
+        int32_t ce  = getCE(strsrch, ucol_next(coleiter, &status));
+        if (U_FAILURE(status)) {
              return TRUE;
          }
          while (ce != firstce) {
@@ -755,30 +947,30 @@ UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start,
                  ignorable = FALSE;
              }
              ce = getCE(strsrch, ucol_next(coleiter, &status));
-            if (U_FAILURE(status)) {
+            if (U_FAILURE(status) || ce == UCOL_NULLORDER) {
                  return TRUE;
              }
          }
-               if (!ignorable && inNormBuf(coleiter)) {
+        if (!ignorable && inNormBuf(coleiter)) {
              // within normalization buffer, discontiguous handled here
-                   return TRUE;
+            return TRUE;
          }
  
-               // within text
+        // within text
          int32_t temp = start;
-               // original code
-               // accent = (getFCD(strsrch->search->text, &temp, 
+        // original code
+        // accent = (getFCD(strsrch->search->text, &temp, 
          //                  strsrch->search->textLength) 
-               //           >> SECOND_LAST_BYTE_SHIFT_); 
-               // however this code does not work well with VC7 .net in release mode.
-               // maybe the inlines for getFCD combined with shifting has bugs in 
-               // VC7. anyways this is a work around.
-               UBool accent = getFCD(strsrch->search->text, &temp, 
+        //            >> SECOND_LAST_BYTE_SHIFT_); 
+        // however this code does not work well with VC7 .net in release mode.
+        // maybe the inlines for getFCD combined with shifting has bugs in 
+        // VC7. anyways this is a work around.
+        UBool accent = getFCD(strsrch->search->text, &temp, 
                                strsrch->search->textLength) > 0xFF;
          if (!accent) {
-                       return checkExtraMatchAccents(strsrch, start, end, &status);
+            return checkExtraMatchAccents(strsrch, start, end, &status);
          }
-               if (!ignorable) {
+        if (!ignorable) {
              return TRUE;
          }
          if (start > 0) {
@@ -825,12 +1017,13 @@ UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
                int32_t      textlength = strsrch->search->textLength;
          UTF_BACK_1(text, 0, temp);
          if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) {
-            uint32_t            firstce  = strsrch->pattern.CE[0];
+            int32_t             firstce  = strsrch->pattern.CE[0];
              UCollationElements *coleiter = strsrch->textIter;
              UErrorCode          status   = U_ZERO_ERROR;
+                       int32_t ce;
              setColEIterOffset(coleiter, start);
-            while (getCE(strsrch, ucol_next(coleiter, &status)) != firstce) {
-                if (U_FAILURE(status)) {
+            while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstce) {
+                if (U_FAILURE(status) || ce == UCOL_NULLORDER) {
                      return TRUE;
                  }
              }
@@ -846,10 +1039,14 @@ UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
                  }
                  count ++;
              }
-            uint32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
+            
+                       ce = ucol_next(coleiter, &status);
              if (U_FAILURE(status)) {
                  return TRUE;
              }
+            if (ce != UCOL_NULLORDER && ce != UCOL_IGNORABLE) {
+               ce = getCE(strsrch, ce);
+            }
              if (ce != UCOL_NULLORDER && ce != UCOL_IGNORABLE) {
                  if (ucol_getOffset(coleiter) <= end) {
                      return TRUE;
@@ -862,6 +1059,7 @@ UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
      }
      return FALSE;
  }
+#endif // #if BOYER_MOORE
  
  /**
  * Checks if the offset runs out of the text string
@@ -875,6 +1073,7 @@ inline UBool isOutOfBounds(int32_t textlength, int32_t offset)
      return offset < 0 || offset > textlength;
  }
  
+#if BOYER_MOORE
  /**
  * Checks for identical match
  * @param strsrch string search data
@@ -886,32 +1085,54 @@ static
  inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start, 
                                    int32_t    end) 
  {
+    UChar t2[32], p2[32];
      int32_t length = end - start;
      if (strsrch->strength != UCOL_IDENTICAL) {
          return TRUE;
      }
  
-    UErrorCode status = U_ZERO_ERROR;
-    int decomplength = unorm_decompose(NULL, -1, 
+    UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
+    int32_t decomplength = unorm_decompose(t2, LENGTHOF(t2), 
                                         strsrch->search->text + start, length, 
                                         FALSE, 0, &status);
-    if (decomplength != unorm_decompose(NULL, -1, strsrch->pattern.text, 
+    // use separate status2 in case of buffer overflow
+    if (decomplength != unorm_decompose(p2, LENGTHOF(p2),
+                                        strsrch->pattern.text, 
                                          strsrch->pattern.textLength,
-                                        FALSE, 0, &status)) {
-        return FALSE;
+                                        FALSE, 0, &status2)) {
+        return FALSE; // lengths are different
      }
-    decomplength ++;
-    UChar *text    = (UChar *)uprv_malloc(decomplength * sizeof(UChar));
-    UChar *pattern = (UChar *)uprv_malloc(decomplength * sizeof(UChar));
-    unorm_decompose(text, decomplength, strsrch->search->text + start, 
-                    length, FALSE, 0, &status);
-    unorm_decompose(pattern, decomplength, strsrch->pattern.text, 
-                    strsrch->pattern.textLength, FALSE, 0, &status);
-    UBool result = (uprv_memcmp(pattern, text, decomplength * sizeof(UChar)) 
-                    == 0);
-    uprv_free(text);
-    uprv_free(pattern);
-    return result;
+
+    // compare contents
+    UChar *text, *pattern;
+    if(U_SUCCESS(status)) {
+        text = t2;
+        pattern = p2;
+    } else if(status==U_BUFFER_OVERFLOW_ERROR) {
+        status = U_ZERO_ERROR;
+        // allocate one buffer for both decompositions
+        text = (UChar *)uprv_malloc(decomplength * 2 * U_SIZEOF_UCHAR);
+        // Check for allocation failure.
+        if (text == NULL) {
+               return FALSE;
+        }
+        pattern = text + decomplength;
+        unorm_decompose(text, decomplength, strsrch->search->text + start, 
+                        length, FALSE, 0, &status);
+        unorm_decompose(pattern, decomplength, strsrch->pattern.text, 
+                        strsrch->pattern.textLength, FALSE, 0, &status);
+    } else {
+        // NFD failed, make sure that u_memcmp() does not overrun t2 & p2
+        // and that we don't uprv_free() an undefined text pointer
+        text = pattern = t2;
+        decomplength = 0;
+    }
+    UBool result = (UBool)(u_memcmp(pattern, text, decomplength) == 0);
+    if(text != t2) {
+        uprv_free(text);
+    }
+    // return FALSE if NFD failed
+    return U_SUCCESS(status) && result;
  }
  
  /**
@@ -937,7 +1158,7 @@ inline UBool checkRepeatedMatch(UStringSearch *strsrch,
      else {
          result = start >= lastmatchindex;
      }
-    if (!strsrch->search->isOverlap) {
+    if (!result && !strsrch->search->isOverlap) {
          if (strsrch->search->isForwardSearching) {
              result = start < lastmatchindex + strsrch->search->matchedLength;
          }
@@ -960,7 +1181,7 @@ inline int32_t getColElemIterOffset(const UCollationElements *coleiter,
  {
      int32_t result = ucol_getOffset(coleiter);
      // intricacies of the the backwards collation element iterator
-    if (!forwards && inNormBuf(coleiter) && !isFCDPointerNull(coleiter)) {
+    if (FALSE && !forwards && inNormBuf(coleiter) && !isFCDPointerNull(coleiter)) {
          result ++;
      }
      return result;
@@ -988,18 +1209,18 @@ UBool checkNextExactContractionMatch(UStringSearch *strsrch,
  {
            UCollationElements *coleiter   = strsrch->textIter;
            int32_t             textlength = strsrch->search->textLength;
-          int32_t         temp       = *start;
+          int32_t             temp       = *start;
      const UCollator          *collator   = strsrch->collator;
      const UChar              *text       = strsrch->search->text;
      // This part checks if either ends of the match contains potential 
      // contraction. If so we'll have to iterate through them
-       // The start contraction needs to be checked since ucol_previous dumps
-       // all characters till the first safe character into the buffer.
-       // *start + 1 is used to test for the unsafe characters instead of *start 
-       // because ucol_prev takes all unsafe characters till the first safe 
-       // character ie *start. so by testing *start + 1, we can estimate if 
-       // excess prefix characters has been included in the potential search 
-       // results.
+    // The start contraction needs to be checked since ucol_previous dumps
+    // all characters till the first safe character into the buffer.
+    // *start + 1 is used to test for the unsafe characters instead of *start 
+    // because ucol_prev takes all unsafe characters till the first safe 
+    // character ie *start. so by testing *start + 1, we can estimate if 
+    // excess prefix characters has been included in the potential search 
+    // results.
      if ((*end < textlength && ucol_unsafeCP(text[*end], collator)) || 
          (*start + 1 < textlength 
           && ucol_unsafeCP(text[*start + 1], collator))) {
@@ -1015,9 +1236,9 @@ UBool checkNextExactContractionMatch(UStringSearch *strsrch,
              // we are only looking for acute and ring \u030A and \u0301, we'll
              // have to skip the first ce in the expansion buffer.
              ucol_next(coleiter, status);
-                       if (U_FAILURE(*status)) {
-                               return FALSE;
-                       }
+            if (U_FAILURE(*status)) {
+                return FALSE;
+            }
              if (ucol_getOffset(coleiter) != temp) {
                  *start = temp;
                  temp  = ucol_getOffset(coleiter);
@@ -1025,11 +1246,11 @@ UBool checkNextExactContractionMatch(UStringSearch *strsrch,
              expansion --;
          }
  
-        uint32_t *patternce       = strsrch->pattern.CE;
+        int32_t  *patternce       = strsrch->pattern.CE;
          int32_t   patterncelength = strsrch->pattern.CELength;
          int32_t   count           = 0;
          while (count < patterncelength) {
-            uint32_t ce = getCE(strsrch, ucol_next(coleiter, status));
+            int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
              if (ce == UCOL_IGNORABLE) {
                  continue;
              }
@@ -1075,8 +1296,8 @@ inline UBool checkNextExactMatch(UStringSearch *strsrch,
      UCollationElements *coleiter = strsrch->textIter;
      int32_t         start    = getColElemIterOffset(coleiter, FALSE);        
          
-       if (!checkNextExactContractionMatch(strsrch, &start, textoffset, status)) {
-           return FALSE;
+    if (!checkNextExactContractionMatch(strsrch, &start, textoffset, status)) {
+        return FALSE;
      }
  
      // this totally matches, however we need to check if it is repeating
@@ -1085,16 +1306,21 @@ inline UBool checkNextExactMatch(UStringSearch *strsrch,
          hasAccentsBeforeMatch(strsrch, start, *textoffset) || 
          !checkIdentical(strsrch, start, *textoffset) ||
          hasAccentsAfterMatch(strsrch, start, *textoffset)) {
-               
-               (*textoffset) ++;
+
+        (*textoffset) ++;
          *textoffset = getNextUStringSearchBaseOffset(strsrch, *textoffset);  
-               return FALSE;
+        return FALSE;
+    }
+
+    //Add breakiterator boundary check for primary strength search.
+    if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
+       checkBreakBoundary(strsrch, &start, textoffset);
      }
          
      // totally match, we will get rid of the ending ignorables.
      strsrch->search->matchedIndex  = start;
      strsrch->search->matchedLength = *textoffset - start;
-       return TRUE;
+    return TRUE;
  }
  
  /**
@@ -1110,7 +1336,7 @@ inline int32_t getPreviousBaseOffset(const UChar       *text,
                                                 int32_t  textoffset)
  {
      if (textoffset > 0) {
-        while (TRUE) {
+        for (;;) {
              int32_t result = textoffset;
              UTF_BACK_1(text, 0, textoffset);
              int32_t temp = textoffset;
@@ -1224,10 +1450,10 @@ inline UBool checkCollationMatch(const UStringSearch      *strsrch,
                                         UCollationElements *coleiter)
  {
      int         patternceindex = strsrch->pattern.CELength;
-    uint32_t   *patternce      = strsrch->pattern.CE;
+    int32_t    *patternce      = strsrch->pattern.CE;
      UErrorCode  status = U_ZERO_ERROR;
      while (patternceindex > 0) {
-        uint32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
+        int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
          if (ce == UCOL_IGNORABLE) {
              continue;
          }
@@ -1288,7 +1514,7 @@ int32_t doNextCanonicalPrefixMatch(UStringSearch *strsrch,
      int32_t         accentsindex[INITIAL_ARRAY_SIZE_];      
      int32_t         accentsize = getUnblockedAccentIndex(accents, 
                                                                   accentsindex);
-    int32_t         count      = (2 << (accentsize - 1)) - 2;  
+    int32_t         count      = (2 << (accentsize - 1)) - 1; 
      UChar               buffer[INITIAL_ARRAY_SIZE_];
      UCollationElements *coleiter   = strsrch->utilIter;
      while (U_SUCCESS(*status) && count > 0) {
@@ -1423,13 +1649,13 @@ int32_t doNextCanonicalSuffixMatch(UStringSearch *strsrch,
      ucol_setText(coleiter, safetext, safetextlength, status);
      // status checked in loop below
  
-    uint32_t *ce        = strsrch->pattern.CE;
-    uint32_t  celength  = strsrch->pattern.CELength;
+    int32_t  *ce        = strsrch->pattern.CE;
+    int32_t   celength  = strsrch->pattern.CELength;
      int       ceindex   = celength - 1;
      UBool     isSafe    = TRUE; // indication flag for position in safe zone
      
      while (ceindex >= 0) {
-        uint32_t textce = ucol_previous(coleiter, status);
+        int32_t textce = ucol_previous(coleiter, status);
          if (U_FAILURE(*status)) {
              if (isSafe) {
                  cleanUpSafeText(strsrch, safetext, safebuffer);
@@ -1560,8 +1786,8 @@ UBool doNextCanonicalMatch(UStringSearch *strsrch,
      int32_t accentsindex[INITIAL_ARRAY_SIZE_];
      int32_t size = getUnblockedAccentIndex(accents, accentsindex);
  
-    // 2 power n - 1 minus the full set of accents
-    int32_t  count = (2 << (size - 1)) - 2;  
+    // 2 to the power n, minus 1: the count now includes the full set of accents
+    int32_t  count = (2 << (size - 1)) - 1;
      while (U_SUCCESS(*status) && count > 0) {
          UChar *rearrange = strsrch->canonicalSuffixAccents;
          // copy the base characters
@@ -1638,7 +1864,7 @@ UBool checkNextCanonicalContractionMatch(UStringSearch *strsrch,
      const UChar              *text       = strsrch->search->text;
      // This part checks if either ends of the match contains potential 
      // contraction. If so we'll have to iterate through them
-       if ((*end < textlength && ucol_unsafeCP(text[*end], collator)) || 
+    if ((*end < textlength && ucol_unsafeCP(text[*end], collator)) || 
          (*start + 1 < textlength 
           && ucol_unsafeCP(text[*start + 1], collator))) {
          int32_t expansion  = getExpansionPrefix(coleiter);
@@ -1653,9 +1879,9 @@ UBool checkNextCanonicalContractionMatch(UStringSearch *strsrch,
              // we are only looking for acute and ring \u030A and \u0301, we'll
              // have to skip the first ce in the expansion buffer.
              ucol_next(coleiter, status);
-                       if (U_FAILURE(*status)) {
-                               return FALSE;
-                       }
+            if (U_FAILURE(*status)) {
+                return FALSE;
+            }
              if (ucol_getOffset(coleiter) != temp) {
                  *start = temp;
                  temp  = ucol_getOffset(coleiter);
@@ -1663,12 +1889,12 @@ UBool checkNextCanonicalContractionMatch(UStringSearch *strsrch,
              expansion --;
          }
  
-        uint32_t *patternce       = strsrch->pattern.CE;
+        int32_t  *patternce       = strsrch->pattern.CE;
          int32_t   patterncelength = strsrch->pattern.CELength;
          int32_t   count           = 0;
          int32_t   textlength      = strsrch->search->textLength;
          while (count < patterncelength) {
-            uint32_t ce = getCE(strsrch, ucol_next(coleiter, status));
+            int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
              // status checked below, note that if status is a failure
              // ucol_next returns UCOL_NULLORDER
              if (ce == UCOL_IGNORABLE) {
@@ -1683,7 +1909,7 @@ UBool checkNextCanonicalContractionMatch(UStringSearch *strsrch,
                  // accents may have extra starting ces, this occurs when a 
                  // pure accent pattern is matched without rearrangement
                  // text \u0325\u0300 and looking for \u0300
-                uint32_t expected = patternce[0]; 
+                int32_t expected = patternce[0]; 
                  if (getFCD(text, start, textlength) & LAST_BYTE_MASK_) {
                      ce = getCE(strsrch, ucol_next(coleiter, status));
                      while (U_SUCCESS(*status) && ce != expected && 
@@ -1781,7 +2007,7 @@ inline UBool checkNextCanonicalMatch(UStringSearch *strsrch,
  static
  inline int32_t reverseShift(UStringSearch *strsrch,
                                  int32_t    textoffset,
-                                uint32_t       ce,
+                                int32_t       ce,
                                  int32_t        patternceindex)
  {         
      if (strsrch->search->isOverlap) {
@@ -1835,9 +2061,9 @@ UBool checkPreviousExactContractionMatch(UStringSearch *strsrch,
      const UChar              *text       = strsrch->search->text;
      // This part checks if either if the start of the match contains potential 
      // contraction. If so we'll have to iterate through them
-       // Since we used ucol_next while previously looking for the potential 
-       // match, this guarantees that our end will not be a partial contraction,
-       // or a partial supplementary character.
+    // Since we used ucol_next while previously looking for the potential 
+    // match, this guarantees that our end will not be a partial contraction,
+    // or a partial supplementary character.
      if (*start < textlength && ucol_unsafeCP(text[*start], collator)) {
          int32_t expansion  = getExpansionSuffix(coleiter);
          UBool   expandflag = expansion > 0;
@@ -1851,9 +2077,9 @@ UBool checkPreviousExactContractionMatch(UStringSearch *strsrch,
              // we are only looking for A ring A\u030A, we'll have to skip the 
              // last ce in the expansion buffer
              ucol_previous(coleiter, status);
-                       if (U_FAILURE(*status)) {
-                               return FALSE;
-                       }
+            if (U_FAILURE(*status)) {
+                return FALSE;
+            }
              if (ucol_getOffset(coleiter) != temp) {
                  *end = temp;
                  temp  = ucol_getOffset(coleiter);
@@ -1861,11 +2087,11 @@ UBool checkPreviousExactContractionMatch(UStringSearch *strsrch,
              expansion --;
          }
  
-        uint32_t *patternce       = strsrch->pattern.CE;
+        int32_t  *patternce       = strsrch->pattern.CE;
          int32_t   patterncelength = strsrch->pattern.CELength;
          int32_t   count           = patterncelength;
          while (count > 0) {
-            uint32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
+            int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
              // status checked below, note that if status is a failure
              // ucol_previous returns UCOL_NULLORDER
              if (ce == UCOL_IGNORABLE) {
@@ -1933,6 +2159,12 @@ inline UBool checkPreviousExactMatch(UStringSearch *strsrch,
                                              *textoffset);
          return FALSE;
      }
+    
+    //Add breakiterator boundary check for primary strength search.
+    if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
+       checkBreakBoundary(strsrch, textoffset, &end);
+    }
+    
      strsrch->search->matchedIndex = *textoffset;
      strsrch->search->matchedLength = end - *textoffset;
      return TRUE;
@@ -1984,7 +2216,7 @@ int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch,
          int32_t         accentsindex[INITIAL_ARRAY_SIZE_];      
          int32_t         accentsize = getUnblockedAccentIndex(accents, 
                                                           accentsindex);
-        int32_t         count      = (2 << (accentsize - 1)) - 2;  
+        int32_t         count      = (2 << (accentsize - 1)) - 1;  
          UChar               buffer[INITIAL_ARRAY_SIZE_];
          UCollationElements *coleiter = strsrch->utilIter;
          while (U_SUCCESS(*status) && count > 0) {
@@ -2080,14 +2312,14 @@ int32_t doPreviousCanonicalPrefixMatch(UStringSearch *strsrch,
      ucol_setText(coleiter, safetext, safetextlength, status);
      // status checked in loop below
      
-    uint32_t *ce           = strsrch->pattern.CE;
+    int32_t  *ce           = strsrch->pattern.CE;
      int32_t   celength     = strsrch->pattern.CELength;
      int       ceindex      = 0;
      UBool     isSafe       = TRUE; // safe zone indication flag for position
      int32_t   prefixlength = u_strlen(strsrch->canonicalPrefixAccents);
      
      while (ceindex < celength) {
-        uint32_t textce = ucol_next(coleiter, status);
+        int32_t textce = ucol_next(coleiter, status);
          if (U_FAILURE(*status)) {
              if (isSafe) {
                  cleanUpSafeText(strsrch, safetext, safebuffer);
@@ -2217,8 +2449,8 @@ UBool doPreviousCanonicalMatch(UStringSearch *strsrch,
      int32_t accentsindex[INITIAL_ARRAY_SIZE_];
      int32_t size = getUnblockedAccentIndex(accents, accentsindex);
  
-    // 2 power n - 1 minus the full set of accents
-    int32_t  count = (2 << (size - 1)) - 2;  
+    // 2 to the power n, minus 1: the count now includes the full set of accents
+    int32_t  count = (2 << (size - 1)) - 1;  
      while (U_SUCCESS(*status) && count > 0) {
          UChar *rearrange = strsrch->canonicalPrefixAccents;
          // copy the base characters
@@ -2267,11 +2499,11 @@ UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch,
            int32_t         temp       = *end;
      const UCollator          *collator   = strsrch->collator;
      const UChar              *text       = strsrch->search->text;
-       // This part checks if either if the start of the match contains potential 
+    // This part checks whether the start of the match contains a potential
      // contraction. If so we'll have to iterate through them
-       // Since we used ucol_next while previously looking for the potential 
-       // match, this guarantees that our end will not be a partial contraction,
-       // or a partial supplementary character.
+    // Since we used ucol_next while previously looking for the potential 
+    // match, this guarantees that our end will not be a partial contraction,
+    // or a partial supplementary character.
      if (*start < textlength && ucol_unsafeCP(text[*start], collator)) {
          int32_t expansion  = getExpansionSuffix(coleiter);
          UBool   expandflag = expansion > 0;
@@ -2285,9 +2517,9 @@ UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch,
              // we are only looking for A ring A\u030A, we'll have to skip the 
              // last ce in the expansion buffer
              ucol_previous(coleiter, status);
-                       if (U_FAILURE(*status)) {
-                               return FALSE;
-                       }
+            if (U_FAILURE(*status)) {
+                return FALSE;
+            }
              if (ucol_getOffset(coleiter) != temp) {
                  *end = temp;
                  temp  = ucol_getOffset(coleiter);
@@ -2295,11 +2527,11 @@ UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch,
              expansion --;
          }
  
-        uint32_t *patternce       = strsrch->pattern.CE;
+        int32_t  *patternce       = strsrch->pattern.CE;
          int32_t   patterncelength = strsrch->pattern.CELength;
          int32_t   count           = patterncelength;
          while (count > 0) {
-            uint32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
+            int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
              // status checked below, note that if status is a failure
              // ucol_previous returns UCOL_NULLORDER
              if (ce == UCOL_IGNORABLE) {
@@ -2314,7 +2546,7 @@ UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch,
                  ce != patternce[patterncelength - 1]) {
                  // accents may have extra starting ces, this occurs when a 
                  // pure accent pattern is matched without rearrangement
-                uint32_t    expected = patternce[patterncelength - 1];
+                int32_t    expected = patternce[patterncelength - 1];
                  UTF_BACK_1(text, 0, *end);
                  if (getFCD(text, end, textlength) & LAST_BYTE_MASK_) {
                      ce = getCE(strsrch, ucol_previous(coleiter, status));
@@ -2397,6 +2629,7 @@ inline UBool checkPreviousCanonicalMatch(UStringSearch *strsrch,
      strsrch->search->matchedLength = end - *textoffset;
      return TRUE;
  }
+#endif // #if BOYER_MOORE
  
  // constructors and destructor -------------------------------------------
  
@@ -2460,6 +2693,13 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
  #endif
      if (pattern == NULL || text == NULL || collator == NULL) {
          *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    // string search does not really work when numeric collation is turned on
+    if(ucol_getAttribute(collator, UCOL_NUMERIC_COLLATION, status) == UCOL_ON) {
+        *status = U_UNSUPPORTED_ERROR;
+        return NULL;
      }
  
      if (U_SUCCESS(*status)) {
@@ -2512,17 +2752,20 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
          result->pattern.text       = pattern;
          result->pattern.textLength = patternlength;
          result->pattern.CE         = NULL;
+        result->pattern.PCE        = NULL;
          
          result->search->breakIter  = breakiter;
  #if !UCONFIG_NO_BREAK_ITERATION
+        result->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocale(result->collator, ULOC_VALID_LOCALE, status), text, textlength, status);
          if (breakiter) {
-            ubrk_setText(breakiter, text, textlength, status);
+               ubrk_setText(breakiter, text, textlength, status);
          }
  #endif
  
          result->ownCollator           = FALSE;
          result->search->matchedLength = 0;
          result->search->matchedIndex  = USEARCH_DONE;
+        result->utilIter              = NULL;
          result->textIter              = ucol_openElements(collator, text, 
                                                            textlength, status);
          if (U_FAILURE(*status)) {
@@ -2530,8 +2773,6 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
              return NULL;
          }
  
-        result->utilIter              = NULL;
-
          result->search->isOverlap          = FALSE;
          result->search->isCanonicalMatch   = FALSE;
          result->search->isForwardSearching = TRUE;
@@ -2556,11 +2797,25 @@ U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch)
              strsrch->pattern.CE) {
              uprv_free(strsrch->pattern.CE);
          }
+
+        if (strsrch->pattern.PCE != NULL &&
+            strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
+            uprv_free(strsrch->pattern.PCE);
+        }
+
          ucol_closeElements(strsrch->textIter);
          ucol_closeElements(strsrch->utilIter);
+
          if (strsrch->ownCollator && strsrch->collator) {
              ucol_close((UCollator *)strsrch->collator);
          }
+
+#if !UCONFIG_NO_BREAK_ITERATION
+        if (strsrch->search->internalBreakIter) {
+               ubrk_close(strsrch->search->internalBreakIter);
+        }
+#endif
+
          uprv_free(strsrch->search);
          uprv_free(strsrch);
      }
@@ -2699,7 +2954,7 @@ U_CAPI void U_EXPORT2 usearch_setBreakIterator(UStringSearch  *strsrch,
                                                 UErrorCode     *status)
  {
      if (U_SUCCESS(*status) && strsrch) {
-        strsrch->search->breakIter = breakiter;
+       strsrch->search->breakIter = breakiter;
          if (breakiter) {
              ubrk_setText(breakiter, strsrch->search->text, 
                           strsrch->search->textLength, status);
@@ -2739,10 +2994,11 @@ U_CAPI void U_EXPORT2 usearch_setText(      UStringSearch *strsrch,
              strsrch->search->matchedLength = 0;
              strsrch->search->reset         = TRUE;
  #if !UCONFIG_NO_BREAK_ITERATION
-                       if (strsrch->search->breakIter != NULL) {
-                               ubrk_setText(strsrch->search->breakIter, text, 
-                                                        textlength, status);
-                       }
+            if (strsrch->search->breakIter != NULL) {
+                ubrk_setText(strsrch->search->breakIter, text, 
+                             textlength, status);
+            }
+            ubrk_setText(strsrch->search->internalBreakIter, text, textlength, status);
  #endif
          }
      }
@@ -2767,6 +3023,7 @@ U_CAPI void U_EXPORT2 usearch_setCollator(      UStringSearch *strsrch,
              *status = U_ILLEGAL_ARGUMENT_ERROR;
              return;
          }
+
          if (strsrch) {
              if (strsrch->ownCollator && (strsrch->collator != collator)) {
                  ucol_close((UCollator *)strsrch->collator);
@@ -2775,6 +3032,11 @@ U_CAPI void U_EXPORT2 usearch_setCollator(      UStringSearch *strsrch,
              strsrch->collator    = collator;
              strsrch->strength    = ucol_getStrength(collator);
              strsrch->ceMask      = getMask(strsrch->strength);
+#if !UCONFIG_NO_BREAK_ITERATION
+               ubrk_close(strsrch->search->internalBreakIter);
+               strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocale(collator, ULOC_VALID_LOCALE, status), 
+                                                                                                strsrch->search->text, strsrch->search->textLength, status);
+#endif
              // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
              strsrch->toShift     =  
                 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) == 
@@ -2784,13 +3046,23 @@ U_CAPI void U_EXPORT2 usearch_setCollator(      UStringSearch *strsrch,
              if (U_SUCCESS(*status)) {
                  initialize(strsrch, status);
                  if (U_SUCCESS(*status)) {
+                    /* free offset buffer to avoid memory leak before initializing. */
+                    freeOffsetBuffer(&(strsrch->textIter->iteratordata_));
                      uprv_init_collIterate(collator, strsrch->search->text, 
                                            strsrch->search->textLength, 
                                            &(strsrch->textIter->iteratordata_));
-                                       strsrch->utilIter->iteratordata_.coll = collator;
+                    strsrch->utilIter->iteratordata_.coll = collator;
                  }
              }
          }
+
+        // **** are these calls needed?
+        // **** we call uprv_init_pce in initializePatternPCETable
+        // **** and the CEBuffer constructor...
+#if 0
+        uprv_init_pce(strsrch->textIter);
+        uprv_init_pce(strsrch->utilIter);
+#endif
      }
  }
  
@@ -2921,21 +3193,33 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
                                            UErrorCode    *status)
  { 
      if (U_SUCCESS(*status) && strsrch) {
-        int32_t  offset     = usearch_getOffset(strsrch);
-        USearch     *search     = strsrch->search;
-        search->reset           = FALSE;
-        int32_t      textlength = search->textLength;
-        int32_t  matchedindex = search->matchedIndex;
+        // note offset is either equivalent to the start of the previous match
+        // or is set by the user
+        int32_t      offset       = usearch_getOffset(strsrch);
+        USearch     *search       = strsrch->search;
+        search->reset             = FALSE;
+        int32_t      textlength   = search->textLength;
          if (search->isForwardSearching) {
-            if (offset == textlength || matchedindex == textlength || 
-                (!search->isOverlap && 
+#if BOYER_MOORE
+            if (offset == textlength
+                || (!search->isOverlap && 
                      (offset + strsrch->pattern.defaultShiftSize > textlength ||
-                    (matchedindex != USEARCH_DONE && 
-                    matchedindex + search->matchedLength >= textlength)))) {
+                    (search->matchedIndex != USEARCH_DONE && 
+                     offset + search->matchedLength >= textlength)))) {
                  // not enough characters to match
                  setMatchNotFound(strsrch);
                  return USEARCH_DONE; 
              }
+#else
+            if (offset == textlength ||
+                (! search->isOverlap &&
+                (search->matchedIndex != USEARCH_DONE &&
+                offset + search->matchedLength > textlength))) {
+                    // not enough characters to match
+                    setMatchNotFound(strsrch);
+                    return USEARCH_DONE;
+            }
+#endif
          }
          else {
              // switching direction. 
@@ -2944,16 +3228,16 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
              // string. the iterator would have been set to offset 0 if a 
              // match is not found.
              search->isForwardSearching = TRUE;
-            if (matchedindex != USEARCH_DONE) {
+            if (search->matchedIndex != USEARCH_DONE) {
                  // there's no need to set the collation element iterator
                  // the next call to next will set the offset.
-                return matchedindex;
+                return search->matchedIndex;
              }
          }
  
          if (U_SUCCESS(*status)) {
              if (strsrch->pattern.CELength == 0) {
-                if (matchedindex == USEARCH_DONE) {
+                if (search->matchedIndex == USEARCH_DONE) {
                      search->matchedIndex = offset;
                  }
                  else { // moves by codepoints
@@ -2968,30 +3252,45 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
                  }
              }
              else {
-                               if (search->matchedLength > 0) {
-                                       // if matchlength is 0 we are at the start of the iteration
-                                       int offset = ucol_getOffset(strsrch->textIter);
-                                       if (search->isOverlap) {
-                                               ucol_setOffset(strsrch->textIter, offset + 1, status);
-                                       }
-                                       else {
-                                               ucol_setOffset(strsrch->textIter, 
-                                                                  offset + search->matchedLength, status);
-                                       }
-                               }
-                               if (search->isCanonicalMatch) {
-                                       // can't use exact here since extra accents are allowed.
-                                       usearch_handleNextCanonical(strsrch, status);
-                               }
-                               else {
-                                       usearch_handleNextExact(strsrch, status);
-                               }
-                       }
-            
+                if (search->matchedLength > 0) {
+                    // if matchlength is 0 we are at the start of the iteration
+                    if (search->isOverlap) {
+                        ucol_setOffset(strsrch->textIter, offset + 1, status);
+                    }
+                    else {
+                        ucol_setOffset(strsrch->textIter, 
+                                       offset + search->matchedLength, status);
+                    }
+                }
+                else {
+                    // for boundary check purposes. this will ensure that the
+                    // next match will not precede the current offset
+                    // note search->matchedIndex will always be set to something
+                    // in the code
+                    search->matchedIndex = offset - 1;
+                }
+
+                if (search->isCanonicalMatch) {
+                    // can't use exact here since extra accents are allowed.
+                    usearch_handleNextCanonical(strsrch, status);
+                }
+                else {
+                    usearch_handleNextExact(strsrch, status);
+                }
+            }
+
              if (U_FAILURE(*status)) {
                  return USEARCH_DONE;
              }
-            
+
+#if !BOYER_MOORE
+            if (search->matchedIndex == USEARCH_DONE) {
+                ucol_setOffset(strsrch->textIter, search->textLength, status);
+            } else {
+                ucol_setOffset(strsrch->textIter, search->matchedIndex, status);
+            }
+#endif
+
              return search->matchedIndex;
          }
      }
@@ -3027,6 +3326,7 @@ U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
              }
          }
          else {
+#if BOYER_MOORE
              if (offset == 0 || matchedindex == 0 ||
                  (!search->isOverlap && 
                      (offset < strsrch->pattern.defaultShiftSize ||
@@ -3036,6 +3336,14 @@ U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
                  setMatchNotFound(strsrch);
                  return USEARCH_DONE; 
              }
+#else
+            // Could check pattern length, but the
+            // linear search will do the right thing
+            if (offset == 0 || matchedindex == 0) {
+                setMatchNotFound(strsrch);
+                return USEARCH_DONE;
+            }
+#endif
          }
  
          if (U_SUCCESS(*status)) {
@@ -3054,6 +3362,14 @@ U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
                  }
              }
              else {
+#if !BOYER_MOORE
+                if (search->matchedIndex != USEARCH_DONE) {
+                    if (search->isOverlap) {
+                        ucol_setOffset(strsrch->textIter, search->matchedIndex + search->matchedLength - 2, status);
+                    }
+                }
+#endif
+
                  if (strsrch->search->isCanonicalMatch) {
                      // can't use exact here since extra accents are allowed.
                      usearch_handlePreviousCanonical(strsrch, status);
@@ -3114,6 +3430,8 @@ U_CAPI void U_EXPORT2 usearch_reset(UStringSearch *strsrch)
          if (!sameCollAttribute) {
              initialize(strsrch, &status);
          }
+        /* free offset buffer to avoid memory leak before initializing. */
+        freeOffsetBuffer(&(strsrch->textIter->iteratordata_));
          uprv_init_collIterate(strsrch->collator, strsrch->search->text, 
                                strsrch->search->textLength, 
                                &(strsrch->textIter->iteratordata_));
@@ -3126,6 +3444,701 @@ U_CAPI void U_EXPORT2 usearch_reset(UStringSearch *strsrch)
      }
  }
  
+//
+//  CEI  Collation Element + source text index.
+//       These structs are kept in the circular buffer.
+//       All three fields are filled in together by CEBuffer::get() /
+//       CEBuffer::getPrevious() from a single ucol_nextProcessed() /
+//       ucol_previousProcessed() call.
+//
+struct  CEI {
+    int64_t ce;          // processed CE value returned by the iterator call
+    int32_t lowIndex;    // source text index reported for this CE; presumably the start of its source range - confirm against ucol_nextProcessed
+    int32_t highIndex;   // second source text index reported for this CE; presumably the limit of its source range - confirm against ucol_nextProcessed
+};
+
+U_NAMESPACE_BEGIN
+
+
+//
+//  CEBuffer   A circular buffer of CEs from the text being searched.
+//             Logical CE indexes [firstIx, limitIx) are currently held;
+//             logical index n lives in slot (n % bufSize).
+//
+#define   DEFAULT_CEBUFFER_SIZE 50
+struct CEBuffer {
+    CEI                  defBuf[DEFAULT_CEBUFFER_SIZE];   // embedded storage, used when bufSize fits in it
+    CEI                 *buf;         // active storage: defBuf or a uprv_malloc'ed array.
+                                      // NOTE(review): NULL if that allocation failed; callers must check status.
+    int32_t              bufSize;     // number of slots; set to pattern.CELength+10 by the constructor
+    int32_t              firstIx;     // lowest logical CE index still held in the buffer
+    int32_t              limitIx;     // one past the highest logical CE index fetched so far
+    UCollationElements  *ceIter;      // the search's text iterator (ss->textIter) that CEs are read from
+    UStringSearch       *strSearch;   // the search object this buffer was built for
+
+
+
+               CEBuffer(UStringSearch *ss, UErrorCode *status);
+               ~CEBuffer();
+   const CEI   *get(int32_t index);           // fetches via ucol_nextProcessed (forward)
+   const CEI   *getPrevious(int32_t index);   // fetches via ucol_previousProcessed (backward)
+};
+
+
+CEBuffer::CEBuffer(UStringSearch *ss, UErrorCode *status) {
+    buf = defBuf;
+    strSearch = ss;
+    bufSize = ss->pattern.CELength+10;
+    ceIter    = ss->textIter;
+    firstIx = 0;
+    limitIx = 0;
+
+    uprv_init_pce(ceIter);
+
+    if (bufSize>DEFAULT_CEBUFFER_SIZE) {
+        buf = (CEI *)uprv_malloc(bufSize * sizeof(CEI));
+        if (buf == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+// TODO: add a reset or init function so that allocated
+//       buffers can be retained & reused.
+
+// Release the heap array, if one was allocated; the embedded defBuf
+// needs no cleanup.
+CEBuffer::~CEBuffer() {
+    const UBool usingHeapStorage = (buf != defBuf);
+
+    if (usingHeapStorage) {
+        uprv_free(buf);
+    }
+}
+
+
+// Get the CE with the specified index.
+//   Index must be in the range
+//          n-history_size < index < n+1
+//   where n is the largest index to have been fetched by some previous call to this function.
+//   The CE value will be UCOL__PROCESSED_NULLORDER at end of input.
+//
+const CEI *CEBuffer::get(int32_t index) {
+    int i = index % bufSize;
+
+    if (index>=firstIx && index<limitIx) {
+        // The request was for an entry already in our buffer.
+        //  Just return it.
+        return &buf[i];
+    }
+
+    // Caller is requesting a new, never accessed before, CE.
+    //   Verify that it is the next one in sequence, which is all
+    //   that is allowed.
+    if (index != limitIx) {
+        U_ASSERT(FALSE);
+
+        return NULL;
+    }
+
+    // Manage the circular CE buffer indexing
+    limitIx++;
+
+    if (limitIx - firstIx >= bufSize) {
+        // The buffer is full, knock out the lowest-indexed entry.
+        firstIx++;
+    }
+
+    UErrorCode status = U_ZERO_ERROR;
+
+    buf[i].ce = ucol_nextProcessed(ceIter, &buf[i].lowIndex, &buf[i].highIndex, &status);
+
+    return &buf[i];
+}
+
+// Get the CE with the specified index.
+//   Index must be in the range
+//          n-history_size < index < n+1
+//   where n is the largest index to have been fetched by some previous call to this function.
+//   The CE value will be UCOL__PROCESSED_NULLORDER at end of input.
+//
+const CEI *CEBuffer::getPrevious(int32_t index) {
+    int i = index % bufSize;
+
+    if (index>=firstIx && index<limitIx) {
+        // The request was for an entry already in our buffer.
+        //  Just return it.
+        return &buf[i];
+    }
+
+    // Caller is requesting a new, never accessed before, CE.
+    //   Verify that it is the next one in sequence, which is all
+    //   that is allowed.
+    if (index != limitIx) {
+        U_ASSERT(FALSE);
+
+        return NULL;
+    }
+
+    // Manage the circular CE buffer indexing
+    limitIx++;
+
+    if (limitIx - firstIx >= bufSize) {
+        // The buffer is full, knock out the lowest-indexed entry.
+        firstIx++;
+    }
+
+    UErrorCode status = U_ZERO_ERROR;
+
+    buf[i].ce = ucol_previousProcessed(ceIter, &buf[i].lowIndex, &buf[i].highIndex, &status);
+
+    return &buf[i];
+}
+
+U_NAMESPACE_END
+
+
+// #define USEARCH_DEBUG
+
+#ifdef USEARCH_DEBUG
+#include <stdio.h>
+#include <stdlib.h>
+#endif
+
+/*
+ * Find the next break boundary after startIndex. If the UStringSearch object
+ * has an external break iterator, use that. Otherwise use the internal character
+ * break iterator.
+ */
+static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex) {
+#if 0
+    const UChar *text = strsrch->search->text;
+    int32_t textLen   = strsrch->search->textLength;
+    
+    U_ASSERT(startIndex>=0);
+    U_ASSERT(startIndex<=textLen);
+    
+    if (startIndex >= textLen) {
+        return startIndex;
+    }
+
+    UChar32  c;
+    int32_t  i = startIndex;
+    U16_NEXT(text, i, textLen, c);
+    
+    // If we are on a control character, stop without looking for combining marks.
+    //    Control characters do not combine.
+    int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
+    if (gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR) {
+        return i;
+    }
+    
+    // The initial character was not a control, and can thus accept trailing
+    //   combining characters.  Advance over however many of them there are.
+    int32_t  indexOfLastCharChecked;
+    for (;;) {
+        indexOfLastCharChecked = i;
+        if (i>=textLen) {
+            break;
+        }
+        U16_NEXT(text, i, textLen, c);
+        gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
+        if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
+            break;
+        }
+    }
+    return indexOfLastCharChecked;
+#elif !UCONFIG_NO_BREAK_ITERATION
+    UBreakIterator *breakiterator = strsrch->search->breakIter;
+
+    if (breakiterator == NULL) {
+        breakiterator = strsrch->search->internalBreakIter;
+    }
+
+    if (breakiterator != NULL) {
+       return ubrk_following(breakiterator, startIndex);
+    }
+
+    return startIndex;
+#else
+    // **** or should we use the original code? ****
+    return startIndex;
+#endif
+
+}
+
+/*
+ * NOTE: despite its name, this returns TRUE when index is NOT on a break
+ * boundary. The compiled branch returns the negation of ubrk_isBoundary(),
+ * and the callers in this file reject a candidate match when it returns TRUE.
+ *
+ * If the UStringSearch has an external break iterator, the test uses that,
+ * otherwise it uses the internal character break iterator. If neither is
+ * available, or break iteration is configured out, the result is FALSE.
+ *
+ * The "#if 0" branch is an older hand-written test with the same polarity
+ * (TRUE == inside a combining sequence); it is never compiled.
+ */
+static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index) {
+#if 0
+    const UChar *text = strsrch->search->text;
+    int32_t textLen   = strsrch->search->textLength;
+    
+    U_ASSERT(index>=0);
+    U_ASSERT(index<=textLen);
+    
+    if (index>=textLen || index<=0) {
+        return FALSE;
+    }
+  
+    // If the character at the current index is not a GRAPHEME_EXTEND
+    //    then we can not be within a combining sequence.
+    UChar32  c;
+    U16_GET(text, 0, index, textLen, c);
+    int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
+    if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
+        return FALSE;
+    }
+    
+    // We are at a combining mark.  If the preceding character is anything
+    //   except a CONTROL, CR or LF, we are in a combining sequence.
+    U16_PREV(text, 0, index, c);    
+    gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
+    UBool combining =  !(gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR);  
+    return combining;
+#elif !UCONFIG_NO_BREAK_ITERATION
+    UBreakIterator *breakiterator = strsrch->search->breakIter;
+
+    if (breakiterator == NULL) {
+        breakiterator = strsrch->search->internalBreakIter;
+    }
+
+    // TRUE only when an iterator exists and says index is not a boundary.
+    return (breakiterator != NULL && ! ubrk_isBoundary(breakiterator, index));
+#else
+    // **** or use the original code? ****
+    return FALSE;
+#endif
+}      
+
+// onBreakBoundaries is compiled out by the surrounding "#if 0" and kept for
+// reference only. As written it would return TRUE when both start and end
+// are boundaries of the user-supplied break iterator, or when no such
+// iterator is set, and FALSE for out-of-range indexes.
+#if 0
+static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int32_t end)
+{
+#if !UCONFIG_NO_BREAK_ITERATION
+    UBreakIterator *breakiterator = strsrch->search->breakIter;
+
+    if (breakiterator != NULL) {
+        int32_t startindex = ubrk_first(breakiterator);
+        int32_t endindex   = ubrk_last(breakiterator);
+        
+        // out-of-range indexes are never boundary positions
+        if (start < startindex || start > endindex ||
+            end < startindex || end > endindex) {
+            return FALSE;
+        }
+
+        return ubrk_isBoundary(breakiterator, start) && 
+               ubrk_isBoundary(breakiterator, end);
+    }
+#endif
+
+    return TRUE;
+}
+#endif
+
+    
+U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch  *strsrch,
+                                       int32_t        startIdx,
+                                       int32_t        *matchStart,
+                                       int32_t        *matchLimit,
+                                       UErrorCode     *status) 
+{
+    if (U_FAILURE(*status)) {
+        return FALSE;
+    }
+
+    // TODO:  reject search patterns beginning with a combining char.
+
+#ifdef USEARCH_DEBUG
+    if (getenv("USEARCH_DEBUG") != NULL) {
+        printf("Pattern CEs\n");
+        for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
+            printf(" %8x", strsrch->pattern.CE[ii]);
+        }
+        printf("\n");
+    }
+    
+#endif
+    // Input parameter sanity check.
+    //  TODO:  should input indicies clip to the text length
+    //         in the same way that UText does.
+    if(strsrch->pattern.CELength == 0         || 
+       startIdx < 0                           ||
+       startIdx > strsrch->search->textLength ||
+       strsrch->pattern.CE == NULL) {
+           *status = U_ILLEGAL_ARGUMENT_ERROR;
+           return FALSE;
+    }
+
+    if (strsrch->pattern.PCE == NULL) {
+        initializePatternPCETable(strsrch, status);
+    }
+
+    ucol_setOffset(strsrch->textIter, startIdx, status);
+    CEBuffer ceb(strsrch, status);
+    
+
+    int32_t    targetIx = 0;   
+    const CEI *targetCEI;
+    int32_t    patIx;
+    UBool      found;
+
+    int32_t  mStart = -1;
+    int32_t  mLimit = -1;
+    int32_t  minLimit;
+    int32_t  maxLimit;
+    
+    
+   
+    // Outer loop moves over match starting positions in the
+    //      target CE space.
+    for(targetIx=0; ; targetIx++)
+    {
+        found = TRUE;
+        //  Inner loop checks for a match beginning at each
+        //  position from the outer loop.
+        for (patIx=0; patIx<strsrch->pattern.CELength; patIx++) {
+            int64_t patCE = strsrch->pattern.PCE[patIx];
+            targetCEI = ceb.get(targetIx+patIx);
+            //  Compare CE from target string with CE from the pattern.
+            //    Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
+            //    which will fail the compare, below.
+            if (targetCEI->ce != patCE) {
+                found = FALSE;
+                break;
+            }
+        }
+
+        if (!found && targetCEI->ce != UCOL_PROCESSED_NULLORDER) {
+            // No match at this targetIx.  Try again at the next.
+            continue;
+        }
+
+        if (!found) {
+            // No match at all, we have run off the end of the target text.
+            break;
+        }
+
+
+        // We have found a match in CE space.
+        // Now determine the bounds in string index space.
+        //  There still is a chance of match failure if the CE range not correspond to
+        //     an acceptable character range.
+        //
+        const CEI *firstCEI = ceb.get(targetIx);
+        const CEI *lastCEI  = ceb.get(targetIx + strsrch->pattern.CELength - 1);
+        const CEI *nextCEI  = ceb.get(targetIx + strsrch->pattern.CELength);
+
+     // targetCEI = ceb.get(targetIx+strsrch->pattern.CELength);
+     // maxLimit = targetCEI->lowIndex;
+        mStart   = firstCEI->lowIndex;
+        minLimit = lastCEI->lowIndex;
+        maxLimit = nextCEI->lowIndex;
+
+        // Look at the CE following the match.  If it is UCOL_NULLORDER the match
+        //   extended to the end of input, and the match is good.
+
+        // Look at the high and low indices of the CE following the match. If
+        // they are the same it means one of two things:
+        //    1. The match extended to the last CE from the target text, which is OK, or
+        //    2. The last CE that was part of the match is in an expansion that extends
+        //       to the first CE after the match. In this case, we reject the match.
+        if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_PROCESSED_NULLORDER) {
+            found = FALSE;
+        }
+            
+
+        // Check for the start of the match being within a combining sequence.
+        //   This can happen if the pattern itself begins with a combining char, and
+        //   the match found combining marks in the target text that were attached
+        //    to something else.
+        //   This type of match should be rejected for not completely consuming a
+        //   combining sequence.
+        if (isBreakBoundary(strsrch, mStart)) {
+            found = FALSE;
+        }
+
+        // Check for the start of the match being within an Collation Element Expansion,
+        //   meaning that the first char of the match is only partially matched.
+        //   With exapnsions, the first CE will report the index of the source 
+        //   character, and all subsequent (expansions) CEs will report the source index of the
+        //    _following_ character.  
+        int32_t secondIx = firstCEI->highIndex;
+        if (mStart == secondIx) {
+            found = FALSE;
+        }
+    
+        //  Advance the match end position to the first acceptable match boundary.
+        //    This advances the index over any combining charcters.
+        mLimit = maxLimit;
+        if (minLimit < maxLimit) {
+            int32_t nba = nextBoundaryAfter(strsrch, minLimit);
+
+            if (nba >= lastCEI->highIndex) {
+                mLimit = nba;
+            }
+        }
+        
+    #ifdef USEARCH_DEBUG
+        if (getenv("USEARCH_DEBUG") != NULL) {
+            printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLimit, mLimit);
+        }
+    #endif
+ 
+        // If advancing to the end of a combining sequence in character indexing space
+        //   advanced us beyond the end of the match in CE space, reject this match. 
+        if (mLimit > maxLimit) {
+            found = FALSE;
+        }
+
+        if (isBreakBoundary(strsrch, mLimit)) {
+            found = FALSE;
+        }
+
+        if (found) {
+            break;
+        }
+    }
+
+    #ifdef USEARCH_DEBUG
+    if (getenv("USEARCH_DEBUG") != NULL) {
+        printf("Target CEs [%d .. %d]\n", ceb.firstIx, ceb.limitIx);
+        int32_t  lastToPrint = ceb.limitIx+2;
+        for (int ii=ceb.firstIx; ii<lastToPrint; ii++) {
+            printf("%8x@%d ", ceb.get(ii)->ce, ceb.get(ii)->srcIndex);
+        }
+        printf("\n%s\n", found? "match found" : "no match");
+    }
+    #endif
+
+    // All Done.  Store back the match bounds to the caller.
+    //
+    if (found==FALSE) {
+        mLimit = -1;
+        mStart = -1;
+    }
+
+    if (matchStart != NULL) {
+        *matchStart= mStart;
+    }
+
+    if (matchLimit != NULL) {
+        *matchLimit = mLimit;
+    }
+
+    return found;
+}
+
+    
+U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch  *strsrch,
+                                                int32_t        startIdx,
+                                                int32_t        *matchStart,
+                                                int32_t        *matchLimit,
+                                                UErrorCode     *status) 
+{
+    if (U_FAILURE(*status)) {
+        return FALSE;
+    }
+
+    // TODO:  reject search patterns beginning with a combining char.
+
+#ifdef USEARCH_DEBUG
+    if (getenv("USEARCH_DEBUG") != NULL) {
+        printf("Pattern CEs\n");
+        for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
+            printf(" %8x", strsrch->pattern.CE[ii]);
+        }
+        printf("\n");
+    }
+    
+#endif
+    // Input parameter sanity check.
+    //  TODO:  should input indicies clip to the text length
+    //         in the same way that UText does.
+    if(strsrch->pattern.CELength == 0         || 
+       startIdx < 0                           ||
+       startIdx > strsrch->search->textLength ||
+       strsrch->pattern.CE == NULL) {
+           *status = U_ILLEGAL_ARGUMENT_ERROR;
+           return FALSE;
+    }
+
+    if (strsrch->pattern.PCE == NULL) {
+        initializePatternPCETable(strsrch, status);
+    }
+
+    CEBuffer ceb(strsrch, status);
+    int32_t    targetIx = 0;   
+
+    /*
+     * Pre-load the buffer with the CE's for the grapheme
+     * after our starting position so that we're sure that
+     * we can look at the CE following the match when we
+     * check the match boundaries.
+     *
+     * This will also pre-fetch the first CE that we'll
+     * consider for the match.
+     */
+    if (startIdx < strsrch->search->textLength) {
+        UBreakIterator *bi = strsrch->search->internalBreakIter;
+        int32_t next = ubrk_following(bi, startIdx);
+
+        ucol_setOffset(strsrch->textIter, next, status);
+
+        for (targetIx = 0; ; targetIx += 1) {
+            if (ceb.getPrevious(targetIx)->lowIndex < startIdx) {
+                break;
+            }
+        }
+    } else {
+        ucol_setOffset(strsrch->textIter, startIdx, status);
+    }
+    
+
+   const CEI  *targetCEI;
+    int32_t    patIx;
+    UBool      found;
+
+    int32_t  limitIx = targetIx;
+    int32_t  mStart = -1;
+    int32_t  mLimit = -1;
+    int32_t  minLimit;
+    int32_t  maxLimit;
+    
+    
+   
+    // Outer loop moves over match starting positions in the
+    //      target CE space.
+    for(targetIx = limitIx; ; targetIx += 1)
+    {
+        found = TRUE;
+        //  Inner loop checks for a match beginning at each
+        //  position from the outer loop.
+        for (patIx = strsrch->pattern.CELength - 1; patIx >= 0; patIx -= 1) {
+            int64_t patCE = strsrch->pattern.PCE[patIx];
+
+            targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.CELength - 1 - patIx);
+            //  Compare CE from target string with CE from the pattern.
+            //    Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
+            //    which will fail the compare, below.
+            if (targetCEI->ce != patCE) {
+                found = FALSE;
+                break;
+            }
+        }
+
+        if (!found && targetCEI->ce != UCOL_PROCESSED_NULLORDER) {
+            // No match at this targetIx.  Try again at the next.
+            continue;
+        }
+
+        if (!found) {
+            // No match at all, we have run off the end of the target text.
+            break;
+        }
+
+
+        // We have found a match in CE space.
+        // Now determine the bounds in string index space.
+        //  There still is a chance of match failure if the CE range not correspond to
+        //     an acceptable character range.
+        //
+        const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.CELength - 1);
+        const CEI *lastCEI  = ceb.getPrevious(targetIx);
+        const CEI *nextCEI  = targetIx > 0? ceb.getPrevious(targetIx - 1) : NULL;
+
+        mStart   = firstCEI->lowIndex;
+        minLimit = lastCEI->lowIndex;
+        maxLimit = targetIx > 0? nextCEI->lowIndex : lastCEI->highIndex;
+
+        // Look at the CE following the match.  If it is UCOL_NULLORDER the match
+        //   extended to the end of input, and the match is good.
+
+        // Look at the high and low indices of the CE following the match. If
+        // they are the same it means one of two things:
+        //    1. The match extended to the last CE from the target text, which is OK, or
+        //    2. The last CE that was part of the match is in an expansion that extends
+        //       to the first CE after the match. In this case, we reject the match.
+        if (targetIx >= 1) {
+            if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_PROCESSED_NULLORDER) {
+                found = FALSE;
+            }
+        }
+
+
+        // Check for the start of the match being within a combining sequence.
+        //   This can happen if the pattern itself begins with a combining char, and
+        //   the match found combining marks in the target text that were attached
+        //    to something else.
+        //   This type of match should be rejected for not completely consuming a
+        //   combining sequence.
+        if (isBreakBoundary(strsrch, mStart)) {
+            found = FALSE;
+        }
+
+        // Look at the high index of the first CE in the match. If it's the same as the
+        // low index, the first CE in the match is in the middle of an expansion.
+        if (mStart == firstCEI->highIndex) {
+            found = FALSE;
+        }
+    
+        //  Advance the match end position to the first acceptable match boundary.
+        //    This advances the index over any combining charcters.
+        mLimit = maxLimit;
+        if (/*targetIx > 0 &&*/ minLimit < maxLimit) {
+            int32_t nba = nextBoundaryAfter(strsrch, minLimit);
+
+            if (nba >= lastCEI->highIndex) {
+                mLimit = nba;
+            }
+        }
+        
+    #ifdef USEARCH_DEBUG
+        if (getenv("USEARCH_DEBUG") != NULL) {
+            printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLimit, mLimit);
+        }
+    #endif
+        
+        // If advancing to the end of a combining sequence in character indexing space
+        //   advanced us beyond the end of the match in CE space, reject this match. 
+        if (mLimit > maxLimit) {
+            found = FALSE;
+        }
+
+        // Make sure the end of the match is on a break boundary
+        if (isBreakBoundary(strsrch, mLimit)) {
+            found = FALSE;
+        }
+
+        if (found) {
+            break;
+        }
+    }
+
+    #ifdef USEARCH_DEBUG
+    if (getenv("USEARCH_DEBUG") != NULL) {
+        printf("Target CEs [%d .. %d]\n", ceb.firstIx, ceb.limitIx);
+        int32_t  lastToPrint = ceb.limitIx+2;
+        for (int ii=ceb.firstIx; ii<lastToPrint; ii++) {
+            printf("%8x@%d ", ceb.get(ii)->ce, ceb.get(ii)->srcIndex);
+        }
+        printf("\n%s\n", found? "match found" : "no match");
+    }
+    #endif
+
+    // All Done.  Store back the match bounds to the caller.
+    //
+    if (found==FALSE) {
+        mLimit = -1;
+        mStart = -1;
+    }
+
+    if (matchStart != NULL) {
+        *matchStart= mStart;
+    }
+
+    if (matchLimit != NULL) {
+        *matchLimit = mLimit;
+    }
+
+    return found;
+}
+
+
+
+
  // internal use methods declared in usrchimp.h -----------------------------
  
  UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
@@ -3135,27 +4148,28 @@ UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
          return FALSE;
      }
  
-       UCollationElements *coleiter        = strsrch->textIter;
+#if BOYER_MOORE
+    UCollationElements *coleiter        = strsrch->textIter;
      int32_t             textlength      = strsrch->search->textLength;
-    uint32_t           *patternce       = strsrch->pattern.CE;
+    int32_t            *patternce       = strsrch->pattern.CE;
      int32_t             patterncelength = strsrch->pattern.CELength;
      int32_t             textoffset      = ucol_getOffset(coleiter);
  
-       // status used in setting coleiter offset, since offset is checked in
-       // shiftForward before setting the coleiter offset, status never 
-       // a failure
+    // status used in setting coleiter offset, since offset is checked in
+    // shiftForward before setting the coleiter offset, status never 
+    // a failure
      textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, 
                                patterncelength);
      while (textoffset <= textlength)
      {
          uint32_t    patternceindex = patterncelength - 1;
-        uint32_t    targetce;
+        int32_t     targetce;
          UBool       found          = FALSE;
-        uint32_t    lastce         = UCOL_NULLORDER;
-        
-               setColEIterOffset(coleiter, textoffset);
-    
-        while (TRUE) {
+        int32_t    lastce          = UCOL_NULLORDER;
+
+        setColEIterOffset(coleiter, textoffset);
+
+        for (;;) {
              // finding the last pattern ce match, imagine composite characters
              // for example: search for pattern A in text \u00C0
              // we'll have to skip \u0300 the grave first before we get to A
@@ -3184,9 +4198,10 @@ UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
              }
          }
  
-        targetce = lastce;
+        //targetce = lastce;
          
          while (found && patternceindex > 0) {
+               lastce = targetce;
              targetce    = ucol_previous(coleiter, status);
              if (U_FAILURE(*status) || targetce == UCOL_NULLORDER) {
                  found = FALSE;
@@ -3200,26 +4215,42 @@ UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
              patternceindex --;
              found = found && targetce == patternce[patternceindex]; 
          }
+        
+        targetce = lastce;
  
          if (!found) {
-                       if (U_FAILURE(*status)) {
-                               break;
-                       }
-            textoffset = shiftForward(strsrch, textoffset, targetce, 
+            if (U_FAILURE(*status)) {
+                break;
+            }
+            textoffset = shiftForward(strsrch, textoffset, lastce, 
                                        patternceindex);
              // status checked at loop.
              patternceindex = patterncelength;
              continue;
          }
-        
-               if (checkNextExactMatch(strsrch, &textoffset, status)) {
+
+        if (checkNextExactMatch(strsrch, &textoffset, status)) {
              // status checked in ucol_setOffset
-            setColEIterOffset(coleiter,        strsrch->search->matchedIndex);
-                       return TRUE;
+            setColEIterOffset(coleiter, strsrch->search->matchedIndex);
+            return TRUE;
          }
      }
      setMatchNotFound(strsrch);
-       return FALSE;
+    return FALSE;
+#else
+    int32_t textOffset = ucol_getOffset(strsrch->textIter);
+    int32_t start = -1;
+    int32_t end = -1;
+
+    if (usearch_search(strsrch, textOffset, &start, &end, status)) {
+        strsrch->search->matchedIndex  = start;
+        strsrch->search->matchedLength = end - start;
+        return TRUE;
+    } else {
+        setMatchNotFound(strsrch);
+        return FALSE;
+    }
+#endif
  }
  
  UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
@@ -3229,11 +4260,12 @@ UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
          return FALSE;
      }
  
+#if BOYER_MOORE
      UCollationElements *coleiter        = strsrch->textIter;
      int32_t             textlength      = strsrch->search->textLength;
-    uint32_t           *patternce       = strsrch->pattern.CE;
+    int32_t            *patternce       = strsrch->pattern.CE;
      int32_t             patterncelength = strsrch->pattern.CELength;
-    int32_t         textoffset      = ucol_getOffset(coleiter);
+    int32_t             textoffset      = ucol_getOffset(coleiter);
      UBool               hasPatternAccents = 
         strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
      
@@ -3245,13 +4277,13 @@ UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
      while (textoffset <= textlength)
      {
          int32_t     patternceindex = patterncelength - 1;
-        uint32_t    targetce;
+        int32_t     targetce;
          UBool       found          = FALSE;
-        uint32_t    lastce         = UCOL_NULLORDER;
-        
-               setColEIterOffset(coleiter, textoffset);
-    
-        while (TRUE) {
+        int32_t     lastce         = UCOL_NULLORDER;
+
+        setColEIterOffset(coleiter, textoffset);
+
+        for (;;) {
              // finding the last pattern ce match, imagine composite characters
              // for example: search for pattern A in text \u00C0
              // we'll have to skip \u0300 the grave first before we get to A
@@ -3274,7 +4306,6 @@ UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
                  break;
              }
          }
-        targetce = lastce;
          
          while (found && patternceindex > 0) {
              targetce    = ucol_previous(coleiter, status);
@@ -3295,17 +4326,17 @@ UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
          if (hasPatternAccents && !found) {
              strsrch->canonicalPrefixAccents[0] = 0;
              strsrch->canonicalSuffixAccents[0] = 0;
-                       if (U_FAILURE(*status)) {
-                               break;
-                       }
+            if (U_FAILURE(*status)) {
+                break;
+            }
              found = doNextCanonicalMatch(strsrch, textoffset, status);
          }
  
          if (!found) {
-                       if (U_FAILURE(*status)) {
-                               break;
-                       }
-            textoffset = shiftForward(strsrch, textoffset, targetce, 
+            if (U_FAILURE(*status)) {
+                break;
+            }
+            textoffset = shiftForward(strsrch, textoffset, lastce, 
                                        patternceindex);
              // status checked at loop
              patternceindex = patterncelength;
@@ -3319,6 +4350,20 @@ UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
      }
      setMatchNotFound(strsrch);
      return FALSE;
+#else
+    int32_t textOffset = ucol_getOffset(strsrch->textIter);
+    int32_t start = -1;
+    int32_t end = -1;
+
+    if (usearch_search(strsrch, textOffset, &start, &end, status)) {
+        strsrch->search->matchedIndex  = start;
+        strsrch->search->matchedLength = end - start;
+        return TRUE;
+    } else {
+        setMatchNotFound(strsrch);
+        return FALSE;
+    }
+#endif
  }
  
  UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
@@ -3328,10 +4373,11 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
          return FALSE;
      }
  
+#if BOYER_MOORE
      UCollationElements *coleiter        = strsrch->textIter;
-    uint32_t           *patternce       = strsrch->pattern.CE;
+    int32_t            *patternce       = strsrch->pattern.CE;
      int32_t             patterncelength = strsrch->pattern.CELength;
-    int32_t         textoffset      = ucol_getOffset(coleiter);
+    int32_t             textoffset      = ucol_getOffset(coleiter);
  
      // shifting it check for setting offset
      // if setOffset is called previously or there was no previous match, we
@@ -3346,14 +4392,14 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
      while (textoffset >= 0)
      {
          int32_t     patternceindex = 1;
-        uint32_t    targetce;
+        int32_t     targetce;
          UBool       found          = FALSE;
-        uint32_t    firstce        = UCOL_NULLORDER;
+        int32_t     firstce        = UCOL_NULLORDER;
  
-               // if status is a failure, ucol_setOffset does nothing
+        // if status is a failure, ucol_setOffset does nothing
          setColEIterOffset(coleiter, textoffset);
-        
-        while (TRUE) {
+
+        for (;;) {
              // finding the first pattern ce match, imagine composite 
              // characters. for example: search for pattern \u0300 in text 
              // \u00C0, we'll have to skip A first before we get to 
@@ -3367,7 +4413,7 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
              if (firstce == UCOL_NULLORDER || firstce == UCOL_IGNORABLE) {
                  firstce = targetce;
              }
-            if (targetce == UCOL_IGNORABLE) {
+            if (targetce == UCOL_IGNORABLE && strsrch->strength != UCOL_PRIMARY) {
                  continue;
              }         
              if (targetce == patternce[0]) {
@@ -3381,9 +4427,10 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
              }
          }
  
-        targetce = firstce;
+        //targetce = firstce;
          
          while (found && (patternceindex < patterncelength)) {
+               firstce = targetce;
              targetce    = ucol_next(coleiter, status);
              if (U_FAILURE(*status) || targetce == UCOL_NULLORDER) {
                  found = FALSE;
@@ -3397,11 +4444,14 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
              found = found && targetce == patternce[patternceindex]; 
              patternceindex ++;
          }
+        
+        targetce = firstce;
  
          if (!found) {
-                       if (U_FAILURE(*status)) {
-                               break;
-                       }
+            if (U_FAILURE(*status)) {
+                break;
+            }
+            
              textoffset = reverseShift(strsrch, textoffset, targetce, 
                                        patternceindex);
              patternceindex = 0;
@@ -3415,6 +4465,20 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
      }
      setMatchNotFound(strsrch);
      return FALSE;
+#else
+    int32_t textOffset = ucol_getOffset(strsrch->textIter);
+    int32_t start = -1;
+    int32_t end = -1;
+
+    if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
+        strsrch->search->matchedIndex = start;
+        strsrch->search->matchedLength = end - start;
+        return TRUE;
+    } else {
+        setMatchNotFound(strsrch);
+        return FALSE;
+    }
+#endif
  }
  
  UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, 
@@ -3425,10 +4489,11 @@ UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
          return FALSE;
      }
  
+#if BOYER_MOORE
      UCollationElements *coleiter        = strsrch->textIter;
-    uint32_t           *patternce       = strsrch->pattern.CE;
+    int32_t            *patternce       = strsrch->pattern.CE;
      int32_t             patterncelength = strsrch->pattern.CELength;
-    int32_t         textoffset      = ucol_getOffset(coleiter);
+    int32_t             textoffset      = ucol_getOffset(coleiter);
      UBool               hasPatternAccents = 
         strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
            
@@ -3447,12 +4512,12 @@ UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
      while (textoffset >= 0)
      {
          int32_t     patternceindex = 1;
-        uint32_t    targetce;
+        int32_t     targetce;
          UBool       found          = FALSE;
-        uint32_t    firstce        = UCOL_NULLORDER;
+        int32_t     firstce        = UCOL_NULLORDER;
  
          setColEIterOffset(coleiter, textoffset);
-        while (TRUE) {
+        for (;;) {
              // finding the first pattern ce match, imagine composite 
              // characters. for example: search for pattern \u0300 in text 
              // \u00C0, we'll have to skip A first before we get to 
@@ -3500,14 +4565,14 @@ UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
          if (hasPatternAccents && !found) {
              strsrch->canonicalPrefixAccents[0] = 0;
              strsrch->canonicalSuffixAccents[0] = 0;
-                       if (U_FAILURE(*status)) {
+            if (U_FAILURE(*status)) {
                  break;
              }
              found = doPreviousCanonicalMatch(strsrch, textoffset, status);
          }
  
          if (!found) {
-                       if (U_FAILURE(*status)) {
+            if (U_FAILURE(*status)) {
                  break;
              }
              textoffset = reverseShift(strsrch, textoffset, targetce, 
@@ -3523,6 +4588,20 @@ UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
      }
      setMatchNotFound(strsrch);
      return FALSE;
+#else
+    int32_t textOffset = ucol_getOffset(strsrch->textIter);
+    int32_t start = -1;
+    int32_t end = -1;
+
+    if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
+        strsrch->search->matchedIndex = start;
+        strsrch->search->matchedLength = end - start;
+        return TRUE;
+    } else {
+        setMatchNotFound(strsrch);
+        return FALSE;
+    }
+#endif
  }
  
  #endif /* #if !UCONFIG_NO_COLLATION */