ICU-59173.0.1.tar.gz

[apple/icu.git] / icuSources / common / ubrk.cpp
diff --git a/icuSources/common/ubrk.cpp b/icuSources/common/ubrk.cpp

index 8cfb93c20d09c67d47d04d5999ec2704609e2693..f8bdf5a6b6582270bc97e5472cd29a1cbab81a17 100644 (file)
--- a/icuSources/common/ubrk.cpp
+++ b/icuSources/common/ubrk.cpp
@@ -1,8 +1,10 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /*
-*****************************************************************************************
-*   Copyright (C) 1996-2001, International Business Machines
+********************************************************************************
+*   Copyright (C) 1996-2015, International Business Machines
  *   Corporation and others.  All Rights Reserved.
-*****************************************************************************************
+********************************************************************************
  */
  
  #include "unicode/utypes.h"
@@ -17,15 +19,17 @@
  #include "unicode/uchriter.h"
  #include "unicode/rbbi.h"
  #include "rbbirb.h"
+#include "uassert.h"
+#include "cmemory.h"
  
  U_NAMESPACE_USE
  
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
  //
  //    ubrk_open      Create a canned type of break iterator based on type (word, line, etc.)
  //                   and locale.
  //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
  U_CAPI UBreakIterator* U_EXPORT2
  ubrk_open(UBreakIteratorType type,
        const char *locale,
@@ -59,6 +63,9 @@ ubrk_open(UBreakIteratorType type,
    case UBRK_TITLE:
      result = BreakIterator::createTitleInstance(Locale(locale), *status);
      break;
+
+  default:
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
    }
  
    // check for allocation error
@@ -70,26 +77,22 @@ ubrk_open(UBreakIteratorType type,
      return 0;
    }
  
-  UCharCharacterIterator *iter = 0;
-  iter = new UCharCharacterIterator(text, textLength);
-  if(iter == 0) {
-    *status = U_MEMORY_ALLOCATION_ERROR;
-    delete result;
-    return 0;
-  }
-  result->adoptText(iter);
  
-  return (UBreakIterator*)result;
+  UBreakIterator *uBI = (UBreakIterator *)result;
+  if (text != NULL) {
+      ubrk_setText(uBI, text, textLength, status);
+  }
+  return uBI;
  }
  
  
  
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
  //
  //   ubrk_openRules      open a break iterator from a set of break rules.
  //                       Invokes the rule builder.
  //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
  U_CAPI UBreakIterator* U_EXPORT2
  ubrk_openRules(  const UChar        *rules,
                         int32_t       rulesLength,
@@ -104,44 +107,71 @@ ubrk_openRules(  const UChar        *rules,
  
      BreakIterator *result = 0;
      UnicodeString ruleString(rules, rulesLength);
-    result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, *parseErr, *status);
+    result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status);
      if(U_FAILURE(*status)) {
          return 0;
      }
  
+    UBreakIterator *uBI = (UBreakIterator *)result;
      if (text != NULL) {
-        UCharCharacterIterator *iter = 0;
-        iter = new UCharCharacterIterator(text, textLength);
-        if(iter == 0) {
-            *status = U_MEMORY_ALLOCATION_ERROR;
-            delete result;
-            return 0;
-        }
-        result->adoptText(iter);
+        ubrk_setText(uBI, text, textLength, status);
      }
-    return (UBreakIterator *)result;
+    return uBI;
  }
  
  
-
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
+                     const UChar *  text, int32_t textLength,
+                     UErrorCode *   status)
+{
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (rulesLength < 0) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    LocalPointer<RuleBasedBreakIterator> lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status);
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    UBreakIterator *uBI = reinterpret_cast<UBreakIterator *>(lpRBBI.orphan());
+    if (text != NULL) {
+        ubrk_setText(uBI, text, textLength, status);
+    }
+    return uBI;
+}
  
  
  U_CAPI UBreakIterator * U_EXPORT2
  ubrk_safeClone(
            const UBreakIterator *bi,
-          void *stackBuffer,
+          void * /*stackBuffer*/,
            int32_t *pBufferSize,
            UErrorCode *status)
  {
      if (status == NULL || U_FAILURE(*status)){
-        return 0;
+        return NULL;
      }
-    if (!pBufferSize || !bi){
+    if (bi == NULL) {
         *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
+        return NULL;
      }
-    return (UBreakIterator *)(((BreakIterator*)bi)->
-        createBufferClone(stackBuffer, *pBufferSize, *status));
+    if (pBufferSize != NULL) {
+        int32_t inputSize = *pBufferSize;
+        *pBufferSize = 1;
+        if (inputSize == 0) {
+            return NULL;  // preflighting for deprecated functionality
+        }
+    }
+    BreakIterator *newBI = ((BreakIterator *)bi)->clone();
+    if (newBI == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+    } else {
+        *status = U_SAFECLONE_ALLOCATED_WARNING;
+    }
+    return (UBreakIterator *)newBI;
  }
  
  
@@ -149,15 +179,7 @@ ubrk_safeClone(
  U_CAPI void U_EXPORT2
  ubrk_close(UBreakIterator *bi)
  {
-    BreakIterator *ubi = (BreakIterator*) bi;
-    if (ubi) {
-        if (ubi->isBufferClone()) {
-            ubi->~BreakIterator();
-            *(uint32_t *)ubi = 0xdeadbeef;
-        } else {
-            delete ubi;
-        }
-    }
+    delete (BreakIterator *)bi;
  }
  
  U_CAPI void U_EXPORT2
@@ -166,26 +188,27 @@ ubrk_setText(UBreakIterator* bi,
               int32_t         textLength,
               UErrorCode*     status)
  {
+    UText  ut = UTEXT_INITIALIZER;
+    utext_openUChars(&ut, text, textLength, status);
+    ((BreakIterator*)bi)->setText(&ut, *status);
+    // A stack allocated UText wrapping a UChar * string
+    //   can be dumped without explicitly closing it.
+}
  
-  if (U_FAILURE(*status)) return;
  
-  const CharacterIterator& biText = ((BreakIterator*)bi)->getText();
  
-  int32_t textLen = (textLength == -1 ? u_strlen(text) : textLength);
-  if (biText.getDynamicClassID() == UCharCharacterIterator::getStaticClassID()) {
-      ((UCharCharacterIterator&)biText).setText(text, textLen);
-  }
-  else {
-      UCharCharacterIterator *iter = 0;
-      iter = new UCharCharacterIterator(text, textLen);
-      if(iter == 0) {
-        *status = U_MEMORY_ALLOCATION_ERROR;
-        return;
-      }
-      ((BreakIterator*)bi)->adoptText(iter);
-  }
+U_CAPI void U_EXPORT2
+ubrk_setUText(UBreakIterator *bi,
+             UText          *text,
+             UErrorCode     *status)
+{
+  ((BreakIterator*)bi)->setText(text, *status);
  }
  
+
+
+
+
  U_CAPI int32_t U_EXPORT2
  ubrk_current(const UBreakIterator *bi)
  {
@@ -255,14 +278,80 @@ ubrk_countAvailable()
  U_CAPI  UBool U_EXPORT2
  ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
  {
-    return ((BreakIterator *)bi)->isBoundary(offset);
+    return ((BreakIterator*)bi)->isBoundary(offset);
  }
  
  
  U_CAPI  int32_t U_EXPORT2
  ubrk_getRuleStatus(UBreakIterator *bi)
  {
-    return ((RuleBasedBreakIterator *)bi)->getRuleStatus();
+    return ((BreakIterator*)bi)->getRuleStatus();
+}
+
+U_CAPI  int32_t U_EXPORT2
+ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
+{
+    return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status);
+}
+
+
+U_CAPI const char* U_EXPORT2
+ubrk_getLocaleByType(const UBreakIterator *bi,
+                     ULocDataLocaleType type,
+                     UErrorCode* status)
+{
+    if (bi == NULL) {
+        if (U_SUCCESS(*status)) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        return NULL;
+    }
+    return ((BreakIterator*)bi)->getLocaleID(type, *status);
+}
+
+
+U_CAPI void U_EXPORT2
+ubrk_refreshUText(UBreakIterator *bi,
+                       UText          *text,
+                       UErrorCode     *status)
+{
+    BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi);
+    bii->refreshInputText(text, *status);
  }
  
+U_CAPI int32_t U_EXPORT2
+ubrk_getBinaryRules(UBreakIterator *bi,
+                    uint8_t *       binaryRules, int32_t rulesCapacity,
+                    UErrorCode *    status)
+{
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    if ((binaryRules == NULL && rulesCapacity > 0) || rulesCapacity < 0) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    RuleBasedBreakIterator* rbbi;
+    if ((rbbi = dynamic_cast<RuleBasedBreakIterator*>(reinterpret_cast<BreakIterator*>(bi))) == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    uint32_t rulesLength;
+    const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength);
+    if (rulesLength > INT32_MAX) {
+        *status = U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+    if (binaryRules != NULL) { // if not preflighting
+        // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely
+        if ((int32_t)rulesLength > rulesCapacity) {
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        } else {
+            uprv_memcpy(binaryRules, returnedRules, rulesLength);
+        }
+    }
+    return (int32_t)rulesLength;
+}
+
+
  #endif /* #if !UCONFIG_NO_BREAK_ITERATION */