X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..HEAD:/icuSources/common/ubrk.cpp diff --git a/icuSources/common/ubrk.cpp b/icuSources/common/ubrk.cpp index 8cfb93c2..fcec23df 100644 --- a/icuSources/common/ubrk.cpp +++ b/icuSources/common/ubrk.cpp @@ -1,8 +1,10 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* -***************************************************************************************** -* Copyright (C) 1996-2001, International Business Machines +******************************************************************************** +* Copyright (C) 1996-2015, International Business Machines * Corporation and others. All Rights Reserved. -***************************************************************************************** +******************************************************************************** */ #include "unicode/utypes.h" @@ -17,15 +19,17 @@ #include "unicode/uchriter.h" #include "unicode/rbbi.h" #include "rbbirb.h" +#include "uassert.h" +#include "cmemory.h" U_NAMESPACE_USE -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // ubrk_open Create a canned type of break iterator based on type (word, line, etc.) // and locale. // -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ U_CAPI UBreakIterator* U_EXPORT2 ubrk_open(UBreakIteratorType type, const char *locale, @@ -59,6 +63,9 @@ ubrk_open(UBreakIteratorType type, case UBRK_TITLE: result = BreakIterator::createTitleInstance(Locale(locale), *status); break; + + default: + *status = U_ILLEGAL_ARGUMENT_ERROR; } // check for allocation error @@ -70,26 +77,22 @@ ubrk_open(UBreakIteratorType type, return 0; } - UCharCharacterIterator *iter = 0; - iter = new UCharCharacterIterator(text, textLength); - if(iter == 0) { - *status = U_MEMORY_ALLOCATION_ERROR; - delete result; - return 0; - } - result->adoptText(iter); - return (UBreakIterator*)result; + UBreakIterator *uBI = (UBreakIterator *)result; + if (text != NULL) { + ubrk_setText(uBI, text, textLength, status); + } + return uBI; } -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // // ubrk_openRules open a break iterator from a set of break rules. // Invokes the rule builder. // -//---------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ U_CAPI UBreakIterator* U_EXPORT2 ubrk_openRules( const UChar *rules, int32_t rulesLength, @@ -104,44 +107,71 @@ ubrk_openRules( const UChar *rules, BreakIterator *result = 0; UnicodeString ruleString(rules, rulesLength); - result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, *parseErr, *status); + result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status); if(U_FAILURE(*status)) { return 0; } + UBreakIterator *uBI = (UBreakIterator *)result; if (text != NULL) { - UCharCharacterIterator *iter = 0; - iter = new UCharCharacterIterator(text, textLength); - if(iter == 0) { - *status = U_MEMORY_ALLOCATION_ERROR; - delete result; - return 0; - } - result->adoptText(iter); + ubrk_setText(uBI, text, textLength, status); } - return (UBreakIterator *)result; + return uBI; } - +U_CAPI UBreakIterator* U_EXPORT2 +ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, + const UChar * text, int32_t textLength, + UErrorCode * status) +{ + if (U_FAILURE(*status)) { + return NULL; + } + if (rulesLength < 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + LocalPointer lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status); + if (U_FAILURE(*status)) { + return NULL; + } + UBreakIterator *uBI = reinterpret_cast(lpRBBI.orphan()); + if (text != NULL) { + ubrk_setText(uBI, text, textLength, status); + } + return uBI; +} U_CAPI UBreakIterator * U_EXPORT2 ubrk_safeClone( const UBreakIterator *bi, - void *stackBuffer, + void * /*stackBuffer*/, int32_t *pBufferSize, UErrorCode *status) { if (status == NULL || U_FAILURE(*status)){ - return 0; + return NULL; } - if (!pBufferSize || !bi){ + if (bi == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; + return NULL; + } + if (pBufferSize != NULL) { + int32_t inputSize = *pBufferSize; + *pBufferSize = 1; + if (inputSize == 0) { + return NULL; // preflighting for deprecated functionality + } + } + BreakIterator *newBI = ((BreakIterator *)bi)->clone(); + if (newBI == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + } else { + *status = U_SAFECLONE_ALLOCATED_WARNING; } - return (UBreakIterator *)(((BreakIterator*)bi)-> - createBufferClone(stackBuffer, *pBufferSize, *status)); + return (UBreakIterator *)newBI; } @@ -149,43 +179,46 @@ ubrk_safeClone( U_CAPI void U_EXPORT2 ubrk_close(UBreakIterator *bi) { - BreakIterator *ubi = (BreakIterator*) bi; - if (ubi) { - if (ubi->isBufferClone()) { - ubi->~BreakIterator(); - *(uint32_t *)ubi = 0xdeadbeef; - } else { - delete ubi; - } - } + delete (BreakIterator *)bi; +} + + +// Apple only +U_CAPI void U_EXPORT2 +ubrk_setLineWordOpts(UBreakIterator* bi, + ULineWordOptions lineWordOpts) +{ + ((BreakIterator*)bi)->setLineWordOpts(lineWordOpts); } + U_CAPI void U_EXPORT2 ubrk_setText(UBreakIterator* bi, const UChar* text, int32_t textLength, UErrorCode* status) { + UText ut = UTEXT_INITIALIZER; + utext_openUChars(&ut, text, textLength, status); + ((BreakIterator*)bi)->setText(&ut, *status); + // A stack allocated UText wrapping a UChar * string + // can be dumped without explicitly closing it. +} - if (U_FAILURE(*status)) return; - const CharacterIterator& biText = ((BreakIterator*)bi)->getText(); - int32_t textLen = (textLength == -1 ? u_strlen(text) : textLength); - if (biText.getDynamicClassID() == UCharCharacterIterator::getStaticClassID()) { - ((UCharCharacterIterator&)biText).setText(text, textLen); - } - else { - UCharCharacterIterator *iter = 0; - iter = new UCharCharacterIterator(text, textLen); - if(iter == 0) { - *status = U_MEMORY_ALLOCATION_ERROR; - return; - } - ((BreakIterator*)bi)->adoptText(iter); - } +U_CAPI void U_EXPORT2 +ubrk_setUText(UBreakIterator *bi, + UText *text, + UErrorCode *status) +{ + ((BreakIterator*)bi)->setText(text, *status); } + + + + U_CAPI int32_t U_EXPORT2 ubrk_current(const UBreakIterator *bi) { @@ -255,14 +288,80 @@ ubrk_countAvailable() U_CAPI UBool U_EXPORT2 ubrk_isBoundary(UBreakIterator *bi, int32_t offset) { - return ((BreakIterator *)bi)->isBoundary(offset); + return ((BreakIterator*)bi)->isBoundary(offset); } U_CAPI int32_t U_EXPORT2 ubrk_getRuleStatus(UBreakIterator *bi) { - return ((RuleBasedBreakIterator *)bi)->getRuleStatus(); + return ((BreakIterator*)bi)->getRuleStatus(); +} + +U_CAPI int32_t U_EXPORT2 +ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status) +{ + return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status); +} + + +U_CAPI const char* U_EXPORT2 +ubrk_getLocaleByType(const UBreakIterator *bi, + ULocDataLocaleType type, + UErrorCode* status) +{ + if (bi == NULL) { + if (U_SUCCESS(*status)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return NULL; + } + return ((BreakIterator*)bi)->getLocaleID(type, *status); +} + + +U_CAPI void U_EXPORT2 +ubrk_refreshUText(UBreakIterator *bi, + UText *text, + UErrorCode *status) +{ + BreakIterator *bii = reinterpret_cast(bi); + bii->refreshInputText(text, *status); +} + +U_CAPI int32_t U_EXPORT2 +ubrk_getBinaryRules(UBreakIterator *bi, + uint8_t * binaryRules, int32_t rulesCapacity, + UErrorCode * status) +{ + if (U_FAILURE(*status)) { + return 0; + } + if ((binaryRules == NULL && rulesCapacity > 0) || rulesCapacity < 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + RuleBasedBreakIterator* rbbi; + if ((rbbi = dynamic_cast(reinterpret_cast(bi))) == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + uint32_t rulesLength; + const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength); + if (rulesLength > INT32_MAX) { + *status = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + if (binaryRules != NULL) { // if not preflighting + // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely + if ((int32_t)rulesLength > rulesCapacity) { + *status = U_BUFFER_OVERFLOW_ERROR; + } else { + uprv_memcpy(binaryRules, returnedRules, rulesLength); + } + } + return (int32_t)rulesLength; } + #endif /* #if !UCONFIG_NO_BREAK_ITERATION */