1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ********************************************************************************
5 * Copyright (C) 1996-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ********************************************************************************
10 #include "unicode/utypes.h"
12 #if !UCONFIG_NO_BREAK_ITERATION
14 #include "unicode/ubrk.h"
16 #include "unicode/brkiter.h"
17 #include "unicode/uloc.h"
18 #include "unicode/ustring.h"
19 #include "unicode/uchriter.h"
20 #include "unicode/rbbi.h"
27 //------------------------------------------------------------------------------
29 // ubrk_open Create a canned type of break iterator based on type (word, line, etc.)
32 //------------------------------------------------------------------------------
// Opens one of the predefined break iterators for a locale and points it at
// the caller's text.
//
// What the visible code does:
//   - returns 0 at once when *status already reports a failure;
//   - creates the iterator through one of the BreakIterator factory
//     functions (character, word, line, sentence or title instance), each
//     called with Locale(locale) and *status;
//   - casts the C++ object to the opaque UBreakIterator handle and passes it,
//     with text/textLength/status, to ubrk_setText().
//
// NOTE(review): this listing is missing source lines -- the remaining
// parameters, the dispatch on `type`, the closing braces and the return
// statements are not shown. Confirm the selection logic and the returned
// value against the complete file.
33 U_CAPI UBreakIterator
* U_EXPORT2
34 ubrk_open(UBreakIteratorType type
,
// Nothing is created when the incoming status is already a failure.
41 if(U_FAILURE(*status
)) return 0;
43 BreakIterator
*result
= 0;
// One factory call per iterator kind; the lines that choose between them
// are not visible in this listing.
48 result
= BreakIterator::createCharacterInstance(Locale(locale
), *status
);
52 result
= BreakIterator::createWordInstance(Locale(locale
), *status
);
56 result
= BreakIterator::createLineInstance(Locale(locale
), *status
);
60 result
= BreakIterator::createSentenceInstance(Locale(locale
), *status
);
64 result
= BreakIterator::createTitleInstance(Locale(locale
), *status
);
// NOTE(review): presumably the branch taken for an unrecognized `type`; the
// enclosing branch lines are not visible -- confirm.
68 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
71 // check for allocation error
72 if (U_FAILURE(*status
)) {
// NOTE(review): the condition guarding this assignment is not visible;
// presumably it covers a null `result` -- confirm.
76 *status
= U_MEMORY_ALLOCATION_ERROR
;
// The C handle is simply the C++ object pointer cast to the opaque type.
81 UBreakIterator
*uBI
= (UBreakIterator
*)result
;
// Attach the caller-supplied text to the newly created iterator.
83 ubrk_setText(uBI
, text
, textLength
, status
);
90 //------------------------------------------------------------------------------
92 // ubrk_openRules open a break iterator from a set of break rules.
93 // Invokes the rule builder.
95 //------------------------------------------------------------------------------
// Builds a break iterator from source rules and points it at the caller's
// text.
//
// What the visible code does:
//   - tests `status == NULL || U_FAILURE(*status)` before doing any work;
//   - wraps rules/rulesLength in a UnicodeString and passes it, with parseErr
//     and *status, to RBBIRuleBuilder::createRuleBasedBreakIterator();
//   - tests *status again after the build;
//   - casts the result to the opaque handle and calls ubrk_setText() with
//     text/textLength/status.
//
// NOTE(review): the parameter lines for rulesLength/text/textLength, the
// bodies of both status checks, the closing braces and the return statement
// are missing from this listing -- confirm against the complete file.
96 U_CAPI UBreakIterator
* U_EXPORT2
97 ubrk_openRules( const UChar
*rules
,
101 UParseError
*parseErr
,
102 UErrorCode
*status
) {
// Unlike a bare U_FAILURE(*status) test, this also guards against a null
// status pointer before it is dereferenced.
104 if (status
== NULL
|| U_FAILURE(*status
)){
108 BreakIterator
*result
= 0;
109 UnicodeString
ruleString(rules
, rulesLength
);
// The rule builder compiles the rule text; parseErr is handed through to it.
110 result
= RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString
, parseErr
, *status
);
111 if(U_FAILURE(*status
)) {
// The C handle is the C++ object pointer cast to the opaque type.
115 UBreakIterator
*uBI
= (UBreakIterator
*)result
;
// Attach the caller-supplied text to the new iterator.
117 ubrk_setText(uBI
, text
, textLength
, status
);
// Builds a rule-based break iterator from pre-compiled (binary) rules and
// points it at the caller's text.
//
// What the visible code does:
//   - tests U_FAILURE(*status) on entry;
//   - sets U_ILLEGAL_ARGUMENT_ERROR when rulesLength is negative;
//   - constructs a RuleBasedBreakIterator from binaryRules/rulesLength inside
//     a LocalPointer, then tests *status;
//   - releases the pointer with orphan(), casts it to the opaque handle and
//     calls ubrk_setText() with text/textLength/status.
//
// NOTE(review): the status parameter line, the bodies of the status checks,
// the closing braces and the return statements are missing from this
// listing -- confirm against the complete file.
123 U_CAPI UBreakIterator
* U_EXPORT2
124 ubrk_openBinaryRules(const uint8_t *binaryRules
, int32_t rulesLength
,
125 const UChar
* text
, int32_t textLength
,
128 if (U_FAILURE(*status
)) {
// A negative length is rejected before the rules are touched.
131 if (rulesLength
< 0) {
132 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
// lpRBBI holds the newly constructed iterator; *status is passed both to the
// iterator's constructor and to the LocalPointer.
135 LocalPointer
<RuleBasedBreakIterator
> lpRBBI(new RuleBasedBreakIterator(binaryRules
, rulesLength
, *status
), *status
);
136 if (U_FAILURE(*status
)) {
// orphan() hands the raw pointer out of lpRBBI; it becomes the C handle.
139 UBreakIterator
*uBI
= reinterpret_cast<UBreakIterator
*>(lpRBBI
.orphan());
// Attach the caller-supplied text to the new iterator.
141 ubrk_setText(uBI
, text
, textLength
, status
);
// Clones a break iterator and returns the copy as an opaque handle.
//
// NOTE(review): the line carrying the function name is missing from this
// listing; presumably this is ubrk_safeClone (it reports
// U_SAFECLONE_ALLOCATED_WARNING) -- confirm.
//
// What the visible code does:
//   - tests `status == NULL || U_FAILURE(*status)` on entry;
//   - ignores the stack buffer argument (its name is commented out);
//   - when pBufferSize is non-null, reads *pBufferSize and returns NULL if it
//     is 0 (the "preflighting" case of the deprecated buffer protocol);
//   - otherwise calls clone() on the underlying BreakIterator and returns the
//     new object cast to UBreakIterator*.
//
// NOTE(review): the conditions guarding the three status assignments below
// are not visible, nor are the status parameter line and the closing braces.
147 U_CAPI UBreakIterator
* U_EXPORT2
149 const UBreakIterator
*bi
,
150 void * /*stackBuffer*/,
151 int32_t *pBufferSize
,
154 if (status
== NULL
|| U_FAILURE(*status
)){
// NOTE(review): guard not visible; presumably reached for an invalid `bi`.
158 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
161 if (pBufferSize
!= NULL
) {
162 int32_t inputSize
= *pBufferSize
;
164 if (inputSize
== 0) {
165 return NULL
; // preflighting for deprecated functionality
// The C-style cast drops the const qualifier of `bi` before clone() is called.
168 BreakIterator
*newBI
= ((BreakIterator
*)bi
)->clone();
// NOTE(review): guard not visible; presumably reached when clone() returned
// null.
170 *status
= U_MEMORY_ALLOCATION_ERROR
;
// NOTE(review): guard not visible; the warning tells the caller that the
// clone was allocated rather than placed in a caller buffer -- confirm when
// it is raised.
172 *status
= U_SAFECLONE_ALLOCATED_WARNING
;
174 return (UBreakIterator
*)newBI
;
// ubrk_close: destroys a break iterator. The opaque handle is cast back to
// BreakIterator* and deleted.
// (The function's brace lines are absent from this listing.)
179 U_CAPI
void U_EXPORT2
180 ubrk_close(UBreakIterator
*bi
)
182 delete (BreakIterator
*)bi
;
// ubrk_setLineWordOpts: forwards lineWordOpts to
// BreakIterator::setLineWordOpts() on the C++ object behind the handle.
// Returns nothing.
// (The function's brace lines are absent from this listing.)
187 U_CAPI
void U_EXPORT2
188 ubrk_setLineWordOpts(UBreakIterator
* bi
,
189 ULineWordOptions lineWordOpts
)
191 ((BreakIterator
*)bi
)->setLineWordOpts(lineWordOpts
);
// ubrk_setText: sets the text the iterator works on, given a UChar buffer.
//
// The buffer (text/textLength) is wrapped in a stack-allocated UText by
// utext_openUChars(), and that UText is passed to BreakIterator::setText()
// together with *status.
//
// NOTE(review): the parameter lines after `bi` (text, textLength, status) and
// the brace lines are missing from this listing.
195 U_CAPI
void U_EXPORT2
196 ubrk_setText(UBreakIterator
* bi
,
// UTEXT_INITIALIZER puts the stack UText into the state utext_openUChars()
// expects before it is opened.
201 UText ut
= UTEXT_INITIALIZER
;
202 utext_openUChars(&ut
, text
, textLength
, status
);
203 ((BreakIterator
*)bi
)->setText(&ut
, *status
);
204 // A stack allocated UText wrapping a UChar * string
205 // can be dumped without explicitly closing it.
// ubrk_setUText: sets the text the iterator works on, given a UText.
// Forwards `text` and *status to BreakIterator::setText() on the C++ object
// behind the handle.
//
// NOTE(review): the parameter lines after `bi` (text, status) and the brace
// lines are missing from this listing.
210 U_CAPI
void U_EXPORT2
211 ubrk_setUText(UBreakIterator
*bi
,
215 ((BreakIterator
*)bi
)->setText(text
, *status
);
// ubrk_current: returns the result of BreakIterator::current() on the C++
// object behind the handle.
// The handle is taken as const; the C-style cast below also removes that
// const qualifier.
// (The function's brace lines are absent from this listing.)
222 U_CAPI
int32_t U_EXPORT2
223 ubrk_current(const UBreakIterator
*bi
)
226 return ((BreakIterator
*)bi
)->current();
// ubrk_next: returns the result of BreakIterator::next() on the C++ object
// behind the handle.
// (The function's brace lines are absent from this listing.)
229 U_CAPI
int32_t U_EXPORT2
230 ubrk_next(UBreakIterator
*bi
)
233 return ((BreakIterator
*)bi
)->next();
// ubrk_previous: returns the result of BreakIterator::previous() on the C++
// object behind the handle.
// (The function's brace lines are absent from this listing.)
236 U_CAPI
int32_t U_EXPORT2
237 ubrk_previous(UBreakIterator
*bi
)
240 return ((BreakIterator
*)bi
)->previous();
// ubrk_first: returns the result of BreakIterator::first() on the C++ object
// behind the handle.
// (The function's brace lines are absent from this listing.)
243 U_CAPI
int32_t U_EXPORT2
244 ubrk_first(UBreakIterator
*bi
)
247 return ((BreakIterator
*)bi
)->first();
// ubrk_last: returns the result of BreakIterator::last() on the C++ object
// behind the handle.
// (The function's brace lines are absent from this listing.)
250 U_CAPI
int32_t U_EXPORT2
251 ubrk_last(UBreakIterator
*bi
)
254 return ((BreakIterator
*)bi
)->last();
// ubrk_preceding: returns the result of BreakIterator::preceding(offset) on
// the C++ object behind the handle.
//
// NOTE(review): the line declaring the `offset` parameter and the brace lines
// are missing from this listing.
257 U_CAPI
int32_t U_EXPORT2
258 ubrk_preceding(UBreakIterator
*bi
,
262 return ((BreakIterator
*)bi
)->preceding(offset
);
// ubrk_following: returns the result of BreakIterator::following(offset) on
// the C++ object behind the handle.
//
// NOTE(review): the line declaring the `offset` parameter and the brace lines
// are missing from this listing.
265 U_CAPI
int32_t U_EXPORT2
266 ubrk_following(UBreakIterator
*bi
,
270 return ((BreakIterator
*)bi
)->following(offset
);
// ubrk_getAvailable: returns uloc_getAvailable(index). The lookup is
// delegated entirely to the general uloc locale list; nothing specific to
// break iteration is consulted here.
// (The function's brace lines are absent from this listing.)
273 U_CAPI
const char* U_EXPORT2
274 ubrk_getAvailable(int32_t index
)
277 return uloc_getAvailable(index
);
// ubrk_countAvailable: returns uloc_countAvailable(), i.e. the count is
// delegated to the general uloc locale list, matching ubrk_getAvailable().
// (The function's brace lines are absent from this listing.)
280 U_CAPI
int32_t U_EXPORT2
281 ubrk_countAvailable()
284 return uloc_countAvailable();
// ubrk_isBoundary: returns the result of BreakIterator::isBoundary(offset) on
// the C++ object behind the handle, as a UBool.
// (The function's brace lines are absent from this listing.)
288 U_CAPI UBool U_EXPORT2
289 ubrk_isBoundary(UBreakIterator
*bi
, int32_t offset
)
291 return ((BreakIterator
*)bi
)->isBoundary(offset
);
// ubrk_getRuleStatus: returns the result of BreakIterator::getRuleStatus() on
// the C++ object behind the handle.
// (The function's brace lines are absent from this listing.)
295 U_CAPI
int32_t U_EXPORT2
296 ubrk_getRuleStatus(UBreakIterator
*bi
)
298 return ((BreakIterator
*)bi
)->getRuleStatus();
// ubrk_getRuleStatusVec: forwards fillInVec, capacity and *status to
// BreakIterator::getRuleStatusVec() on the C++ object behind the handle and
// returns its int32_t result.
// `status` is dereferenced without a null check.
// (The function's brace lines are absent from this listing.)
301 U_CAPI
int32_t U_EXPORT2
302 ubrk_getRuleStatusVec(UBreakIterator
*bi
, int32_t *fillInVec
, int32_t capacity
, UErrorCode
*status
)
304 return ((BreakIterator
*)bi
)->getRuleStatusVec(fillInVec
, capacity
, *status
);
// ubrk_getLocaleByType: returns the locale ID of the requested kind by
// forwarding `type` and *status to BreakIterator::getLocaleID() on the C++
// object behind the handle.
//
// NOTE(review): the status parameter line, the condition enclosing the
// U_SUCCESS block below, the return inside that block and the brace lines are
// missing from this listing. Presumably the enclosing condition is a null
// check on `bi` -- confirm against the complete file.
308 U_CAPI
const char* U_EXPORT2
309 ubrk_getLocaleByType(const UBreakIterator
*bi
,
310 ULocDataLocaleType type
,
// Only overwrite the status when it does not already hold a failure, so an
// earlier error code is preserved.
314 if (U_SUCCESS(*status
)) {
315 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
// The C-style cast also removes the const qualifier of `bi`.
319 return ((BreakIterator
*)bi
)->getLocaleID(type
, *status
);
// ubrk_refreshUText: forwards `text` and *status to
// BreakIterator::refreshInputText() on the C++ object behind the handle.
// Returns nothing.
//
// NOTE(review): the parameter lines after `bi` (text, status) and the brace
// lines are missing from this listing.
323 U_CAPI
void U_EXPORT2
324 ubrk_refreshUText(UBreakIterator
*bi
,
// Recover the C++ object from the opaque C handle.
328 BreakIterator
*bii
= reinterpret_cast<BreakIterator
*>(bi
);
329 bii
->refreshInputText(text
, *status
);
// ubrk_getBinaryRules: copies the iterator's compiled (binary) rules into a
// caller buffer and returns their length in bytes as an int32_t.
//
// What the visible code does, in order:
//   - tests U_FAILURE(*status) on entry;
//   - sets U_ILLEGAL_ARGUMENT_ERROR when rulesCapacity is negative, or when
//     binaryRules is NULL while rulesCapacity is positive;
//   - sets U_ILLEGAL_ARGUMENT_ERROR when the handle is not a
//     RuleBasedBreakIterator (checked with dynamic_cast);
//   - sets U_INDEX_OUTOFBOUNDS_ERROR when the rules length exceeds INT32_MAX;
//   - when binaryRules is non-null, sets U_BUFFER_OVERFLOW_ERROR if the rules
//     do not fit in rulesCapacity, and copies them with uprv_memcpy;
//   - a NULL binaryRules is the preflight case: nothing is copied and only
//     the length is returned.
//
// NOTE(review): the status parameter line, the early returns that presumably
// follow each error assignment, the line between the overflow assignment and
// the copy (presumably an else), and the closing braces are missing from this
// listing -- confirm against the complete file.
332 U_CAPI
int32_t U_EXPORT2
333 ubrk_getBinaryRules(UBreakIterator
*bi
,
334 uint8_t * binaryRules
, int32_t rulesCapacity
,
337 if (U_FAILURE(*status
)) {
// Argument validation: a null buffer is only acceptable with zero capacity,
// and capacity may never be negative.
340 if ((binaryRules
== NULL
&& rulesCapacity
> 0) || rulesCapacity
< 0) {
341 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
344 RuleBasedBreakIterator
* rbbi
;
// Binary rules are only obtained from a RuleBasedBreakIterator; any other
// BreakIterator subclass fails the dynamic_cast and is rejected.
345 if ((rbbi
= dynamic_cast<RuleBasedBreakIterator
*>(reinterpret_cast<BreakIterator
*>(bi
))) == NULL
) {
346 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
// getBinaryRules() reports the length through the unsigned out-parameter.
349 uint32_t rulesLength
;
350 const uint8_t * returnedRules
= rbbi
->getBinaryRules(rulesLength
);
// The C API returns a signed 32-bit length, so larger rule sets cannot be
// represented.
351 if (rulesLength
> INT32_MAX
) {
352 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
355 if (binaryRules
!= NULL
) { // if not preflighting
356 // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely
357 if ((int32_t)rulesLength
> rulesCapacity
) {
358 *status
= U_BUFFER_OVERFLOW_ERROR
;
// NOTE(review): presumably executed only when the rules fit (else branch not
// visible in this listing).
360 uprv_memcpy(binaryRules
, returnedRules
, rulesLength
);
363 return (int32_t)rulesLength
;
367 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */