1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ********************************************************************************
5 * Copyright (C) 1996-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ********************************************************************************
10 #include "unicode/utypes.h"
12 #if !UCONFIG_NO_BREAK_ITERATION
14 #include "unicode/ubrk.h"
16 #include "unicode/brkiter.h"
17 #include "unicode/uloc.h"
18 #include "unicode/ustring.h"
19 #include "unicode/uchriter.h"
20 #include "unicode/rbbi.h"
27 //------------------------------------------------------------------------------
29 // ubrk_open Create a canned type of break iterator based on type (word, line, etc.)
32 //------------------------------------------------------------------------------
// Opens one of the predefined kinds of break iterator and attaches text to it.
// The visible code obtains the iterator from a BreakIterator factory
// (character, word, line, sentence or title) built for Locale(locale),
// reports problems through *status, and passes the result to ubrk_setText().
// NOTE(review): this listing omits several source lines (the remaining
// parameters, the selection between the factories, and the return
// statements) -- confirm those details against the complete file.
33 U_CAPI UBreakIterator
* U_EXPORT2
34 ubrk_open(UBreakIteratorType type
,
// Nothing is attempted when the incoming status already signals failure.
41 if(U_FAILURE(*status
)) return 0;
43 BreakIterator
*result
= 0;
// One factory call per supported iterator kind; the code choosing between
// them is not visible here (presumably a switch on `type` -- TODO confirm).
48 result
= BreakIterator::createCharacterInstance(Locale(locale
), *status
);
52 result
= BreakIterator::createWordInstance(Locale(locale
), *status
);
56 result
= BreakIterator::createLineInstance(Locale(locale
), *status
);
60 result
= BreakIterator::createSentenceInstance(Locale(locale
), *status
);
64 result
= BreakIterator::createTitleInstance(Locale(locale
), *status
);
// Presumably the fallback for an unrecognized `type` -- TODO confirm.
68 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
71 // check for allocation error
72 if (U_FAILURE(*status
)) {
// The condition guarding this assignment is not visible in this listing
// (presumably a null check on `result` -- TODO confirm).
76 *status
= U_MEMORY_ALLOCATION_ERROR
;
// The C handle is the C++ object pointer reinterpreted as UBreakIterator*.
81 UBreakIterator
*uBI
= (UBreakIterator
*)result
;
83 ubrk_setText(uBI
, text
, textLength
, status
);
90 //------------------------------------------------------------------------------
92 // ubrk_openRules open a break iterator from a set of break rules.
93 // Invokes the rule builder.
95 //------------------------------------------------------------------------------
// Builds a break iterator from source rules by wrapping them in a
// UnicodeString and calling RBBIRuleBuilder::createRuleBasedBreakIterator(),
// then passes the result to ubrk_setText().
// NOTE(review): the rulesLength/text/textLength parameter lines and the
// return statements are missing from this listing -- confirm against the
// complete file.
96 U_CAPI UBreakIterator
* U_EXPORT2
97 ubrk_openRules( const UChar
*rules
,
101 UParseError
*parseErr
,
102 UErrorCode
*status
) {
// Unlike ubrk_open, a null status pointer is tolerated by this check.
104 if (status
== NULL
|| U_FAILURE(*status
)){
108 BreakIterator
*result
= 0;
109 UnicodeString
ruleString(rules
, rulesLength
);
// parseErr is handed straight to the rule builder.
110 result
= RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString
, parseErr
, *status
);
111 if(U_FAILURE(*status
)) {
// The C handle is the C++ object pointer reinterpreted as UBreakIterator*.
115 UBreakIterator
*uBI
= (UBreakIterator
*)result
;
117 ubrk_setText(uBI
, text
, textLength
, status
);
// Builds a RuleBasedBreakIterator directly from pre-compiled (binary) rules
// and passes the result to ubrk_setText().
// NOTE(review): the final parameter line and the return statements are
// missing from this listing -- confirm against the complete file.
123 U_CAPI UBreakIterator
* U_EXPORT2
124 ubrk_openBinaryRules(const uint8_t *binaryRules
, int32_t rulesLength
,
125 const UChar
* text
, int32_t textLength
,
128 if (U_FAILURE(*status
)) {
// A negative rule length is reported as an illegal argument.
131 if (rulesLength
< 0) {
132 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
// The new iterator is held in a LocalPointer while the status is checked;
// *status is passed both to the iterator constructor and to LocalPointer.
135 LocalPointer
<RuleBasedBreakIterator
> lpRBBI(new RuleBasedBreakIterator(binaryRules
, rulesLength
, *status
), *status
);
136 if (U_FAILURE(*status
)) {
// orphan() takes the raw pointer back out of the LocalPointer so it can be
// used as the C handle.
139 UBreakIterator
*uBI
= reinterpret_cast<UBreakIterator
*>(lpRBBI
.orphan());
141 ubrk_setText(uBI
, text
, textLength
, status
);
// Produces a copy of the iterator `bi` through BreakIterator::clone() and
// returns it as a UBreakIterator*.
// NOTE(review): the line carrying the function name is missing from this
// listing (presumably ubrk_safeClone, judging by the
// U_SAFECLONE_ALLOCATED_WARNING status) -- confirm. Several guard
// conditions, return statements and the trailing status parameter are
// missing as well.
147 U_CAPI UBreakIterator
* U_EXPORT2
149 const UBreakIterator
*bi
,
// The stack buffer parameter is left unnamed.
150 void * /*stackBuffer*/,
151 int32_t *pBufferSize
,
154 if (status
== NULL
|| U_FAILURE(*status
)){
// The condition guarding this assignment is not visible here (presumably a
// null check on `bi` -- TODO confirm).
158 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
// pBufferSize is optional; when supplied, an input value of 0 yields NULL.
161 if (pBufferSize
!= NULL
) {
162 int32_t inputSize
= *pBufferSize
;
164 if (inputSize
== 0) {
165 return NULL
; // preflighting for deprecated functionality
168 BreakIterator
*newBI
= ((BreakIterator
*)bi
)->clone();
// One of the two statuses below is reported; the test selecting between
// them is not visible (presumably whether clone() returned null -- TODO
// confirm).
170 *status
= U_MEMORY_ALLOCATION_ERROR
;
172 *status
= U_SAFECLONE_ALLOCATED_WARNING
;
174 return (UBreakIterator
*)newBI
;
179 U_CAPI
void U_EXPORT2
180 ubrk_close(UBreakIterator
*bi
)
182 delete (BreakIterator
*)bi
;
185 U_CAPI
void U_EXPORT2
186 ubrk_setText(UBreakIterator
* bi
,
191 UText ut
= UTEXT_INITIALIZER
;
192 utext_openUChars(&ut
, text
, textLength
, status
);
193 ((BreakIterator
*)bi
)->setText(&ut
, *status
);
194 // A stack allocated UText wrapping a UChar * string
195 // can be dumped without explicitly closing it.
200 U_CAPI
void U_EXPORT2
201 ubrk_setUText(UBreakIterator
*bi
,
205 ((BreakIterator
*)bi
)->setText(text
, *status
);
212 U_CAPI
int32_t U_EXPORT2
213 ubrk_current(const UBreakIterator
*bi
)
216 return ((BreakIterator
*)bi
)->current();
219 U_CAPI
int32_t U_EXPORT2
220 ubrk_next(UBreakIterator
*bi
)
223 return ((BreakIterator
*)bi
)->next();
226 U_CAPI
int32_t U_EXPORT2
227 ubrk_previous(UBreakIterator
*bi
)
230 return ((BreakIterator
*)bi
)->previous();
233 U_CAPI
int32_t U_EXPORT2
234 ubrk_first(UBreakIterator
*bi
)
237 return ((BreakIterator
*)bi
)->first();
240 U_CAPI
int32_t U_EXPORT2
241 ubrk_last(UBreakIterator
*bi
)
244 return ((BreakIterator
*)bi
)->last();
247 U_CAPI
int32_t U_EXPORT2
248 ubrk_preceding(UBreakIterator
*bi
,
252 return ((BreakIterator
*)bi
)->preceding(offset
);
255 U_CAPI
int32_t U_EXPORT2
256 ubrk_following(UBreakIterator
*bi
,
260 return ((BreakIterator
*)bi
)->following(offset
);
263 U_CAPI
const char* U_EXPORT2
264 ubrk_getAvailable(int32_t index
)
267 return uloc_getAvailable(index
);
270 U_CAPI
int32_t U_EXPORT2
271 ubrk_countAvailable()
274 return uloc_countAvailable();
278 U_CAPI UBool U_EXPORT2
279 ubrk_isBoundary(UBreakIterator
*bi
, int32_t offset
)
281 return ((BreakIterator
*)bi
)->isBoundary(offset
);
285 U_CAPI
int32_t U_EXPORT2
286 ubrk_getRuleStatus(UBreakIterator
*bi
)
288 return ((BreakIterator
*)bi
)->getRuleStatus();
291 U_CAPI
int32_t U_EXPORT2
292 ubrk_getRuleStatusVec(UBreakIterator
*bi
, int32_t *fillInVec
, int32_t capacity
, UErrorCode
*status
)
294 return ((BreakIterator
*)bi
)->getRuleStatusVec(fillInVec
, capacity
, *status
);
// Returns the locale ID of the requested `type` for the wrapped iterator by
// calling BreakIterator::getLocaleID(type, *status).
// NOTE(review): the status parameter line, the condition guarding the
// error branch and its return statement are missing from this listing --
// confirm against the complete file.
298 U_CAPI
const char* U_EXPORT2
299 ubrk_getLocaleByType(const UBreakIterator
*bi
,
300 ULocDataLocaleType type
,
// The illegal-argument code is only written when the status is still a
// success value, so an earlier error is not overwritten. The outer condition
// that leads here is not visible (presumably a null check on `bi` -- TODO
// confirm).
304 if (U_SUCCESS(*status
)) {
305 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
309 return ((BreakIterator
*)bi
)->getLocaleID(type
, *status
);
313 U_CAPI
void U_EXPORT2
314 ubrk_refreshUText(UBreakIterator
*bi
,
318 BreakIterator
*bii
= reinterpret_cast<BreakIterator
*>(bi
);
319 bii
->refreshInputText(text
, *status
);
// Copies the compiled (binary) rules of a rule-based break iterator into the
// caller's buffer and returns the rule length as an int32_t.
// A NULL binaryRules pointer means the caller is preflighting: nothing is
// copied.
// NOTE(review): the status parameter line and the statements that follow
// each error assignment (presumably early returns) are missing from this
// listing -- confirm against the complete file.
322 U_CAPI
int32_t U_EXPORT2
323 ubrk_getBinaryRules(UBreakIterator
*bi
,
324 uint8_t * binaryRules
, int32_t rulesCapacity
,
327 if (U_FAILURE(*status
)) {
// Reject a NULL buffer paired with a positive capacity, and any negative
// capacity.
330 if ((binaryRules
== NULL
&& rulesCapacity
> 0) || rulesCapacity
< 0) {
331 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Only a RuleBasedBreakIterator can supply binary rules; dynamic_cast
// yields NULL for any other kind of iterator, which is reported as an
// illegal argument.
334 RuleBasedBreakIterator
* rbbi
;
335 if ((rbbi
= dynamic_cast<RuleBasedBreakIterator
*>(reinterpret_cast<BreakIterator
*>(bi
))) == NULL
) {
336 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
// getBinaryRules() reports the length through its uint32_t argument.
339 uint32_t rulesLength
;
340 const uint8_t * returnedRules
= rbbi
->getBinaryRules(rulesLength
);
// The length must fit the signed 32-bit return type.
341 if (rulesLength
> INT32_MAX
) {
342 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
345 if (binaryRules
!= NULL
) { // if not preflighting
346 // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely
347 if ((int32_t)rulesLength
> rulesCapacity
) {
348 *status
= U_BUFFER_OVERFLOW_ERROR
;
// The branch structure between the overflow status and this copy is not
// visible (presumably the copy runs only when the rules fit -- TODO
// confirm).
350 uprv_memcpy(binaryRules
, returnedRules
, rulesLength
);
353 return (int32_t)rulesLength
;
357 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */