]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ubrk.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / common / ubrk.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f 3/*
46f4442e 4********************************************************************************
2ca993e8 5* Copyright (C) 1996-2015, International Business Machines
b75a7d8f 6* Corporation and others. All Rights Reserved.
46f4442e 7********************************************************************************
b75a7d8f
A
8*/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/ubrk.h"
15
16#include "unicode/brkiter.h"
17#include "unicode/uloc.h"
18#include "unicode/ustring.h"
19#include "unicode/uchriter.h"
20#include "unicode/rbbi.h"
21#include "rbbirb.h"
73c04bcf 22#include "uassert.h"
f3c0d7a5 23#include "cmemory.h"
b75a7d8f
A
24
25U_NAMESPACE_USE
26
46f4442e 27//------------------------------------------------------------------------------
b75a7d8f
A
28//
29// ubrk_open Create a canned type of break iterator based on type (word, line, etc.)
30// and locale.
31//
46f4442e 32//------------------------------------------------------------------------------
b75a7d8f
A
33U_CAPI UBreakIterator* U_EXPORT2
34ubrk_open(UBreakIteratorType type,
35 const char *locale,
36 const UChar *text,
37 int32_t textLength,
38 UErrorCode *status)
39{
40
41 if(U_FAILURE(*status)) return 0;
42
43 BreakIterator *result = 0;
44
45 switch(type) {
46
47 case UBRK_CHARACTER:
48 result = BreakIterator::createCharacterInstance(Locale(locale), *status);
49 break;
50
51 case UBRK_WORD:
52 result = BreakIterator::createWordInstance(Locale(locale), *status);
53 break;
54
55 case UBRK_LINE:
56 result = BreakIterator::createLineInstance(Locale(locale), *status);
57 break;
58
59 case UBRK_SENTENCE:
60 result = BreakIterator::createSentenceInstance(Locale(locale), *status);
61 break;
62
63 case UBRK_TITLE:
64 result = BreakIterator::createTitleInstance(Locale(locale), *status);
65 break;
73c04bcf
A
66
67 default:
68 *status = U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f
A
69 }
70
71 // check for allocation error
72 if (U_FAILURE(*status)) {
73 return 0;
74 }
75 if(result == 0) {
76 *status = U_MEMORY_ALLOCATION_ERROR;
77 return 0;
78 }
79
b75a7d8f 80
73c04bcf
A
81 UBreakIterator *uBI = (UBreakIterator *)result;
82 if (text != NULL) {
83 ubrk_setText(uBI, text, textLength, status);
84 }
85 return uBI;
b75a7d8f
A
86}
87
88
89
46f4442e 90//------------------------------------------------------------------------------
b75a7d8f
A
91//
92// ubrk_openRules open a break iterator from a set of break rules.
93// Invokes the rule builder.
94//
46f4442e 95//------------------------------------------------------------------------------
b75a7d8f
A
96U_CAPI UBreakIterator* U_EXPORT2
97ubrk_openRules( const UChar *rules,
98 int32_t rulesLength,
99 const UChar *text,
100 int32_t textLength,
101 UParseError *parseErr,
102 UErrorCode *status) {
103
104 if (status == NULL || U_FAILURE(*status)){
105 return 0;
106 }
107
108 BreakIterator *result = 0;
109 UnicodeString ruleString(rules, rulesLength);
46f4442e 110 result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status);
b75a7d8f
A
111 if(U_FAILURE(*status)) {
112 return 0;
113 }
114
73c04bcf 115 UBreakIterator *uBI = (UBreakIterator *)result;
b75a7d8f 116 if (text != NULL) {
73c04bcf 117 ubrk_setText(uBI, text, textLength, status);
b75a7d8f 118 }
73c04bcf 119 return uBI;
b75a7d8f
A
120}
121
122
f3c0d7a5
A
123U_CAPI UBreakIterator* U_EXPORT2
124ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
125 const UChar * text, int32_t textLength,
126 UErrorCode * status)
127{
128 if (U_FAILURE(*status)) {
129 return NULL;
130 }
131 if (rulesLength < 0) {
132 *status = U_ILLEGAL_ARGUMENT_ERROR;
133 return NULL;
134 }
135 LocalPointer<RuleBasedBreakIterator> lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status);
136 if (U_FAILURE(*status)) {
137 return NULL;
138 }
139 UBreakIterator *uBI = reinterpret_cast<UBreakIterator *>(lpRBBI.orphan());
140 if (text != NULL) {
141 ubrk_setText(uBI, text, textLength, status);
142 }
143 return uBI;
144}
b75a7d8f
A
145
146
147U_CAPI UBreakIterator * U_EXPORT2
148ubrk_safeClone(
149 const UBreakIterator *bi,
57a6839d 150 void * /*stackBuffer*/,
b75a7d8f
A
151 int32_t *pBufferSize,
152 UErrorCode *status)
153{
154 if (status == NULL || U_FAILURE(*status)){
57a6839d 155 return NULL;
b75a7d8f 156 }
57a6839d 157 if (bi == NULL) {
b75a7d8f 158 *status = U_ILLEGAL_ARGUMENT_ERROR;
57a6839d
A
159 return NULL;
160 }
161 if (pBufferSize != NULL) {
162 int32_t inputSize = *pBufferSize;
163 *pBufferSize = 1;
164 if (inputSize == 0) {
165 return NULL; // preflighting for deprecated functionality
166 }
b75a7d8f 167 }
57a6839d
A
168 BreakIterator *newBI = ((BreakIterator *)bi)->clone();
169 if (newBI == NULL) {
170 *status = U_MEMORY_ALLOCATION_ERROR;
171 } else {
172 *status = U_SAFECLONE_ALLOCATED_WARNING;
73c04bcf 173 }
57a6839d 174 return (UBreakIterator *)newBI;
b75a7d8f
A
175}
176
177
178
179U_CAPI void U_EXPORT2
180ubrk_close(UBreakIterator *bi)
181{
57a6839d 182 delete (BreakIterator *)bi;
b75a7d8f
A
183}
184
0f5d89e8
A
185
186// Apple only
187U_CAPI void U_EXPORT2
188ubrk_setLineWordOpts(UBreakIterator* bi,
189 ULineWordOptions lineWordOpts)
190{
191 ((BreakIterator*)bi)->setLineWordOpts(lineWordOpts);
192}
193
194
b75a7d8f
A
195U_CAPI void U_EXPORT2
196ubrk_setText(UBreakIterator* bi,
197 const UChar* text,
198 int32_t textLength,
199 UErrorCode* status)
200{
73c04bcf
A
201 UText ut = UTEXT_INITIALIZER;
202 utext_openUChars(&ut, text, textLength, status);
b331163b 203 ((BreakIterator*)bi)->setText(&ut, *status);
57a6839d 204 // A stack allocated UText wrapping a UChar * string
73c04bcf
A
205 // can be dumped without explicitly closing it.
206}
b75a7d8f 207
b75a7d8f 208
b75a7d8f 209
46f4442e 210U_CAPI void U_EXPORT2
73c04bcf
A
211ubrk_setUText(UBreakIterator *bi,
212 UText *text,
213 UErrorCode *status)
214{
b331163b 215 ((BreakIterator*)bi)->setText(text, *status);
b75a7d8f
A
216}
217
73c04bcf
A
218
219
220
221
b75a7d8f
A
222U_CAPI int32_t U_EXPORT2
223ubrk_current(const UBreakIterator *bi)
224{
225
b331163b 226 return ((BreakIterator*)bi)->current();
b75a7d8f
A
227}
228
229U_CAPI int32_t U_EXPORT2
230ubrk_next(UBreakIterator *bi)
231{
232
b331163b 233 return ((BreakIterator*)bi)->next();
b75a7d8f
A
234}
235
236U_CAPI int32_t U_EXPORT2
237ubrk_previous(UBreakIterator *bi)
238{
239
b331163b 240 return ((BreakIterator*)bi)->previous();
b75a7d8f
A
241}
242
243U_CAPI int32_t U_EXPORT2
244ubrk_first(UBreakIterator *bi)
245{
246
b331163b 247 return ((BreakIterator*)bi)->first();
b75a7d8f
A
248}
249
250U_CAPI int32_t U_EXPORT2
251ubrk_last(UBreakIterator *bi)
252{
253
b331163b 254 return ((BreakIterator*)bi)->last();
b75a7d8f
A
255}
256
257U_CAPI int32_t U_EXPORT2
258ubrk_preceding(UBreakIterator *bi,
259 int32_t offset)
260{
261
b331163b 262 return ((BreakIterator*)bi)->preceding(offset);
b75a7d8f
A
263}
264
265U_CAPI int32_t U_EXPORT2
266ubrk_following(UBreakIterator *bi,
267 int32_t offset)
268{
269
b331163b 270 return ((BreakIterator*)bi)->following(offset);
b75a7d8f
A
271}
272
273U_CAPI const char* U_EXPORT2
274ubrk_getAvailable(int32_t index)
275{
276
277 return uloc_getAvailable(index);
278}
279
280U_CAPI int32_t U_EXPORT2
281ubrk_countAvailable()
282{
283
284 return uloc_countAvailable();
285}
286
287
288U_CAPI UBool U_EXPORT2
289ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
290{
b331163b 291 return ((BreakIterator*)bi)->isBoundary(offset);
b75a7d8f
A
292}
293
294
295U_CAPI int32_t U_EXPORT2
296ubrk_getRuleStatus(UBreakIterator *bi)
297{
b331163b 298 return ((BreakIterator*)bi)->getRuleStatus();
b75a7d8f
A
299}
300
374ca955
A
301U_CAPI int32_t U_EXPORT2
302ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
303{
b331163b 304 return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status);
374ca955
A
305}
306
307
308U_CAPI const char* U_EXPORT2
73c04bcf
A
309ubrk_getLocaleByType(const UBreakIterator *bi,
310 ULocDataLocaleType type,
374ca955
A
311 UErrorCode* status)
312{
313 if (bi == NULL) {
314 if (U_SUCCESS(*status)) {
315 *status = U_ILLEGAL_ARGUMENT_ERROR;
316 }
317 return NULL;
318 }
319 return ((BreakIterator*)bi)->getLocaleID(type, *status);
320}
321
322
f3c0d7a5
A
323U_CAPI void U_EXPORT2
324ubrk_refreshUText(UBreakIterator *bi,
4388f060
A
325 UText *text,
326 UErrorCode *status)
327{
328 BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi);
329 bii->refreshInputText(text, *status);
330}
331
f3c0d7a5
A
332U_CAPI int32_t U_EXPORT2
333ubrk_getBinaryRules(UBreakIterator *bi,
334 uint8_t * binaryRules, int32_t rulesCapacity,
335 UErrorCode * status)
336{
337 if (U_FAILURE(*status)) {
338 return 0;
339 }
340 if ((binaryRules == NULL && rulesCapacity > 0) || rulesCapacity < 0) {
341 *status = U_ILLEGAL_ARGUMENT_ERROR;
342 return 0;
343 }
344 RuleBasedBreakIterator* rbbi;
345 if ((rbbi = dynamic_cast<RuleBasedBreakIterator*>(reinterpret_cast<BreakIterator*>(bi))) == NULL) {
346 *status = U_ILLEGAL_ARGUMENT_ERROR;
347 return 0;
348 }
349 uint32_t rulesLength;
350 const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength);
351 if (rulesLength > INT32_MAX) {
352 *status = U_INDEX_OUTOFBOUNDS_ERROR;
353 return 0;
354 }
355 if (binaryRules != NULL) { // if not preflighting
356 // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely
357 if ((int32_t)rulesLength > rulesCapacity) {
358 *status = U_BUFFER_OVERFLOW_ERROR;
359 } else {
360 uprv_memcpy(binaryRules, returnedRules, rulesLength);
361 }
362 }
363 return (int32_t)rulesLength;
364}
4388f060
A
365
366
b75a7d8f 367#endif /* #if !UCONFIG_NO_BREAK_ITERATION */