]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /* |
73c04bcf | 2 | ****************************************************************************** |
4388f060 | 3 | * Copyright (C) 1996-2011, International Business Machines Corporation and others. |
73c04bcf A |
4 | * All Rights Reserved. |
5 | ****************************************************************************** | |
b75a7d8f A |
6 | */ |
7 | ||
8 | #ifndef UBRK_H | |
9 | #define UBRK_H | |
10 | ||
11 | #include "unicode/utypes.h" | |
374ca955 | 12 | #include "unicode/uloc.h" |
73c04bcf | 13 | #include "unicode/utext.h" |
729e4ab9 | 14 | #include "unicode/localpointer.h" |
b75a7d8f A |
15 | |
16 | /** | |
17 | * A text-break iterator. | |
18 | * For usage in C programs. | |
19 | */ | |
20 | #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR | |
21 | # define UBRK_TYPEDEF_UBREAK_ITERATOR | |
22 | /** | |
23 | * Opaque type representing an ICU Break iterator object. | |
24 | * @stable ICU 2.0 | |
25 | */ | |
729e4ab9 | 26 | typedef struct UBreakIterator UBreakIterator; |
b75a7d8f A |
27 | #endif |
28 | ||
29 | #if !UCONFIG_NO_BREAK_ITERATION | |
30 | ||
31 | #include "unicode/parseerr.h" | |
32 | ||
33 | /** | |
34 | * \file | |
35 | * \brief C API: BreakIterator | |
36 | * | |
37 | * <h2> BreakIterator C API </h2> | |
38 | * | |
39 | * The BreakIterator C API defines methods for finding the location | |
40 | * of boundaries in text. A UBreakIterator maintains a | |
41 | * current position and scans over text, returning the index of characters | |
42 | * where boundaries occur. | |
73c04bcf | 43 | * <p> |
b75a7d8f A |
44 | * Line boundary analysis determines where a text string can be broken |
45 | * when line-wrapping. The mechanism correctly handles punctuation and | |
46 | * hyphenated words. | |
73c04bcf | 47 | * <p> |
b75a7d8f A |
48 | * Sentence boundary analysis allows selection with correct |
49 | * interpretation of periods within numbers and abbreviations, and | |
50 | * trailing punctuation marks such as quotation marks and parentheses. | |
73c04bcf | 51 | * <p> |
b75a7d8f A |
52 | * Word boundary analysis is used by search and replace functions, as |
53 | * well as within text editing applications that allow the user to | |
54 | * select words with a double click. Word selection provides correct | |
55 | * interpretation of punctuation marks within and following | |
56 | * words. Characters that are not part of a word, such as symbols or | |
57 | * punctuation marks, have word-breaks on both sides. | |
73c04bcf | 58 | * <p> |
729e4ab9 A |
59 | * Character boundary analysis identifies the boundaries of |
60 | * "Extended Grapheme Clusters", which are groupings of codepoints | |
61 | * that should be treated as character-like units for many text operations. | |
62 | * Please see Unicode Standard Annex #29, Unicode Text Segmentation, | |
63 | * http://www.unicode.org/reports/tr29/ for additional information | |
64 | * on grapheme clusters and guidelines on their use. | |
73c04bcf | 65 | * <p> |
b75a7d8f A |
66 | * Title boundary analysis locates all positions, |
67 | * typically starts of words, that should be set to Title Case | |
68 | * when title casing the text. | |
73c04bcf A |
69 | * <p> |
70 | * The text boundary positions are found according to the rules | |
71 | * described in Unicode Standard Annex #29, Unicode Text Segmentation, and | |
72 | * Unicode Standard Annex #14, Line Breaking Properties. These | |
73 | * are available at http://www.unicode.org/reports/tr29/ and | |
74 | * http://www.unicode.org/reports/tr14/, respectively. | |
75 | * <p> | |
76 | * In addition to the plain C API defined in this header file, an | |
77 | * object oriented C++ API with equivalent functionality is defined in the | |
78 | * file brkiter.h. | |
79 | * <p> | |
729e4ab9 | 80 | * Code snippets illustrating the use of the Break Iterator APIs |
46f4442e A |
81 | * are available in the ICU User Guide, |
82 | * http://icu-project.org/userguide/boundaryAnalysis.html | |
729e4ab9 | 83 | * and in the sample program icu/source/samples/break/break.cpp |
b75a7d8f A |
84 | */ |
85 | ||
86 | /** The possible types of text boundaries. @stable ICU 2.0 */ | |
87 | typedef enum UBreakIteratorType { | |
88 | /** Character breaks @stable ICU 2.0 */ | |
73c04bcf | 89 | UBRK_CHARACTER = 0, |
b75a7d8f | 90 | /** Word breaks @stable ICU 2.0 */ |
73c04bcf | 91 | UBRK_WORD = 1, |
b75a7d8f | 92 | /** Line breaks @stable ICU 2.0 */ |
73c04bcf | 93 | UBRK_LINE = 2, |
b75a7d8f | 94 | /** Sentence breaks @stable ICU 2.0 */ |
73c04bcf | 95 | UBRK_SENTENCE = 3, |
374ca955 A |
96 | |
97 | #ifndef U_HIDE_DEPRECATED_API | |
46f4442e A |
98 | /** |
99 | * Title Case breaks | |
100 | * The iterator created using this type locates title boundaries as described for | |
b75a7d8f | 101 | * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, |
374ca955 | 102 | * please use Word Boundary iterator. |
b75a7d8f | 103 | * |
374ca955 | 104 | * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later. |
b75a7d8f | 105 | */ |
73c04bcf | 106 | UBRK_TITLE = 4, |
374ca955 | 107 | #endif /* U_HIDE_DEPRECATED_API */ |
73c04bcf | 108 | UBRK_COUNT = 5 |
b75a7d8f A |
109 | } UBreakIteratorType; |
110 | ||
111 | /** Value indicating all text boundaries have been returned. | |
46f4442e | 112 | * @stable ICU 2.0 |
b75a7d8f A |
113 | */ |
114 | #define UBRK_DONE ((int32_t) -1) | |
115 | ||
116 | ||
117 | /** | |
118 | * Enum constants for the word break tags returned by | |
119 | * getRuleStatus(). A range of values is defined for each category of | |
120 | * word, to allow for further subdivisions of a category in future releases. | |
121 | * Applications should check for tag values falling within the range, rather | |
122 | * than for single individual values. | |
374ca955 | 123 | * @stable ICU 2.2 |
b75a7d8f A |
124 | */ |
125 | typedef enum UWordBreak { | |
46f4442e | 126 | /** Tag value for "words" that do not fit into any of the other categories. |
b75a7d8f A |
127 | * Includes spaces and most punctuation. */ |
128 | UBRK_WORD_NONE = 0, | |
129 | /** Upper bound for tags for uncategorized words. */ | |
130 | UBRK_WORD_NONE_LIMIT = 100, | |
131 | /** Tag value for words that appear to be numbers, lower limit. */ | |
132 | UBRK_WORD_NUMBER = 100, | |
133 | /** Tag value for words that appear to be numbers, upper limit. */ | |
134 | UBRK_WORD_NUMBER_LIMIT = 200, | |
135 | /** Tag value for words that contain letters, excluding | |
136 | * hiragana, katakana or ideographic characters, lower limit. */ | |
137 | UBRK_WORD_LETTER = 200, | |
138 | /** Tag value for words containing letters, upper limit */ | |
139 | UBRK_WORD_LETTER_LIMIT = 300, | |
140 | /** Tag value for words containing kana characters, lower limit */ | |
141 | UBRK_WORD_KANA = 300, | |
142 | /** Tag value for words containing kana characters, upper limit */ | |
143 | UBRK_WORD_KANA_LIMIT = 400, | |
144 | /** Tag value for words containing ideographic characters, lower limit */ | |
145 | UBRK_WORD_IDEO = 400, | |
146 | /** Tag value for words containing ideographic characters, upper limit */ | |
147 | UBRK_WORD_IDEO_LIMIT = 500 | |
148 | } UWordBreak; | |
149 | ||
374ca955 A |
150 | /** |
151 | * Enum constants for the line break tags returned by getRuleStatus(). | |
152 | * A range of values is defined for each category of | |
153 | * line break, to allow for further subdivisions of a category in future releases. | |
154 | * Applications should check for tag values falling within the range, rather | |
155 | * than for single individual values. | |
73c04bcf | 156 | * @stable ICU 2.8 |
374ca955 A |
157 | */ |
158 | typedef enum ULineBreakTag { | |
159 | /** Tag value for soft line breaks, positions at which a line break | |
160 | * is acceptable but not required */ | |
161 | UBRK_LINE_SOFT = 0, | |
162 | /** Upper bound for soft line breaks. */ | |
163 | UBRK_LINE_SOFT_LIMIT = 100, | |
164 | /** Tag value for a hard, or mandatory line break */ | |
165 | UBRK_LINE_HARD = 100, | |
166 | /** Upper bound for hard line breaks. */ | |
167 | UBRK_LINE_HARD_LIMIT = 200 | |
168 | } ULineBreakTag; | |
169 | ||
170 | ||
171 | ||
172 | /** | |
173 | * Enum constants for the sentence break tags returned by getRuleStatus(). | |
174 | * A range of values is defined for each category of | |
175 | * sentence, to allow for further subdivisions of a category in future releases. | |
176 | * Applications should check for tag values falling within the range, rather | |
177 | * than for single individual values. | |
73c04bcf | 178 | * @stable ICU 2.8 |
374ca955 A |
179 | */ |
180 | typedef enum USentenceBreakTag { | |
181 | /** Tag value for sentences ending with a sentence terminator | |
182 | * ('.', '?', '!', etc.) character, possibly followed by a | |
183 | * hard separator (CR, LF, PS, etc.) | |
184 | */ | |
185 | UBRK_SENTENCE_TERM = 0, | |
186 | /** Upper bound for tags for sentences ended by sentence terminators. */ | |
187 | UBRK_SENTENCE_TERM_LIMIT = 100, | |
188 | /** Tag value for sentences that do not contain an ending | |
46f4442e | 189 | * sentence terminator ('.', '?', '!', etc.) character, but |
374ca955 A |
190 | * are ended only by a hard separator (CR, LF, PS, etc.) or end of input. |
191 | */ | |
192 | UBRK_SENTENCE_SEP = 100, | |
193 | /** Upper bound for tags for sentences ended by a separator. */ | |
194 | UBRK_SENTENCE_SEP_LIMIT = 200 | |
195 | /** Tag value for a hard, or mandatory line break */ | |
196 | } USentenceBreakTag; | |
197 | ||
b75a7d8f A |
198 | |
199 | /** | |
200 | * Open a new UBreakIterator for locating text boundaries for a specified locale. | |
201 | * A UBreakIterator may be used for detecting character, line, word, | |
202 | * and sentence breaks in text. | |
203 | * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD, | |
204 | * UBRK_LINE, UBRK_SENTENCE | |
205 | * @param locale The locale specifying the text-breaking conventions. | |
206 | * @param text The text to be iterated over. | |
207 | * @param textLength The number of characters in text, or -1 if null-terminated. | |
208 | * @param status A UErrorCode to receive any errors. | |
209 | * @return A UBreakIterator for the specified locale. | |
210 | * @see ubrk_openRules | |
211 | * @stable ICU 2.0 | |
212 | */ | |
374ca955 | 213 | U_STABLE UBreakIterator* U_EXPORT2 |
b75a7d8f A |
214 | ubrk_open(UBreakIteratorType type, |
215 | const char *locale, | |
216 | const UChar *text, | |
217 | int32_t textLength, | |
218 | UErrorCode *status); | |
219 | ||
220 | /** | |
221 | * Open a new UBreakIterator for locating text boundaries using specified breaking rules. | |
222 | * The rule syntax is ... (TBD) | |
223 | * @param rules A set of rules specifying the text breaking conventions. | |
224 | * @param rulesLength The number of characters in rules, or -1 if null-terminated. | |
225 | * @param text The text to be iterated over. May be null, in which case ubrk_setText() is | |
226 | * used to specify the text to be iterated. | |
227 | * @param textLength The number of characters in text, or -1 if null-terminated. | |
228 | * @param parseErr Receives position and context information for any syntax errors | |
229 | * detected while parsing the rules. | |
230 | * @param status A UErrorCode to receive any errors. | |
231 | * @return A UBreakIterator for the specified rules. | |
232 | * @see ubrk_open | |
374ca955 | 233 | * @stable ICU 2.2 |
b75a7d8f | 234 | */ |
374ca955 | 235 | U_STABLE UBreakIterator* U_EXPORT2 |
b75a7d8f A |
236 | ubrk_openRules(const UChar *rules, |
237 | int32_t rulesLength, | |
238 | const UChar *text, | |
239 | int32_t textLength, | |
240 | UParseError *parseErr, | |
241 | UErrorCode *status); | |
242 | ||
243 | /** | |
244 | * Thread safe cloning operation | |
245 | * @param bi iterator to be cloned | |
246 | * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. | |
247 | * If buffer is not large enough, new memory will be allocated. | |
248 | * Clients can use the U_BRK_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations. | |
249 | * @param pBufferSize pointer to size of allocated space. | |
250 | * If *pBufferSize == 0, a sufficient size for use in cloning will | |
251 | * be returned ('pre-flighting') | |
252 | * If *pBufferSize is not enough for a stack-based safe clone, | |
253 | * new memory will be allocated. | |
254 | * @param status to indicate whether the operation went on smoothly or there were errors | |
255 | * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary. | |
256 | * @return pointer to the new clone | |
257 | * @stable ICU 2.0 | |
258 | */ | |
374ca955 | 259 | U_STABLE UBreakIterator * U_EXPORT2 |
b75a7d8f A |
260 | ubrk_safeClone( |
261 | const UBreakIterator *bi, | |
262 | void *stackBuffer, | |
263 | int32_t *pBufferSize, | |
264 | UErrorCode *status); | |
265 | ||
266 | /** | |
267 | * A recommended size (in bytes) for the memory buffer to be passed to ubrk_safeClone(). | |
268 | * @stable ICU 2.0 | |
269 | */ | |
4388f060 | 270 | #define U_BRK_SAFECLONE_BUFFERSIZE 528 |
b75a7d8f A |
271 | |
272 | /** | |
273 | * Close a UBreakIterator. | |
274 | * Once closed, a UBreakIterator may no longer be used. | |
275 | * @param bi The break iterator to close. | |
276 | * @stable ICU 2.0 | |
277 | */ | |
374ca955 | 278 | U_STABLE void U_EXPORT2 |
b75a7d8f A |
279 | ubrk_close(UBreakIterator *bi); |
280 | ||
729e4ab9 A |
281 | #if U_SHOW_CPLUSPLUS_API |
282 | ||
283 | U_NAMESPACE_BEGIN | |
284 | ||
285 | /** | |
286 | * \class LocalUBreakIteratorPointer | |
287 | * "Smart pointer" class, closes a UBreakIterator via ubrk_close(). | |
288 | * For most methods see the LocalPointerBase base class. | |
289 | * | |
290 | * @see LocalPointerBase | |
291 | * @see LocalPointer | |
292 | * @stable ICU 4.4 | |
293 | */ | |
294 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close); | |
295 | ||
296 | U_NAMESPACE_END | |
297 | ||
298 | #endif | |
299 | ||
b75a7d8f A |
300 | /** |
301 | * Sets an existing iterator to point to a new piece of text | |
302 | * @param bi The iterator to use | |
303 | * @param text The text to be set | |
304 | * @param textLength The length of the text | |
305 | * @param status The error code | |
306 | * @stable ICU 2.0 | |
307 | */ | |
374ca955 | 308 | U_STABLE void U_EXPORT2 |
b75a7d8f A |
309 | ubrk_setText(UBreakIterator* bi, |
310 | const UChar* text, | |
311 | int32_t textLength, | |
312 | UErrorCode* status); | |
313 | ||
73c04bcf A |
314 | |
315 | /** | |
316 | * Sets an existing iterator to point to a new piece of text | |
317 | * @param bi The iterator to use | |
46f4442e A |
318 | * @param text The text to be set. |
319 | * This function makes a shallow clone of the supplied UText. This means | |
320 | * that the caller is free to immediately close or otherwise reuse the | |
321 | * UText that was passed as a parameter, but that the underlying text itself | |
322 | * must not be altered while being referenced by the break iterator. | |
73c04bcf | 323 | * @param status The error code |
46f4442e | 324 | * @stable ICU 3.4 |
73c04bcf | 325 | */ |
46f4442e | 326 | U_STABLE void U_EXPORT2 |
73c04bcf A |
327 | ubrk_setUText(UBreakIterator* bi, |
328 | UText* text, | |
329 | UErrorCode* status); | |
330 | ||
331 | ||
332 | ||
b75a7d8f A |
333 | /** |
334 | * Determine the most recently-returned text boundary. | |
335 | * | |
336 | * @param bi The break iterator to use. | |
374ca955 A |
337 | * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous, |
338 | * \ref ubrk_first, or \ref ubrk_last. | |
b75a7d8f A |
339 | * @stable ICU 2.0 |
340 | */ | |
374ca955 | 341 | U_STABLE int32_t U_EXPORT2 |
b75a7d8f A |
342 | ubrk_current(const UBreakIterator *bi); |
343 | ||
344 | /** | |
345 | * Determine the text boundary following the current text boundary. | |
346 | * | |
347 | * @param bi The break iterator to use. | |
348 | * @return The character index of the next text boundary, or UBRK_DONE | |
349 | * if all text boundaries have been returned. | |
350 | * @see ubrk_previous | |
351 | * @stable ICU 2.0 | |
352 | */ | |
374ca955 | 353 | U_STABLE int32_t U_EXPORT2 |
b75a7d8f A |
354 | ubrk_next(UBreakIterator *bi); |
355 | ||
356 | /** | |
357 | * Determine the text boundary preceding the current text boundary. | |
358 | * | |
359 | * @param bi The break iterator to use. | |
360 | * @return The character index of the preceding text boundary, or UBRK_DONE | |
361 | * if all text boundaries have been returned. | |
362 | * @see ubrk_next | |
363 | * @stable ICU 2.0 | |
364 | */ | |
374ca955 | 365 | U_STABLE int32_t U_EXPORT2 |
b75a7d8f A |
366 | ubrk_previous(UBreakIterator *bi); |
367 | ||
368 | /** | |
369 | * Determine the index of the first character in the text being scanned. | |
370 | * This is not always the same as index 0 of the text. | |
371 | * @param bi The break iterator to use. | |
372 | * @return The character index of the first character in the text being scanned. | |
373 | * @see ubrk_last | |
374 | * @stable ICU 2.0 | |
375 | */ | |
374ca955 | 376 | U_STABLE int32_t U_EXPORT2 |
b75a7d8f A |
377 | ubrk_first(UBreakIterator *bi); |
378 | ||
379 | /** | |
380 | * Determine the index immediately <EM>beyond</EM> the last character in the text being | |
381 | * scanned. | |
382 | * This is not the same as the last character. | |
383 | * @param bi The break iterator to use. | |
384 | * @return The character offset immediately <EM>beyond</EM> the last character in the | |
385 | * text being scanned. | |
386 | * @see ubrk_first | |
387 | * @stable ICU 2.0 | |
388 | */ | |
374ca955 | 389 | U_STABLE int32_t U_EXPORT2 |
b75a7d8f A |
390 | ubrk_last(UBreakIterator *bi); |
391 | ||
392 | /** | |
393 | * Determine the text boundary preceding the specified offset. | |
394 | * The value returned is always smaller than offset, or UBRK_DONE. | |
395 | * @param bi The break iterator to use. | |
396 | * @param offset The offset to begin scanning. | |
397 | * @return The text boundary preceding offset, or UBRK_DONE. | |
398 | * @see ubrk_following | |
399 | * @stable ICU 2.0 | |
400 | */ | |
374ca955 | 401 | U_STABLE int32_t U_EXPORT2 |
b75a7d8f A |
402 | ubrk_preceding(UBreakIterator *bi, |
403 | int32_t offset); | |
404 | ||
405 | /** | |
406 | * Determine the text boundary following the specified offset. | |
407 | * The value returned is always greater than offset, or UBRK_DONE. | |
408 | * @param bi The break iterator to use. | |
409 | * @param offset The offset to begin scanning. | |
410 | * @return The text boundary following offset, or UBRK_DONE. | |
411 | * @see ubrk_preceding | |
412 | * @stable ICU 2.0 | |
413 | */ | |
374ca955 | 414 | U_STABLE int32_t U_EXPORT2 |
b75a7d8f A |
415 | ubrk_following(UBreakIterator *bi, |
416 | int32_t offset); | |
417 | ||
418 | /** | |
419 | * Get a locale for which text breaking information is available. | |
420 | * A UBreakIterator in a locale returned by this function will perform the correct | |
421 | * text breaking for the locale. | |
422 | * @param index The index of the desired locale. | |
423 | * @return A locale for which text breaking information is available, or 0 if none. | |
424 | * @see ubrk_countAvailable | |
425 | * @stable ICU 2.0 | |
426 | */ | |
374ca955 | 427 | U_STABLE const char* U_EXPORT2 |
b75a7d8f A |
428 | ubrk_getAvailable(int32_t index); |
429 | ||
430 | /** | |
431 | * Determine how many locales have text breaking information available. | |
432 | * This function is most useful for determining the loop ending condition for | |
374ca955 | 433 | * calls to \ref ubrk_getAvailable. |
b75a7d8f A |
434 | * @return The number of locales for which text breaking information is available. |
435 | * @see ubrk_getAvailable | |
436 | * @stable ICU 2.0 | |
437 | */ | |
374ca955 | 438 | U_STABLE int32_t U_EXPORT2 |
b75a7d8f A |
439 | ubrk_countAvailable(void); |
440 | ||
441 | ||
442 | /** | |
443 | * Returns true if the specified position is a boundary position. As a side | |
444 | * effect, leaves the iterator pointing to the first boundary position at | |
445 | * or after "offset". | |
446 | * @param bi The break iterator to use. | |
447 | * @param offset the offset to check. | |
448 | * @return True if "offset" is a boundary position. | |
449 | * @stable ICU 2.0 | |
450 | */ | |
374ca955 | 451 | U_STABLE UBool U_EXPORT2 |
b75a7d8f A |
452 | ubrk_isBoundary(UBreakIterator *bi, int32_t offset); |
453 | ||
454 | /** | |
455 | * Return the status from the break rule that determined the most recently | |
456 | * returned break position. The values appear in the rule source | |
457 | * within brackets, {123}, for example. For rules that do not specify a | |
458 | * status, a default value of 0 is returned. | |
459 | * <p> | |
460 | * For word break iterators, the possible values are defined in enum UWordBreak. | |
374ca955 | 461 | * @stable ICU 2.2 |
b75a7d8f | 462 | */ |
374ca955 | 463 | U_STABLE int32_t U_EXPORT2 |
b75a7d8f A |
464 | ubrk_getRuleStatus(UBreakIterator *bi); |
465 | ||
374ca955 A |
466 | /** |
467 | * Get the statuses from the break rules that determined the most recently | |
468 | * returned break position. The values appear in the rule source | |
469 | * within brackets, {123}, for example. The default status value for rules | |
470 | * that do not explicitly provide one is zero. | |
471 | * <p> | |
472 | * For word break iterators, the possible values are defined in enum UWordBreak. | |
473 | * @param bi The break iterator to use | |
46f4442e | 474 | * @param fillInVec an array to be filled in with the status values. |
374ca955 A |
475 | * @param capacity the length of the supplied vector. A length of zero causes |
476 | * the function to return the number of status values, in the | |
477 | * normal way, without attempting to store any values. | |
46f4442e A |
478 | * @param status receives error codes. |
479 | * @return The number of rule status values from rules that determined | |
374ca955 | 480 | * the most recent boundary returned by the break iterator. |
73c04bcf | 481 | * @stable ICU 3.0 |
374ca955 | 482 | */ |
73c04bcf | 483 | U_STABLE int32_t U_EXPORT2 |
374ca955 A |
484 | ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status); |
485 | ||
486 | /** | |
487 | * Return the locale of the break iterator. You can choose between the valid and | |
488 | * the actual locale. | |
489 | * @param bi break iterator | |
490 | * @param type locale type (valid or actual) | |
491 | * @param status error code | |
492 | * @return locale string | |
73c04bcf | 493 | * @stable ICU 2.8 |
374ca955 | 494 | */ |
73c04bcf | 495 | U_STABLE const char* U_EXPORT2 |
374ca955 A |
496 | ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status); |
497 | ||
4388f060 A |
498 | #ifndef U_HIDE_DRAFT_API |
499 | /** | |
500 | * Set the subject text string upon which the break iterator is operating | |
501 | * without changing any other aspect of the state. | |
502 | * The new and previous text strings must have the same content. | |
503 | * | |
504 | * This function is intended for use in environments where ICU is operating on | |
505 | * strings that may move around in memory. It provides a mechanism for notifying | |
506 | * ICU that the string has been relocated, and providing a new UText to access the | |
507 | * string in its new position. | |
508 | * | |
509 | * Note that the break iterator never copies the underlying text | |
510 | * of a string being processed, but always operates directly on the original text | |
511 | * provided by the user. Refreshing simply drops the references to the old text | |
512 | * and replaces them with references to the new. | |
513 | * | |
514 | * Caution: this function is normally used only by very specialized | |
515 | * system-level code. One example use case is with garbage collection | |
516 | * that moves the text in memory. | |
517 | * | |
518 | * @param bi The break iterator. | |
519 | * @param text The new (moved) text string. | |
520 | * @param status Receives errors detected by this function. | |
521 | * | |
522 | * @draft ICU 49 | |
523 | */ | |
524 | U_DRAFT void U_EXPORT2 | |
525 | ubrk_refreshUText(UBreakIterator *bi, | |
526 | UText *text, | |
527 | UErrorCode *status); | |
528 | #endif /* U_HIDE_DRAFT_API */ | |
374ca955 | 529 | |
b75a7d8f A |
530 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
531 | ||
532 | #endif |