]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unicode/brkiter.h
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / common / unicode / brkiter.h
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4********************************************************************************
2ca993e8 5* Copyright (C) 1997-2016, International Business Machines
b75a7d8f
A
6* Corporation and others. All Rights Reserved.
7********************************************************************************
8*
9* File brkiter.h
10*
11* Modification History:
12*
13* Date Name Description
14* 02/18/97 aliu Added typedef for TextCount. Made DONE const.
15* 05/07/97 aliu Fixed DLL declaration.
16* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
17* 08/11/98 helena Sync-up JDK1.2.
18* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
19********************************************************************************
20*/
21
22#ifndef BRKITER_H
23#define BRKITER_H
24
25#include "unicode/utypes.h"
26
73c04bcf
A
27/**
28 * \file
29 * \brief C++ API: Break Iterator.
30 */
46f4442e 31
b75a7d8f
A
32#if UCONFIG_NO_BREAK_ITERATION
33
f3c0d7a5 34#if U_SHOW_CPLUSPLUS_API
b75a7d8f
A
35U_NAMESPACE_BEGIN
36
37/*
38 * Allow the declaration of APIs with pointers to BreakIterator
39 * even when break iteration is removed from the build.
40 */
41class BreakIterator;
42
43U_NAMESPACE_END
f3c0d7a5 44#endif // U_SHOW_CPLUSPLUS_API
b75a7d8f
A
45
46#else
47
48#include "unicode/uobject.h"
49#include "unicode/unistr.h"
50#include "unicode/chariter.h"
51#include "unicode/locid.h"
52#include "unicode/ubrk.h"
53#include "unicode/strenum.h"
73c04bcf
A
54#include "unicode/utext.h"
55#include "unicode/umisc.h"
b75a7d8f 56
f3c0d7a5 57#if U_SHOW_CPLUSPLUS_API
b75a7d8f
A
58U_NAMESPACE_BEGIN
59
b75a7d8f
A
60/**
61 * The BreakIterator class implements methods for finding the location
62 * of boundaries in text. BreakIterator is an abstract base class.
63 * Instances of BreakIterator maintain a current position and scan over
64 * text returning the index of characters where boundaries occur.
73c04bcf 65 * <p>
b75a7d8f
A
66 * Line boundary analysis determines where a text string can be broken
67 * when line-wrapping. The mechanism correctly handles punctuation and
68 * hyphenated words.
73c04bcf 69 * <p>
b75a7d8f
A
70 * Sentence boundary analysis allows selection with correct
71 * interpretation of periods within numbers and abbreviations, and
72 * trailing punctuation marks such as quotation marks and parentheses.
73c04bcf 73 * <p>
b75a7d8f
A
74 * Word boundary analysis is used by search and replace functions, as
75 * well as within text editing applications that allow the user to
76 * select words with a double click. Word selection provides correct
77 * interpretation of punctuation marks within and following
78 * words. Characters that are not part of a word, such as symbols or
79 * punctuation marks, have word-breaks on both sides.
73c04bcf 80 * <p>
b75a7d8f
A
81 * Character boundary analysis allows users to interact with
82 * characters as they expect to, for example, when moving the cursor
83 * through a text string. Character boundary analysis provides correct
84 * navigation through character strings, regardless of how the
85 * character is stored. For example, an accented character might be
86 * stored as a base character and a diacritical mark. What users
87 * consider to be a character can differ between languages.
73c04bcf
A
88 * <p>
89 * The text boundary positions are found according to the rules
90 * described in Unicode Standard Annex #29, Text Boundaries, and
91 * Unicode Standard Annex #14, Line Breaking Properties. These
92 * are available at http://www.unicode.org/reports/tr29/ and
93 * http://www.unicode.org/reports/tr14/.
94 * <p>
95 * In addition to the C++ API defined in this header file, a
96 * plain C API with equivalent functionality is defined in the
97 * file ubrk.h
98 * <p>
729e4ab9 99 * Code snippets illustrating the use of the Break Iterator APIs
46f4442e
A
100 * are available in the ICU User Guide,
101 * http://icu-project.org/userguide/boundaryAnalysis.html
729e4ab9 102 * and in the sample program icu/source/samples/break/break.cpp
b75a7d8f 103 *
b75a7d8f
A
104 */
105class U_COMMON_API BreakIterator : public UObject {
106public:
107 /**
108 * destructor
109 * @stable ICU 2.0
110 */
111 virtual ~BreakIterator();
112
113 /**
114 * Return true if another object is semantically equal to this
115 * one. The other object should be an instance of the same subclass of
116 * BreakIterator. Objects of different subclasses are considered
117 * unequal.
118 * <P>
119 * Return true if this BreakIterator is at the same position in the
120 * same text, and is the same class and type (word, line, etc.) of
121 * BreakIterator, as the argument. Text is considered the same if
122 * it contains the same characters, it need not be the same
123 * object, and styles are not considered.
124 * @stable ICU 2.0
125 */
126 virtual UBool operator==(const BreakIterator&) const = 0;
127
128 /**
129 * Returns the complement of the result of operator==
130 * @param rhs The BreakIterator to be compared for inequality
131 * @return the complement of the result of operator==
132 * @stable ICU 2.0
133 */
134 UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
135
136 /**
137 * Return a polymorphic copy of this object. This is an abstract
138 * method which subclasses implement.
139 * @stable ICU 2.0
140 */
141 virtual BreakIterator* clone(void) const = 0;
142
143 /**
144 * Return a polymorphic class ID for this object. Different subclasses
145 * will return distinct unequal values.
146 * @stable ICU 2.0
147 */
148 virtual UClassID getDynamicClassID(void) const = 0;
149
150 /**
151 * Return a CharacterIterator over the text being analyzed.
b75a7d8f
A
152 * @stable ICU 2.0
153 */
73c04bcf
A
154 virtual CharacterIterator& getText(void) const = 0;
155
156
157 /**
158 * Get a UText for the text being analyzed.
159 * The returned UText is a shallow clone of the UText used internally
160 * by the break iterator implementation. It can safely be used to
161 * access the text without impacting any break iterator operations,
162 * but the underlying text itself must not be altered.
163 *
164 * @param fillIn A UText to be filled in. If NULL, a new UText will be
165 * allocated to hold the result.
166 * @param status receives any error codes.
167 * @return The current UText for this break iterator. If an input
168 * UText was provided, it will always be returned.
46f4442e 169 * @stable ICU 3.4
73c04bcf
A
170 */
171 virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
b75a7d8f
A
172
173 /**
174 * Change the text over which this operates. The text boundary is
175 * reset to the start.
f3c0d7a5
A
176 *
177 * The BreakIterator will retain a reference to the supplied string.
178 * The caller must not modify or delete the text while the BreakIterator
179 * retains the reference.
180 *
b75a7d8f
A
181 * @param text The UnicodeString used to change the text.
182 * @stable ICU 2.0
183 */
184 virtual void setText(const UnicodeString &text) = 0;
185
73c04bcf 186 /**
46f4442e 187 * Reset the break iterator to operate over the text represented by
73c04bcf
A
188 * the UText. The iterator position is reset to the start.
189 *
190 * This function makes a shallow clone of the supplied UText. This means
191 * that the caller is free to immediately close or otherwise reuse the
192 * UText that was passed as a parameter, but that the underlying text itself
193 * must not be altered while being referenced by the break iterator.
194 *
51004dcb
A
195 * All index positions returned by break iterator functions are
196 * native indices from the UText. For example, when breaking UTF-8
197 * encoded text, the break positions returned by next(), previous(), etc.
198 * will be UTF-8 string indices, not UTF-16 positions.
199 *
73c04bcf
A
200 * @param text The UText used to change the text.
201 * @param status receives any error codes.
46f4442e 202 * @stable ICU 3.4
73c04bcf
A
203 */
204 virtual void setText(UText *text, UErrorCode &status) = 0;
205
b75a7d8f
A
206 /**
207 * Change the text over which this operates. The text boundary is
208 * reset to the start.
73c04bcf
A
209 * Note that setText(UText *) provides similar functionality to this function,
210 * and is more efficient.
b75a7d8f
A
211 * @param it The CharacterIterator used to change the text.
212 * @stable ICU 2.0
213 */
214 virtual void adoptText(CharacterIterator* it) = 0;
215
73c04bcf
A
216 enum {
217 /**
218 * DONE is returned by previous() and next() after all valid
219 * boundaries have been returned.
220 * @stable ICU 2.0
221 */
222 DONE = (int32_t)-1
223 };
b75a7d8f
A
224
225 /**
b331163b
A
226 * Sets the current iteration position to the beginning of the text, position zero.
227 * @return The offset of the beginning of the text, zero.
b75a7d8f
A
228 * @stable ICU 2.0
229 */
230 virtual int32_t first(void) = 0;
231
232 /**
51004dcb
A
233 * Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
234 * @return The index immediately BEYOND the last character in the text being scanned.
b75a7d8f
A
235 * @stable ICU 2.0
236 */
237 virtual int32_t last(void) = 0;
238
239 /**
51004dcb 240 * Set the iterator position to the boundary preceding the current boundary.
b75a7d8f
A
241 * @return The character index of the previous text boundary or DONE if all
242 * boundaries have been returned.
243 * @stable ICU 2.0
244 */
245 virtual int32_t previous(void) = 0;
246
247 /**
51004dcb 248 * Advance the iterator to the boundary following the current boundary.
b75a7d8f
A
249 * @return The character index of the next text boundary or DONE if all
250 * boundaries have been returned.
251 * @stable ICU 2.0
252 */
253 virtual int32_t next(void) = 0;
254
255 /**
0f5d89e8 256 * Return character index of the current iterator position within the text.
b75a7d8f
A
257 * @return The boundary most recently returned.
258 * @stable ICU 2.0
259 */
260 virtual int32_t current(void) const = 0;
261
262 /**
51004dcb 263 * Advance the iterator to the first boundary following the specified offset.
b75a7d8f
A
264 * The value returned is always greater than the offset or
265 * the value BreakIterator::DONE
266 * @param offset the offset to begin scanning.
267 * @return The first boundary after the specified offset.
268 * @stable ICU 2.0
269 */
270 virtual int32_t following(int32_t offset) = 0;
271
272 /**
51004dcb 273 * Set the iterator position to the first boundary preceding the specified offset.
b75a7d8f
A
274 * The value returned is always smaller than the offset or
275 * the value BreakIterator::DONE
276 * @param offset the offset to begin scanning.
277 * @return The first boundary before the specified offset.
278 * @stable ICU 2.0
279 */
280 virtual int32_t preceding(int32_t offset) = 0;
281
282 /**
0f5d89e8 283 * Return true if the specified position is a boundary position.
b75a7d8f
A
284 * As a side effect, the current position of the iterator is set
285 * to the first boundary position at or following the specified offset.
286 * @param offset the offset to check.
287 * @return True if "offset" is a boundary position.
288 * @stable ICU 2.0
289 */
290 virtual UBool isBoundary(int32_t offset) = 0;
291
292 /**
51004dcb
A
293 * Set the iterator position to the nth boundary from the current boundary
294 * @param n the number of boundaries to move by. A value of 0
b75a7d8f
A
295 * does nothing. Negative values move to previous boundaries
296 * and positive values move to later boundaries.
51004dcb 297 * @return The new iterator position, or
0f5d89e8 298 * DONE if there are fewer than |n| boundaries in the specified direction.
b75a7d8f
A
299 * @stable ICU 2.0
300 */
301 virtual int32_t next(int32_t n) = 0;
302
57a6839d 303 /**
0f5d89e8
A
304 * For RuleBasedBreakIterators, return the status tag from the break rule
305 * that determined the boundary at the current iteration position.
57a6839d
A
306 * <p>
307 * For break iterator types that do not support a rule status,
308 * a default value of 0 is returned.
309 * <p>
0f5d89e8
A
310 * @return the status from the break rule that determined the boundary at
311 * the current iteration position.
57a6839d
A
312 * @see RuleBasedBreakIterator::getRuleStatus()
313 * @see UWordBreak
b331163b 314 * @stable ICU 52
57a6839d
A
315 */
316 virtual int32_t getRuleStatus() const;
317
318 /**
2ca993e8 319 * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
0f5d89e8 320 * that determined the boundary at the current iteration position.
57a6839d
A
321 * <p>
322 * For break iterator types that do not support rule status,
323 * no values are returned.
324 * <p>
325 * The returned status value(s) are stored into an array provided by the caller.
326 * The values are stored in sorted (ascending) order.
327 * If the capacity of the output array is insufficient to hold the data,
328 * the output will be truncated to the available length, and a
329 * U_BUFFER_OVERFLOW_ERROR will be signaled.
330 * <p>
331 * @see RuleBasedBreakIterator::getRuleStatusVec
332 *
333 * @param fillInVec an array to be filled in with the status values.
334 * @param capacity the length of the supplied vector. A length of zero causes
335 * the function to return the number of status values, in the
0f5d89e8 336 * normal way, without attempting to store any values.
57a6839d
A
337 * @param status receives error codes.
338 * @return The number of rule status values from rules that determined
0f5d89e8 339 * the boundary at the current iteration position.
57a6839d
A
340 * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
341 * is the total number of status values that were available,
342 * not the reduced number that were actually returned.
343 * @see getRuleStatus
b331163b 344 * @stable ICU 52
57a6839d
A
345 */
346 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
347
b75a7d8f
A
348 /**
349 * Create BreakIterator for word-breaks using the given locale.
350 * Returns an instance of a BreakIterator implementing word breaks.
351 * WordBreak is useful for word selection (ex. double click)
352 * @param where the locale.
353 * @param status the error code
354 * @return A BreakIterator for word-breaks. The UErrorCode& status
355 * parameter is used to return status information to the user.
356 * To check whether the construction succeeded or not, you should check
357 * the value of U_SUCCESS(status). If you wish more detailed information, you
358 * can check for informational error results which still indicate success.
359 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
360 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
361 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
362 * used; neither the requested locale nor any of its fall back locales
363 * could be found.
364 * The caller owns the returned object and is responsible for deleting it.
365 * @stable ICU 2.0
366 */
374ca955
A
367 static BreakIterator* U_EXPORT2
368 createWordInstance(const Locale& where, UErrorCode& status);
b75a7d8f
A
369
370 /**
371 * Create BreakIterator for line-breaks using specified locale.
372 * Returns an instance of a BreakIterator implementing line breaks. Line
373 * breaks are logically possible line breaks, actual line breaks are
374 * usually determined based on display width.
375 * LineBreak is useful for word wrapping text.
376 * @param where the locale.
377 * @param status The error code.
378 * @return A BreakIterator for line-breaks. The UErrorCode& status
379 * parameter is used to return status information to the user.
380 * To check whether the construction succeeded or not, you should check
381 * the value of U_SUCCESS(status). If you wish more detailed information, you
382 * can check for informational error results which still indicate success.
383 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
384 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
385 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
386 * used; neither the requested locale nor any of its fall back locales
387 * could be found.
388 * The caller owns the returned object and is responsible for deleting it.
389 * @stable ICU 2.0
390 */
374ca955
A
391 static BreakIterator* U_EXPORT2
392 createLineInstance(const Locale& where, UErrorCode& status);
b75a7d8f
A
393
394 /**
395 * Create BreakIterator for character-breaks using specified locale
396 * Returns an instance of a BreakIterator implementing character breaks.
397 * Character breaks are boundaries of combining character sequences.
398 * @param where the locale.
399 * @param status The error code.
400 * @return A BreakIterator for character-breaks. The UErrorCode& status
401 * parameter is used to return status information to the user.
402 * To check whether the construction succeeded or not, you should check
403 * the value of U_SUCCESS(status). If you wish more detailed information, you
404 * can check for informational error results which still indicate success.
405 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
406 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
407 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
408 * used; neither the requested locale nor any of its fall back locales
409 * could be found.
410 * The caller owns the returned object and is responsible for deleting it.
411 * @stable ICU 2.0
412 */
374ca955
A
413 static BreakIterator* U_EXPORT2
414 createCharacterInstance(const Locale& where, UErrorCode& status);
b75a7d8f
A
415
416 /**
417 * Create BreakIterator for sentence-breaks using specified locale
418 * Returns an instance of a BreakIterator implementing sentence breaks.
419 * @param where the locale.
420 * @param status The error code.
421 * @return A BreakIterator for sentence-breaks. The UErrorCode& status
422 * parameter is used to return status information to the user.
423 * To check whether the construction succeeded or not, you should check
424 * the value of U_SUCCESS(status). If you wish more detailed information, you
425 * can check for informational error results which still indicate success.
426 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
427 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
428 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
429 * used; neither the requested locale nor any of its fall back locales
430 * could be found.
431 * The caller owns the returned object and is responsible for deleting it.
432 * @stable ICU 2.0
433 */
374ca955
A
434 static BreakIterator* U_EXPORT2
435 createSentenceInstance(const Locale& where, UErrorCode& status);
b75a7d8f 436
3d1f044b 437#ifndef U_HIDE_DEPRECATED_API
b75a7d8f
A
438 /**
439 * Create BreakIterator for title-casing breaks using the specified locale
440 * Returns an instance of a BreakIterator implementing title breaks.
374ca955 441 * The iterator returned locates title boundaries as described for
b75a7d8f 442 * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
3d1f044b 443 * please use a word boundary iterator. See {@link #createWordInstance }.
b75a7d8f
A
444 *
445 * @param where the locale.
446 * @param status The error code.
447 * @return A BreakIterator for title-breaks. The UErrorCode& status
448 * parameter is used to return status information to the user.
449 * To check whether the construction succeeded or not, you should check
450 * the value of U_SUCCESS(status). If you wish more detailed information, you
451 * can check for informational error results which still indicate success.
452 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
453 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
454 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
455 * used; neither the requested locale nor any of its fall back locales
456 * could be found.
457 * The caller owns the returned object and is responsible for deleting it.
3d1f044b 458 * @deprecated ICU 64 Use createWordInstance instead.
b75a7d8f 459 */
374ca955
A
460 static BreakIterator* U_EXPORT2
461 createTitleInstance(const Locale& where, UErrorCode& status);
3d1f044b 462#endif /* U_HIDE_DEPRECATED_API */
b75a7d8f
A
463
464 /**
465 * Get the set of Locales for which TextBoundaries are installed.
466 * <p><b>Note:</b> this will not return locales added through the register
374ca955
A
467 * call. To see the registered locales too, use the getAvailableLocales
468 * function that returns a StringEnumeration object </p>
b75a7d8f
A
469 * @param count the output parameter of number of elements in the locale list
470 * @return available locales
471 * @stable ICU 2.0
472 */
374ca955 473 static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
b75a7d8f
A
474
475 /**
0f5d89e8 476 * Get name of the object for the desired Locale, in the desired language.
b75a7d8f
A
477 * @param objectLocale must be from getAvailableLocales.
478 * @param displayLocale specifies the desired locale for output.
479 * @param name the fill-in parameter of the return value
480 * Uses best match.
481 * @return user-displayable name
482 * @stable ICU 2.0
483 */
374ca955 484 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
b75a7d8f
A
485 const Locale& displayLocale,
486 UnicodeString& name);
487
488 /**
0f5d89e8 489 * Get name of the object for the desired Locale, in the language of the
b75a7d8f
A
490 * default locale.
491 * @param objectLocale must be from getAvailableLocales
492 * @param name the fill-in parameter of the return value
493 * @return user-displayable name
494 * @stable ICU 2.0
495 */
374ca955 496 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
b75a7d8f
A
497 UnicodeString& name);
498
499 /**
57a6839d
A
500 * Deprecated functionality. Use clone() instead.
501 *
b75a7d8f
A
502 * Thread safe client-buffer-based cloning operation
503 * Do NOT call delete on a safeclone, since 'new' is not used to create it.
504 * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
505 * If buffer is not large enough, new memory will be allocated.
506 * @param BufferSize reference to size of allocated space.
507 * If BufferSize == 0, a sufficient size for use in cloning will
508 * be returned ('pre-flighting')
509 * If BufferSize is not enough for a stack-based safe clone,
510 * new memory will be allocated.
511 * @param status to indicate whether the operation went on smoothly or there were errors
512 * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
513 * necessary.
514 * @return pointer to the new clone
515 *
57a6839d 516 * @deprecated ICU 52. Use clone() instead.
b75a7d8f
A
517 */
518 virtual BreakIterator * createBufferClone(void *stackBuffer,
519 int32_t &BufferSize,
520 UErrorCode &status) = 0;
521
57a6839d
A
522#ifndef U_HIDE_DEPRECATED_API
523
b75a7d8f
A
524 /**
525 * Determine whether the BreakIterator was created in user memory by
526 * createBufferClone(), and thus should not be deleted. Such objects
527 * must be closed by an explicit call to the destructor (not delete).
57a6839d 528 * @deprecated ICU 52. Always delete the BreakIterator.
b75a7d8f
A
529 */
530 inline UBool isBufferClone(void);
531
57a6839d
A
532#endif /* U_HIDE_DEPRECATED_API */
533
374ca955 534#if !UCONFIG_NO_SERVICE
b75a7d8f
A
535 /**
536 * Register a new break iterator of the indicated kind, to use in the given locale.
374ca955 537 * The break iterator will be adopted. Clones of the iterator will be returned
b75a7d8f
A
538 * if a request for a break iterator of the given kind matches or falls back to
539 * this locale.
57a6839d
A
540 * Because ICU may choose to cache BreakIterators internally, this must
541 * be called at application startup, prior to any calls to
542 * BreakIterator::createXXXInstance to avoid undefined behavior.
b75a7d8f
A
543 * @param toAdopt the BreakIterator instance to be adopted
544 * @param locale the Locale for which this instance is to be registered
545 * @param kind the type of iterator for which this instance is to be registered
546 * @param status the in/out status code, no special meanings are assigned
547 * @return a registry key that can be used to unregister this instance
374ca955 548 * @stable ICU 2.4
b75a7d8f 549 */
374ca955
A
550 static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
551 const Locale& locale,
552 UBreakIteratorType kind,
553 UErrorCode& status);
b75a7d8f
A
554
555 /**
556 * Unregister a previously-registered BreakIterator using the key returned from the
557 * register call. Key becomes invalid after a successful call and should not be used again.
558 * The BreakIterator corresponding to the key will be deleted.
57a6839d
A
559 * Because ICU may choose to cache BreakIterators internally, this should
560 * be called during application shutdown, after all calls to
561 * BreakIterator::createXXXInstance to avoid undefined behavior.
b75a7d8f
A
562 * @param key the registry key returned by a previous call to registerInstance
563 * @param status the in/out status code, no special meanings are assigned
564 * @return TRUE if the iterator for the key was successfully unregistered
374ca955 565 * @stable ICU 2.4
b75a7d8f 566 */
374ca955 567 static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
b75a7d8f
A
568
569 /**
374ca955 570 * Return a StringEnumeration over the locales available at the time of the call,
b75a7d8f
A
571 * including registered locales.
572 * @return a StringEnumeration over the locales available at the time of the call
374ca955 573 * @stable ICU 2.4
b75a7d8f 574 */
374ca955
A
575 static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
576#endif
b75a7d8f 577
374ca955
A
578 /**
579 * Returns the locale for this break iterator. Two flavors are available: valid and
580 * actual locale.
73c04bcf 581 * @stable ICU 2.8
374ca955
A
582 */
583 Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
b75a7d8f 584
4388f060 585#ifndef U_HIDE_INTERNAL_API
374ca955
A
586 /** Get the locale for this break iterator object. You can choose between valid and actual locale.
587 * @param type type of the locale we're looking for (valid or actual)
588 * @param status error code for the operation
589 * @return the locale
590 * @internal
591 */
592 const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
4388f060
A
593#endif /* U_HIDE_INTERNAL_API */
594
595 /**
596 * Set the subject text string upon which the break iterator is operating
597 * without changing any other aspect of the matching state.
598 * The new and previous text strings must have the same content.
599 *
600 * This function is intended for use in environments where ICU is operating on
601 * strings that may move around in memory. It provides a mechanism for notifying
602 * ICU that the string has been relocated, and providing a new UText to access the
603 * string in its new position.
604 *
605 * Note that the break iterator implementation never copies the underlying text
606 * of a string being processed, but always operates directly on the original text
607 * provided by the user. Refreshing simply drops the references to the old text
608 * and replaces them with references to the new.
609 *
610 * Caution: this function is normally used only by very specialized,
611 * system-level code. One example use case is with garbage collection that moves
612 * the text in memory.
613 *
614 * @param input The new (moved) text string.
615 * @param status Receives errors detected by this function.
616 * @return *this
617 *
51004dcb 618 * @stable ICU 49
4388f060
A
619 */
620 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
374ca955 621
0f5d89e8
A
622#ifndef U_HIDE_INTERNAL_API
623 /**
624 * Set the ULineWordOptions for this break iterator.
625 * @param lineWordOpts The ULineWordOptions to set.
626 * @internal Apple only
627 */
628 void setLineWordOpts(ULineWordOptions lineWordOpts);
629#endif /* U_HIDE_INTERNAL_API */
630
374ca955 631 private:
0f5d89e8 632 static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
73c04bcf 633 static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
b75a7d8f
A
634 static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
635
636 friend class ICUBreakIteratorFactory;
637 friend class ICUBreakIteratorService;
638
639protected:
4388f060
A
640 // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
641 // or else the compiler will create public ones.
b75a7d8f
A
642 /** @internal */
643 BreakIterator();
644 /** @internal */
0f5d89e8 645 BreakIterator (const BreakIterator &other);
2ca993e8 646#ifndef U_HIDE_INTERNAL_API
b75a7d8f 647 /** @internal */
0f5d89e8
A
648 BreakIterator (const Locale& valid, const Locale &actual);
649 /** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
650 BreakIterator &operator = (const BreakIterator &other);
2ca993e8 651#endif /* U_HIDE_INTERNAL_API */
0f5d89e8 652 ULineWordOptions fLineWordOpts;
2ca993e8 653
b75a7d8f 654private:
374ca955 655
0f5d89e8 656 /** @internal (private) */
374ca955
A
657 char actualLocale[ULOC_FULLNAME_CAPACITY];
658 char validLocale[ULOC_FULLNAME_CAPACITY];
b75a7d8f
A
659};
660
0f5d89e8 661inline void BreakIterator::setLineWordOpts(ULineWordOptions lineWordOpts)
2ca993e8 662{
0f5d89e8 663 fLineWordOpts = lineWordOpts;
2ca993e8
A
664}
665
57a6839d
A
666#ifndef U_HIDE_DEPRECATED_API
667
b75a7d8f
A
668inline UBool BreakIterator::isBufferClone()
669{
57a6839d 670 return FALSE;
b75a7d8f
A
671}
672
57a6839d
A
673#endif /* U_HIDE_DEPRECATED_API */
674
b75a7d8f 675U_NAMESPACE_END
f3c0d7a5 676#endif // U_SHOW_CPLUSPLUS_API
b75a7d8f
A
677
678#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
679
0f5d89e8 680#endif // BRKITER_H
b75a7d8f 681//eof