]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unicode/normalizer2.h
ICU-491.11.3.tar.gz
[apple/icu.git] / icuSources / common / unicode / normalizer2.h
CommitLineData
729e4ab9
A
1/*
2*******************************************************************************
3*
4388f060 4* Copyright (C) 2009-2012, International Business Machines
729e4ab9
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: normalizer2.h
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2009nov22
14* created by: Markus W. Scherer
15*/
16
17#ifndef __NORMALIZER2_H__
18#define __NORMALIZER2_H__
19
20/**
21 * \file
22 * \brief C++ API: New API for Unicode Normalization.
23 */
24
25#include "unicode/utypes.h"
26
27#if !UCONFIG_NO_NORMALIZATION
28
29#include "unicode/uniset.h"
30#include "unicode/unistr.h"
31#include "unicode/unorm2.h"
32
33U_NAMESPACE_BEGIN
34
35/**
36 * Unicode normalization functionality for standard Unicode normalization or
37 * for using custom mapping tables.
38 * All instances of this class are unmodifiable/immutable.
39 * Instances returned by getInstance() are singletons that must not be deleted by the caller.
40 * The Normalizer2 class is not intended for public subclassing.
41 *
42 * The primary functions are to produce a normalized string and to detect whether
43 * a string is already normalized.
44 * The most commonly used normalization forms are those defined in
45 * http://www.unicode.org/unicode/reports/tr15/
46 * However, this API supports additional normalization forms for specialized purposes.
47 * For example, NFKC_Casefold is provided via getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode)
48 * and can be used in implementations of UTS #46.
49 *
50 * Not only are the standard compose and decompose modes supplied,
51 * but additional modes are provided as documented in the UNormalization2Mode enum.
52 *
53 * Some of the functions in this class identify normalization boundaries.
54 * At a normalization boundary, the portions of the string
55 * before it and starting from it do not interact and can be handled independently.
56 *
57 * The spanQuickCheckYes() stops at a normalization boundary.
58 * When the goal is a normalized string, then the text before the boundary
59 * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
60 *
61 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
62 * a character is guaranteed to be at a normalization boundary,
63 * regardless of context.
64 * This is used for moving from one normalization boundary to the next
65 * or preceding boundary, and for performing iterative normalization.
66 *
67 * Iterative normalization is useful when only a small portion of a
68 * longer string needs to be processed.
69 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
70 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
71 * (to process only the substring for which sort key bytes are computed).
72 *
73 * The set of normalization boundaries returned by these functions may not be
74 * complete: There may be more boundaries that could be returned.
75 * Different functions may return different boundaries.
76 * @stable ICU 4.4
77 */
78class U_COMMON_API Normalizer2 : public UObject {
79public:
4388f060
A
80 /**
81 * Destructor.
82 * @stable ICU 4.4
83 */
84 ~Normalizer2();
85
86#ifndef U_HIDE_DRAFT_API
87 /**
88 * Returns a Normalizer2 instance for Unicode NFC normalization.
89 * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
90 * Returns an unmodifiable singleton instance. Do not delete it.
91 * @param errorCode Standard ICU error code. Its input value must
92 * pass the U_SUCCESS() test, or else the function returns
93 * immediately. Check for U_FAILURE() on output or use with
94 * function chaining. (See User Guide for details.)
95 * @return the requested Normalizer2, if successful
96 * @draft ICU 49
97 */
98 static const Normalizer2 *
99 getNFCInstance(UErrorCode &errorCode);
100
101 /**
102 * Returns a Normalizer2 instance for Unicode NFD normalization.
103 * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
 * (The name "nfc" is intentional: NFD uses the "nfc" data with the
 * UNORM2_DECOMPOSE mode, as described for getInstance().)
104 * Returns an unmodifiable singleton instance. Do not delete it.
105 * @param errorCode Standard ICU error code. Its input value must
106 * pass the U_SUCCESS() test, or else the function returns
107 * immediately. Check for U_FAILURE() on output or use with
108 * function chaining. (See User Guide for details.)
109 * @return the requested Normalizer2, if successful
110 * @draft ICU 49
111 */
112 static const Normalizer2 *
113 getNFDInstance(UErrorCode &errorCode);
114
115 /**
116 * Returns a Normalizer2 instance for Unicode NFKC normalization.
117 * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
118 * Returns an unmodifiable singleton instance. Do not delete it.
119 * @param errorCode Standard ICU error code. Its input value must
120 * pass the U_SUCCESS() test, or else the function returns
121 * immediately. Check for U_FAILURE() on output or use with
122 * function chaining. (See User Guide for details.)
123 * @return the requested Normalizer2, if successful
124 * @draft ICU 49
125 */
126 static const Normalizer2 *
127 getNFKCInstance(UErrorCode &errorCode);
128
129 /**
130 * Returns a Normalizer2 instance for Unicode NFKD normalization.
131 * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
 * (The name "nfkc" is intentional: NFKD uses the "nfkc" data with the
 * UNORM2_DECOMPOSE mode, as described for getInstance().)
132 * Returns an unmodifiable singleton instance. Do not delete it.
133 * @param errorCode Standard ICU error code. Its input value must
134 * pass the U_SUCCESS() test, or else the function returns
135 * immediately. Check for U_FAILURE() on output or use with
136 * function chaining. (See User Guide for details.)
137 * @return the requested Normalizer2, if successful
138 * @draft ICU 49
139 */
140 static const Normalizer2 *
141 getNFKDInstance(UErrorCode &errorCode);
142
143 /**
144 * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
145 * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
146 * Returns an unmodifiable singleton instance. Do not delete it.
147 * @param errorCode Standard ICU error code. Its input value must
148 * pass the U_SUCCESS() test, or else the function returns
149 * immediately. Check for U_FAILURE() on output or use with
150 * function chaining. (See User Guide for details.)
151 * @return the requested Normalizer2, if successful
152 * @draft ICU 49
153 */
154 static const Normalizer2 *
155 getNFKCCasefoldInstance(UErrorCode &errorCode);
156#endif /* U_HIDE_DRAFT_API */
157
729e4ab9
A
158 /**
159 * Returns a Normalizer2 instance which uses the specified data file
160 * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
161 * and which composes or decomposes text according to the specified mode.
162 * Returns an unmodifiable singleton instance. Do not delete it.
163 *
164 * Use packageName=NULL for data files that are part of ICU's own data.
165 * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
166 * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
167 * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
168 *
169 * @param packageName NULL for ICU built-in data, otherwise application data package name
170 * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
171 * @param mode normalization mode (compose or decompose etc.)
172 * @param errorCode Standard ICU error code. Its input value must
173 * pass the U_SUCCESS() test, or else the function returns
174 * immediately. Check for U_FAILURE() on output or use with
175 * function chaining. (See User Guide for details.)
176 * @return the requested Normalizer2, if successful
177 * @stable ICU 4.4
178 */
179 static const Normalizer2 *
180 getInstance(const char *packageName,
181 const char *name,
182 UNormalization2Mode mode,
183 UErrorCode &errorCode);
184
185 /**
186 * Returns the normalized form of the source string.
 * Convenience overload implemented inline: it normalizes src into a
 * local UnicodeString via normalize(src, dest, errorCode) and returns
 * that string by value.
187 * @param src source string
188 * @param errorCode Standard ICU error code. Its input value must
189 * pass the U_SUCCESS() test, or else the function returns
190 * immediately. Check for U_FAILURE() on output or use with
191 * function chaining. (See User Guide for details.)
192 * @return normalized src
193 * @stable ICU 4.4
194 */
195 UnicodeString
196 normalize(const UnicodeString &src, UErrorCode &errorCode) const {
197 UnicodeString result;
198 normalize(src, result, errorCode);
199 return result;
200 }
201 /**
202 * Writes the normalized form of the source string to the destination string
203 * (replacing its contents) and returns the destination string.
204 * The source and destination strings must be different objects.
205 * @param src source string
206 * @param dest destination string; its contents are replaced with normalized src
207 * @param errorCode Standard ICU error code. Its input value must
208 * pass the U_SUCCESS() test, or else the function returns
209 * immediately. Check for U_FAILURE() on output or use with
210 * function chaining. (See User Guide for details.)
211 * @return dest
212 * @stable ICU 4.4
213 */
214 virtual UnicodeString &
215 normalize(const UnicodeString &src,
216 UnicodeString &dest,
217 UErrorCode &errorCode) const = 0;
218 /**
219 * Appends the normalized form of the second string to the first string
220 * (merging them at the boundary) and returns the first string.
221 * The result is normalized if the first string was normalized.
222 * The first and second strings must be different objects.
223 * @param first string, should be normalized
224 * @param second string, will be normalized
225 * @param errorCode Standard ICU error code. Its input value must
226 * pass the U_SUCCESS() test, or else the function returns
227 * immediately. Check for U_FAILURE() on output or use with
228 * function chaining. (See User Guide for details.)
229 * @return first
230 * @stable ICU 4.4
231 */
232 virtual UnicodeString &
233 normalizeSecondAndAppend(UnicodeString &first,
234 const UnicodeString &second,
235 UErrorCode &errorCode) const = 0;
236 /**
237 * Appends the second string to the first string
238 * (merging them at the boundary) and returns the first string.
239 * The result is normalized if both the strings were normalized.
240 * The first and second strings must be different objects.
241 * @param first string, should be normalized
242 * @param second string, should be normalized
243 * @param errorCode Standard ICU error code. Its input value must
244 * pass the U_SUCCESS() test, or else the function returns
245 * immediately. Check for U_FAILURE() on output or use with
246 * function chaining. (See User Guide for details.)
247 * @return first
248 * @stable ICU 4.4
249 */
250 virtual UnicodeString &
251 append(UnicodeString &first,
252 const UnicodeString &second,
253 UErrorCode &errorCode) const = 0;
254
255 /**
4388f060
A
256 * Gets the decomposition mapping of c.
257 * Roughly equivalent to normalizing the String form of c
258 * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
259 * returns FALSE and does not write a string
260 * if c does not have a decomposition mapping in this instance's data.
729e4ab9
A
261 * This function is independent of the mode of the Normalizer2.
262 * @param c code point
263 * @param decomposition String object which will be set to c's
264 * decomposition mapping, if there is one.
265 * @return TRUE if c has a decomposition, otherwise FALSE
4388f060 266 * @stable ICU 4.6
729e4ab9
A
267 */
268 virtual UBool
269 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
270
4388f060
A
271 /**
272 * Gets the raw decomposition mapping of c.
273 *
274 * This is similar to the getDecomposition() method but returns the
275 * raw decomposition mapping as specified in UnicodeData.txt or
276 * (for custom data) in the mapping files processed by the gennorm2 tool.
277 * By contrast, getDecomposition() returns the processed,
278 * recursively-decomposed version of this mapping.
279 *
280 * When used on a standard NFKC Normalizer2 instance,
281 * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
282 *
283 * When used on a standard NFC Normalizer2 instance,
284 * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
285 * in this case, the result contains either one or two code points (=1..4 UChars).
286 *
287 * This function is independent of the mode of the Normalizer2.
288 * The default implementation returns FALSE.
289 * @param c code point
290 * @param decomposition String object which will be set to c's
291 * raw decomposition mapping, if there is one.
292 * @return TRUE if c has a decomposition, otherwise FALSE
293 * @draft ICU 49
294 */
295 virtual UBool
296 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
297
298 /**
299 * Performs pairwise composition of a & b and returns the composite if there is one.
300 *
301 * Returns a composite code point c only if c has a two-way mapping to a+b.
302 * In standard Unicode normalization, this means that
303 * c has a canonical decomposition to a+b
304 * and c does not have the Full_Composition_Exclusion property.
305 *
306 * This function is independent of the mode of the Normalizer2.
307 * The default implementation returns a negative value.
308 * @param a A (normalization starter) code point.
309 * @param b Another code point.
310 * @return The non-negative composite code point if there is one; otherwise a negative value.
311 * @draft ICU 49
312 */
313 virtual UChar32
314 composePair(UChar32 a, UChar32 b) const;
315
316 /**
317 * Gets the combining class of c.
318 * The default implementation returns 0
319 * but all standard implementations return the Unicode Canonical_Combining_Class value.
320 * @param c code point
321 * @return c's combining class
322 * @draft ICU 49
323 */
324 virtual uint8_t
325 getCombiningClass(UChar32 c) const;
326
729e4ab9
A
327 /**
328 * Tests if the string is normalized.
329 * Internally, in cases where the quickCheck() method would return "maybe"
330 * (which is only possible for the two COMPOSE modes) this method
331 * resolves to "yes" or "no" to provide a definitive result,
332 * at the cost of doing more work in those cases.
333 * @param s input string
334 * @param errorCode Standard ICU error code. Its input value must
335 * pass the U_SUCCESS() test, or else the function returns
336 * immediately. Check for U_FAILURE() on output or use with
337 * function chaining. (See User Guide for details.)
338 * @return TRUE if s is normalized
339 * @stable ICU 4.4
340 */
341 virtual UBool
342 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
343
344 /**
345 * Tests if the string is normalized.
346 * For the two COMPOSE modes, the result could be "maybe" in cases that
347 * would take a little more work to resolve definitively.
348 * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
349 * combination of quick check + normalization, to avoid
350 * re-checking the "yes" prefix.
351 * @param s input string
352 * @param errorCode Standard ICU error code. Its input value must
353 * pass the U_SUCCESS() test, or else the function returns
354 * immediately. Check for U_FAILURE() on output or use with
355 * function chaining. (See User Guide for details.)
356 * @return UNormalizationCheckResult
357 * @stable ICU 4.4
358 */
359 virtual UNormalizationCheckResult
360 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
361
362 /**
363 * Returns the end of the normalized substring of the input string.
364 * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
365 * the substring <code>UnicodeString(s, 0, end)</code>
366 * will pass the quick check with a "yes" result.
367 *
368 * The returned end index is usually one or more characters before the
369 * "no" or "maybe" character: The end index is at a normalization boundary.
370 * (See the class documentation for more about normalization boundaries.)
371 *
372 * When the goal is a normalized string and most input strings are expected
373 * to be normalized already, then call this method,
374 * and if it returns a prefix shorter than the input string,
375 * copy that prefix and use normalizeSecondAndAppend() for the remainder.
376 * @param s input string
377 * @param errorCode Standard ICU error code. Its input value must
378 * pass the U_SUCCESS() test, or else the function returns
379 * immediately. Check for U_FAILURE() on output or use with
380 * function chaining. (See User Guide for details.)
381 * @return "yes" span end index
382 * @stable ICU 4.4
383 */
384 virtual int32_t
385 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
386
387 /**
388 * Tests if the character always has a normalization boundary before it,
389 * regardless of context.
390 * If true, then the character does not normalization-interact with
391 * preceding characters.
392 * In other words, a string containing this character can be normalized
393 * by processing portions before this character and starting from this
394 * character independently.
395 * This is used for iterative normalization. See the class documentation for details.
396 * @param c character to test
397 * @return TRUE if c has a normalization boundary before it
398 * @stable ICU 4.4
399 */
400 virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
401
402 /**
403 * Tests if the character always has a normalization boundary after it,
404 * regardless of context.
405 * If true, then the character does not normalization-interact with
406 * following characters.
407 * In other words, a string containing this character can be normalized
408 * by processing portions up to this character and after this
409 * character independently.
410 * This is used for iterative normalization. See the class documentation for details.
411 * Note that this operation may be significantly slower than hasBoundaryBefore().
412 * @param c character to test
413 * @return TRUE if c has a normalization boundary after it
414 * @stable ICU 4.4
415 */
416 virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
417
418 /**
419 * Tests if the character is normalization-inert.
420 * If true, then the character does not change, nor normalization-interact with
421 * preceding or following characters.
422 * In other words, a string containing this character can be normalized
423 * by processing portions before this character and after this
424 * character independently.
425 * This is used for iterative normalization. See the class documentation for details.
426 * Note that this operation may be significantly slower than hasBoundaryBefore().
427 * @param c character to test
428 * @return TRUE if c is normalization-inert
429 * @stable ICU 4.4
430 */
431 virtual UBool isInert(UChar32 c) const = 0;
432
433private:
434 // No ICU "poor man's RTTI" for this class nor its subclasses.
 // getDynamicClassID() is declared in the private section, so it is not
 // part of this class's public API.
 // NOTE(review): presumably this overrides UObject::getDynamicClassID();
 // UObject is not visible in this header -- confirm in unicode/uobject.h.
435 virtual UClassID getDynamicClassID() const;
436};
437
438/**
439 * Normalization filtered by a UnicodeSet.
440 * Normalizes portions of the text contained in the filter set and leaves
441 * portions not contained in the filter set unchanged.
442 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
443 * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
444 * This class implements all of (and only) the Normalizer2 API.
445 * An instance of this class is unmodifiable/immutable but is constructed and
446 * must be destructed by the owner.
447 * @stable ICU 4.4
448 */
449class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
450public:
451 /**
452 * Constructs a filtered normalizer wrapping any Normalizer2 instance
453 * and a filter set.
454 * Both are aliased and must not be modified or deleted while this object
455 * is used.
456 * The filter set should be frozen; otherwise the performance will suffer greatly.
457 * @param n2 wrapped Normalizer2 instance
458 * @param filterSet UnicodeSet which determines the characters to be normalized
459 * @stable ICU 4.4
460 */
461 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
462 norm2(n2), set(filterSet) {}
463
4388f060
A
464 /**
465 * Destructor.
466 * @stable ICU 4.4
467 */
468 ~FilteredNormalizer2();
469
729e4ab9
A
470 /**
471 * Writes the normalized form of the source string to the destination string
472 * (replacing its contents) and returns the destination string.
473 * The source and destination strings must be different objects.
474 * @param src source string
475 * @param dest destination string; its contents are replaced with normalized src
476 * @param errorCode Standard ICU error code. Its input value must
477 * pass the U_SUCCESS() test, or else the function returns
478 * immediately. Check for U_FAILURE() on output or use with
479 * function chaining. (See User Guide for details.)
480 * @return dest
481 * @stable ICU 4.4
482 */
483 virtual UnicodeString &
484 normalize(const UnicodeString &src,
485 UnicodeString &dest,
486 UErrorCode &errorCode) const;
487 /**
488 * Appends the normalized form of the second string to the first string
489 * (merging them at the boundary) and returns the first string.
490 * The result is normalized if the first string was normalized.
491 * The first and second strings must be different objects.
492 * @param first string, should be normalized
493 * @param second string, will be normalized
494 * @param errorCode Standard ICU error code. Its input value must
495 * pass the U_SUCCESS() test, or else the function returns
496 * immediately. Check for U_FAILURE() on output or use with
497 * function chaining. (See User Guide for details.)
498 * @return first
499 * @stable ICU 4.4
500 */
501 virtual UnicodeString &
502 normalizeSecondAndAppend(UnicodeString &first,
503 const UnicodeString &second,
504 UErrorCode &errorCode) const;
505 /**
506 * Appends the second string to the first string
507 * (merging them at the boundary) and returns the first string.
508 * The result is normalized if both the strings were normalized.
509 * The first and second strings must be different objects.
510 * @param first string, should be normalized
511 * @param second string, should be normalized
512 * @param errorCode Standard ICU error code. Its input value must
513 * pass the U_SUCCESS() test, or else the function returns
514 * immediately. Check for U_FAILURE() on output or use with
515 * function chaining. (See User Guide for details.)
516 * @return first
517 * @stable ICU 4.4
518 */
519 virtual UnicodeString &
520 append(UnicodeString &first,
521 const UnicodeString &second,
522 UErrorCode &errorCode) const;
523
524 /**
4388f060
A
525 * Gets the decomposition mapping of c.
526 * For details see the base class documentation.
527 *
729e4ab9
A
528 * This function is independent of the mode of the Normalizer2.
529 * @param c code point
530 * @param decomposition String object which will be set to c's
531 * decomposition mapping, if there is one.
532 * @return TRUE if c has a decomposition, otherwise FALSE
4388f060 533 * @stable ICU 4.6
729e4ab9
A
534 */
535 virtual UBool
536 getDecomposition(UChar32 c, UnicodeString &decomposition) const;
537
4388f060
A
538 /**
539 * Gets the raw decomposition mapping of c.
540 * For details see the base class documentation.
541 *
542 * This function is independent of the mode of the Normalizer2.
543 * @param c code point
544 * @param decomposition String object which will be set to c's
545 * raw decomposition mapping, if there is one.
546 * @return TRUE if c has a decomposition, otherwise FALSE
547 * @draft ICU 49
548 */
549 virtual UBool
550 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
551
552 /**
553 * Performs pairwise composition of a & b and returns the composite if there is one.
554 * For details see the base class documentation.
555 *
556 * This function is independent of the mode of the Normalizer2.
557 * @param a A (normalization starter) code point.
558 * @param b Another code point.
559 * @return The non-negative composite code point if there is one; otherwise a negative value.
560 * @draft ICU 49
561 */
562 virtual UChar32
563 composePair(UChar32 a, UChar32 b) const;
564
565 /**
566 * Gets the combining class of c.
567 * For details see the Normalizer2 base class documentation;
568 * the default implementation described there belongs to Normalizer2, not to this override.
569 * @param c code point
570 * @return c's combining class
571 * @draft ICU 49
572 */
573 virtual uint8_t
574 getCombiningClass(UChar32 c) const;
575
729e4ab9
A
576 /**
577 * Tests if the string is normalized.
578 * For details see the Normalizer2 base class documentation.
579 * @param s input string
580 * @param errorCode Standard ICU error code. Its input value must
581 * pass the U_SUCCESS() test, or else the function returns
582 * immediately. Check for U_FAILURE() on output or use with
583 * function chaining. (See User Guide for details.)
584 * @return TRUE if s is normalized
585 * @stable ICU 4.4
586 */
587 virtual UBool
588 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
589 /**
590 * Tests if the string is normalized.
591 * For details see the Normalizer2 base class documentation.
592 * @param s input string
593 * @param errorCode Standard ICU error code. Its input value must
594 * pass the U_SUCCESS() test, or else the function returns
595 * immediately. Check for U_FAILURE() on output or use with
596 * function chaining. (See User Guide for details.)
597 * @return UNormalizationCheckResult
598 * @stable ICU 4.4
599 */
600 virtual UNormalizationCheckResult
601 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
602 /**
603 * Returns the end of the normalized substring of the input string.
604 * For details see the Normalizer2 base class documentation.
605 * @param s input string
606 * @param errorCode Standard ICU error code. Its input value must
607 * pass the U_SUCCESS() test, or else the function returns
608 * immediately. Check for U_FAILURE() on output or use with
609 * function chaining. (See User Guide for details.)
610 * @return "yes" span end index
611 * @stable ICU 4.4
612 */
613 virtual int32_t
614 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
615
616 /**
617 * Tests if the character always has a normalization boundary before it,
618 * regardless of context.
619 * For details see the Normalizer2 base class documentation.
620 * @param c character to test
621 * @return TRUE if c has a normalization boundary before it
622 * @stable ICU 4.4
623 */
624 virtual UBool hasBoundaryBefore(UChar32 c) const;
625
626 /**
627 * Tests if the character always has a normalization boundary after it,
628 * regardless of context.
629 * For details see the Normalizer2 base class documentation.
630 * @param c character to test
631 * @return TRUE if c has a normalization boundary after it
632 * @stable ICU 4.4
633 */
634 virtual UBool hasBoundaryAfter(UChar32 c) const;
635
636 /**
637 * Tests if the character is normalization-inert.
638 * For details see the Normalizer2 base class documentation.
639 * @param c character to test
640 * @return TRUE if c is normalization-inert
641 * @stable ICU 4.4
642 */
643 virtual UBool isInert(UChar32 c) const;
644private:
 /*
  * Private overload of normalize() that takes an additional
  * USetSpanCondition argument. Its implementation is not in this header.
  * NOTE(review): presumably spanCondition is the initial condition used
  * when spanning src against the filter set -- confirm in the .cpp file.
  */
645 UnicodeString &
646 normalize(const UnicodeString &src,
647 UnicodeString &dest,
648 USetSpanCondition spanCondition,
649 UErrorCode &errorCode) const;
650
 /*
  * Private overload of normalizeSecondAndAppend() that takes an additional
  * doNormalize flag. Its implementation is not in this header.
  * NOTE(review): presumably shared by the public normalizeSecondAndAppend()
  * (doNormalize=TRUE) and append() (doNormalize=FALSE) -- confirm in the
  * .cpp file.
  */
651 UnicodeString &
652 normalizeSecondAndAppend(UnicodeString &first,
653 const UnicodeString &second,
654 UBool doNormalize,
655 UErrorCode &errorCode) const;
656
 // Wrapped normalizer: a reference bound to the constructor's n2 argument.
 // It is aliased, so the referenced object must outlive this one.
657 const Normalizer2 &norm2;
 // Filter set: a reference bound to the constructor's filterSet argument.
 // It is aliased, so the referenced set must outlive this object.
658 const UnicodeSet &set;
659};
660
661U_NAMESPACE_END
662
663#endif // !UCONFIG_NO_NORMALIZATION
664#endif // __NORMALIZER2_H__