]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unicode/normalizer2.h
ICU-59180.0.1.tar.gz
[apple/icu.git] / icuSources / common / unicode / normalizer2.h
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
729e4ab9
A
3/*
4*******************************************************************************
5*
51004dcb 6* Copyright (C) 2009-2013, International Business Machines
729e4ab9
A
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: normalizer2.h
f3c0d7a5 11* encoding: UTF-8
729e4ab9
A
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2009nov22
16* created by: Markus W. Scherer
17*/
18
19#ifndef __NORMALIZER2_H__
20#define __NORMALIZER2_H__
21
22/**
23 * \file
24 * \brief C++ API: New API for Unicode Normalization.
25 */
26
27#include "unicode/utypes.h"
28
29#if !UCONFIG_NO_NORMALIZATION
30
31#include "unicode/uniset.h"
32#include "unicode/unistr.h"
33#include "unicode/unorm2.h"
34
f3c0d7a5 35#if U_SHOW_CPLUSPLUS_API
729e4ab9
A
36U_NAMESPACE_BEGIN
37
38/**
39 * Unicode normalization functionality for standard Unicode normalization or
40 * for using custom mapping tables.
41 * All instances of this class are unmodifiable/immutable.
42 * Instances returned by getInstance() are singletons that must not be deleted by the caller.
43 * The Normalizer2 class is not intended for public subclassing.
44 *
45 * The primary functions are to produce a normalized string and to detect whether
46 * a string is already normalized.
47 * The most commonly used normalization forms are those defined in
48 * http://www.unicode.org/reports/tr15/
49 * However, this API supports additional normalization forms for specialized purposes.
50 * For example, NFKC_Casefold is provided via getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode)
51 * and can be used in implementations of UTS #46.
52 *
53 * Not only are the standard compose and decompose modes supplied,
54 * but additional modes are provided as documented in the UNormalization2Mode enum.
55 *
56 * Some of the functions in this class identify normalization boundaries.
57 * At a normalization boundary, the portions of the string
58 * before it and starting from it do not interact and can be handled independently.
59 *
60 * The spanQuickCheckYes() stops at a normalization boundary.
61 * When the goal is a normalized string, then the text before the boundary
62 * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
63 *
64 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
65 * a character is guaranteed to be at a normalization boundary,
66 * regardless of context.
67 * This is used for moving from one normalization boundary to the next
68 * or preceding boundary, and for performing iterative normalization.
69 *
70 * Iterative normalization is useful when only a small portion of a
71 * longer string needs to be processed.
72 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
73 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
74 * (to process only the substring for which sort key bytes are computed).
75 *
76 * The set of normalization boundaries returned by these functions may not be
77 * complete: There may be more boundaries that could be returned.
78 * Different functions may return different boundaries.
79 * @stable ICU 4.4
80 */
81class U_COMMON_API Normalizer2 : public UObject {
82public:
4388f060
A
83 /**
84 * Destructor.
85 * @stable ICU 4.4
86 */
87 ~Normalizer2();
88
4388f060
A
89 /**
90 * Returns a Normalizer2 instance for Unicode NFC normalization.
91 * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
92 * Returns an unmodifiable singleton instance. Do not delete it.
93 * @param errorCode Standard ICU error code. Its input value must
94 * pass the U_SUCCESS() test, or else the function returns
95 * immediately. Check for U_FAILURE() on output or use with
96 * function chaining. (See User Guide for details.)
97 * @return the requested Normalizer2, if successful
51004dcb 98 * @stable ICU 49
4388f060
A
99 */
100 static const Normalizer2 *
101 getNFCInstance(UErrorCode &errorCode);
102
103 /**
104 * Returns a Normalizer2 instance for Unicode NFD normalization.
105 * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
106 * Returns an unmodifiable singleton instance. Do not delete it.
107 * @param errorCode Standard ICU error code. Its input value must
108 * pass the U_SUCCESS() test, or else the function returns
109 * immediately. Check for U_FAILURE() on output or use with
110 * function chaining. (See User Guide for details.)
111 * @return the requested Normalizer2, if successful
51004dcb 112 * @stable ICU 49
4388f060
A
113 */
114 static const Normalizer2 *
115 getNFDInstance(UErrorCode &errorCode);
116
117 /**
118 * Returns a Normalizer2 instance for Unicode NFKC normalization.
119 * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
120 * Returns an unmodifiable singleton instance. Do not delete it.
121 * @param errorCode Standard ICU error code. Its input value must
122 * pass the U_SUCCESS() test, or else the function returns
123 * immediately. Check for U_FAILURE() on output or use with
124 * function chaining. (See User Guide for details.)
125 * @return the requested Normalizer2, if successful
51004dcb 126 * @stable ICU 49
4388f060
A
127 */
128 static const Normalizer2 *
129 getNFKCInstance(UErrorCode &errorCode);
130
131 /**
132 * Returns a Normalizer2 instance for Unicode NFKD normalization.
133 * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
134 * Returns an unmodifiable singleton instance. Do not delete it.
135 * @param errorCode Standard ICU error code. Its input value must
136 * pass the U_SUCCESS() test, or else the function returns
137 * immediately. Check for U_FAILURE() on output or use with
138 * function chaining. (See User Guide for details.)
139 * @return the requested Normalizer2, if successful
51004dcb 140 * @stable ICU 49
4388f060
A
141 */
142 static const Normalizer2 *
143 getNFKDInstance(UErrorCode &errorCode);
144
145 /**
146 * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
147 * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
148 * Returns an unmodifiable singleton instance. Do not delete it.
149 * @param errorCode Standard ICU error code. Its input value must
150 * pass the U_SUCCESS() test, or else the function returns
151 * immediately. Check for U_FAILURE() on output or use with
152 * function chaining. (See User Guide for details.)
153 * @return the requested Normalizer2, if successful
51004dcb 154 * @stable ICU 49
4388f060
A
155 */
156 static const Normalizer2 *
157 getNFKCCasefoldInstance(UErrorCode &errorCode);
4388f060 158
729e4ab9
A
159 /**
160 * Returns a Normalizer2 instance which uses the specified data file
161 * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
162 * and which composes or decomposes text according to the specified mode.
163 * Returns an unmodifiable singleton instance. Do not delete it.
164 *
165 * Use packageName=NULL for data files that are part of ICU's own data.
166 * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
167 * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
168 * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
169 *
170 * @param packageName NULL for ICU built-in data, otherwise application data package name
171 * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
172 * @param mode normalization mode (compose or decompose etc.)
173 * @param errorCode Standard ICU error code. Its input value must
174 * pass the U_SUCCESS() test, or else the function returns
175 * immediately. Check for U_FAILURE() on output or use with
176 * function chaining. (See User Guide for details.)
177 * @return the requested Normalizer2, if successful
178 * @stable ICU 4.4
179 */
180 static const Normalizer2 *
181 getInstance(const char *packageName,
182 const char *name,
183 UNormalization2Mode mode,
184 UErrorCode &errorCode);
185
186 /**
187 * Returns the normalized form of the source string.
 * Convenience overload: normalizes src into a local UnicodeString via
 * normalize(src, dest, errorCode) and returns that string by value.
188 * @param src source string
189 * @param errorCode Standard ICU error code. Its input value must
190 * pass the U_SUCCESS() test, or else the function returns
191 * immediately. Check for U_FAILURE() on output or use with
192 * function chaining. (See User Guide for details.)
193 * @return normalized src
194 * @stable ICU 4.4
195 */
196 UnicodeString
197 normalize(const UnicodeString &src, UErrorCode &errorCode) const {
198 UnicodeString result;
199 normalize(src, result, errorCode);
200 return result;
201 }
202 /**
203 * Writes the normalized form of the source string to the destination string
204 * (replacing its contents) and returns the destination string.
205 * The source and destination strings must be different objects.
206 * @param src source string
207 * @param dest destination string; its contents are replaced with normalized src
208 * @param errorCode Standard ICU error code. Its input value must
209 * pass the U_SUCCESS() test, or else the function returns
210 * immediately. Check for U_FAILURE() on output or use with
211 * function chaining. (See User Guide for details.)
212 * @return dest
213 * @stable ICU 4.4
214 */
215 virtual UnicodeString &
216 normalize(const UnicodeString &src,
217 UnicodeString &dest,
218 UErrorCode &errorCode) const = 0;
219 /**
220 * Appends the normalized form of the second string to the first string
221 * (merging them at the boundary) and returns the first string.
222 * The result is normalized if the first string was normalized.
223 * The first and second strings must be different objects.
224 * @param first string, should be normalized
225 * @param second string, will be normalized
226 * @param errorCode Standard ICU error code. Its input value must
227 * pass the U_SUCCESS() test, or else the function returns
228 * immediately. Check for U_FAILURE() on output or use with
229 * function chaining. (See User Guide for details.)
230 * @return first
231 * @stable ICU 4.4
232 */
233 virtual UnicodeString &
234 normalizeSecondAndAppend(UnicodeString &first,
235 const UnicodeString &second,
236 UErrorCode &errorCode) const = 0;
237 /**
238 * Appends the second string to the first string
239 * (merging them at the boundary) and returns the first string.
240 * The result is normalized if both the strings were normalized.
241 * The first and second strings must be different objects.
242 * @param first string, should be normalized
243 * @param second string, should be normalized
244 * @param errorCode Standard ICU error code. Its input value must
245 * pass the U_SUCCESS() test, or else the function returns
246 * immediately. Check for U_FAILURE() on output or use with
247 * function chaining. (See User Guide for details.)
248 * @return first
249 * @stable ICU 4.4
250 */
251 virtual UnicodeString &
252 append(UnicodeString &first,
253 const UnicodeString &second,
254 UErrorCode &errorCode) const = 0;
255
256 /**
4388f060
A
257 * Gets the decomposition mapping of c.
258 * Roughly equivalent to normalizing the String form of c
259 * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
260 * returns FALSE and does not write a string
261 * if c does not have a decomposition mapping in this instance's data.
729e4ab9
A
262 * This function is independent of the mode of the Normalizer2.
263 * @param c code point
264 * @param decomposition String object which will be set to c's
265 * decomposition mapping, if there is one.
266 * @return TRUE if c has a decomposition, otherwise FALSE
4388f060 267 * @stable ICU 4.6
729e4ab9
A
268 */
269 virtual UBool
270 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
271
4388f060
A
272 /**
273 * Gets the raw decomposition mapping of c.
274 *
275 * This is similar to the getDecomposition() method but returns the
276 * raw decomposition mapping as specified in UnicodeData.txt or
277 * (for custom data) in the mapping files processed by the gennorm2 tool.
278 * By contrast, getDecomposition() returns the processed,
279 * recursively-decomposed version of this mapping.
280 *
281 * When used on a standard NFKC Normalizer2 instance,
282 * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
283 *
284 * When used on a standard NFC Normalizer2 instance,
285 * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
f3c0d7a5 286 * in this case, the result contains either one or two code points (=1..4 char16_ts).
4388f060
A
287 *
288 * This function is independent of the mode of the Normalizer2.
289 * The default implementation returns FALSE.
290 * @param c code point
291 * @param decomposition String object which will be set to c's
292 * raw decomposition mapping, if there is one.
293 * @return TRUE if c has a decomposition, otherwise FALSE
51004dcb 294 * @stable ICU 49
4388f060
A
295 */
296 virtual UBool
297 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
298
299 /**
300 * Performs pairwise composition of a & b and returns the composite if there is one.
301 *
302 * Returns a composite code point c only if c has a two-way mapping to a+b.
303 * In standard Unicode normalization, this means that
304 * c has a canonical decomposition to a+b
305 * and c does not have the Full_Composition_Exclusion property.
306 *
307 * This function is independent of the mode of the Normalizer2.
308 * The default implementation returns a negative value.
309 * @param a A (normalization starter) code point.
310 * @param b Another code point.
311 * @return The non-negative composite code point if there is one; otherwise a negative value.
51004dcb 312 * @stable ICU 49
4388f060
A
313 */
314 virtual UChar32
315 composePair(UChar32 a, UChar32 b) const;
316
317 /**
318 * Gets the combining class of c.
319 * The default implementation returns 0
320 * but all standard implementations return the Unicode Canonical_Combining_Class value.
321 * @param c code point
322 * @return c's combining class
51004dcb 323 * @stable ICU 49
4388f060
A
324 */
325 virtual uint8_t
326 getCombiningClass(UChar32 c) const;
327
729e4ab9
A
328 /**
329 * Tests if the string is normalized.
330 * Internally, in cases where the quickCheck() method would return "maybe"
331 * (which is only possible for the two COMPOSE modes) this method
332 * resolves to "yes" or "no" to provide a definitive result,
333 * at the cost of doing more work in those cases.
334 * @param s input string
335 * @param errorCode Standard ICU error code. Its input value must
336 * pass the U_SUCCESS() test, or else the function returns
337 * immediately. Check for U_FAILURE() on output or use with
338 * function chaining. (See User Guide for details.)
339 * @return TRUE if s is normalized
340 * @stable ICU 4.4
341 */
342 virtual UBool
343 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
344
345 /**
346 * Tests if the string is normalized.
347 * For the two COMPOSE modes, the result could be "maybe" in cases that
348 * would take a little more work to resolve definitively.
349 * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
350 * combination of quick check + normalization, to avoid
351 * re-checking the "yes" prefix.
352 * @param s input string
353 * @param errorCode Standard ICU error code. Its input value must
354 * pass the U_SUCCESS() test, or else the function returns
355 * immediately. Check for U_FAILURE() on output or use with
356 * function chaining. (See User Guide for details.)
357 * @return UNormalizationCheckResult
358 * @stable ICU 4.4
359 */
360 virtual UNormalizationCheckResult
361 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
362
363 /**
364 * Returns the end of the normalized substring of the input string.
365 * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
366 * the substring <code>UnicodeString(s, 0, end)</code>
367 * will pass the quick check with a "yes" result.
368 *
369 * The returned end index is usually one or more characters before the
370 * "no" or "maybe" character: The end index is at a normalization boundary.
371 * (See the class documentation for more about normalization boundaries.)
372 *
373 * When the goal is a normalized string and most input strings are expected
374 * to be normalized already, then call this method,
375 * and if it returns a prefix shorter than the input string,
376 * copy that prefix and use normalizeSecondAndAppend() for the remainder.
377 * @param s input string
378 * @param errorCode Standard ICU error code. Its input value must
379 * pass the U_SUCCESS() test, or else the function returns
380 * immediately. Check for U_FAILURE() on output or use with
381 * function chaining. (See User Guide for details.)
382 * @return "yes" span end index
383 * @stable ICU 4.4
384 */
385 virtual int32_t
386 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
387
388 /**
389 * Tests if the character always has a normalization boundary before it,
390 * regardless of context.
391 * If true, then the character does not normalization-interact with
392 * preceding characters.
393 * In other words, a string containing this character can be normalized
394 * by processing portions before this character and starting from this
395 * character independently.
396 * This is used for iterative normalization. See the class documentation for details.
397 * @param c character to test
398 * @return TRUE if c has a normalization boundary before it
399 * @stable ICU 4.4
400 */
401 virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
402
403 /**
404 * Tests if the character always has a normalization boundary after it,
405 * regardless of context.
406 * If true, then the character does not normalization-interact with
407 * following characters.
408 * In other words, a string containing this character can be normalized
409 * by processing portions up to this character and after this
410 * character independently.
411 * This is used for iterative normalization. See the class documentation for details.
412 * Note that this operation may be significantly slower than hasBoundaryBefore().
413 * @param c character to test
414 * @return TRUE if c has a normalization boundary after it
415 * @stable ICU 4.4
416 */
417 virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
418
419 /**
420 * Tests if the character is normalization-inert.
421 * If true, then the character does not change, nor normalization-interact with
422 * preceding or following characters.
423 * In other words, a string containing this character can be normalized
424 * by processing portions before this character and after this
425 * character independently.
426 * This is used for iterative normalization. See the class documentation for details.
427 * Note that this operation may be significantly slower than hasBoundaryBefore().
428 * @param c character to test
429 * @return TRUE if c is normalization-inert
430 * @stable ICU 4.4
431 */
432 virtual UBool isInert(UChar32 c) const = 0;
729e4ab9
A
433};
434
435/**
436 * Normalization filtered by a UnicodeSet.
437 * Normalizes portions of the text contained in the filter set and leaves
438 * portions not contained in the filter set unchanged.
439 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
440 * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
441 * This class implements all of (and only) the Normalizer2 API.
442 * An instance of this class is unmodifiable/immutable but is constructed and
443 * must be destructed by the owner.
444 * @stable ICU 4.4
445 */
446class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
447public:
448 /**
449 * Constructs a filtered normalizer wrapping any Normalizer2 instance
450 * and a filter set.
451 * Both are aliased and must not be modified or deleted while this object
452 * is used.
453 * The filter set should be frozen; otherwise the performance will suffer greatly.
454 * @param n2 wrapped Normalizer2 instance
455 * @param filterSet UnicodeSet which determines the characters to be normalized
456 * @stable ICU 4.4
457 */
458 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
459 norm2(n2), set(filterSet) {}
460
4388f060
A
461 /**
462 * Destructor.
463 * @stable ICU 4.4
464 */
465 ~FilteredNormalizer2();
466
729e4ab9
A
467 /**
468 * Writes the normalized form of the source string to the destination string
469 * (replacing its contents) and returns the destination string.
470 * The source and destination strings must be different objects.
471 * @param src source string
472 * @param dest destination string; its contents are replaced with normalized src
473 * @param errorCode Standard ICU error code. Its input value must
474 * pass the U_SUCCESS() test, or else the function returns
475 * immediately. Check for U_FAILURE() on output or use with
476 * function chaining. (See User Guide for details.)
477 * @return dest
478 * @stable ICU 4.4
479 */
480 virtual UnicodeString &
481 normalize(const UnicodeString &src,
482 UnicodeString &dest,
483 UErrorCode &errorCode) const;
484 /**
485 * Appends the normalized form of the second string to the first string
486 * (merging them at the boundary) and returns the first string.
487 * The result is normalized if the first string was normalized.
488 * The first and second strings must be different objects.
489 * @param first string, should be normalized
490 * @param second string, will be normalized
491 * @param errorCode Standard ICU error code. Its input value must
492 * pass the U_SUCCESS() test, or else the function returns
493 * immediately. Check for U_FAILURE() on output or use with
494 * function chaining. (See User Guide for details.)
495 * @return first
496 * @stable ICU 4.4
497 */
498 virtual UnicodeString &
499 normalizeSecondAndAppend(UnicodeString &first,
500 const UnicodeString &second,
501 UErrorCode &errorCode) const;
502 /**
503 * Appends the second string to the first string
504 * (merging them at the boundary) and returns the first string.
505 * The result is normalized if both the strings were normalized.
506 * The first and second strings must be different objects.
507 * @param first string, should be normalized
508 * @param second string, should be normalized
509 * @param errorCode Standard ICU error code. Its input value must
510 * pass the U_SUCCESS() test, or else the function returns
511 * immediately. Check for U_FAILURE() on output or use with
512 * function chaining. (See User Guide for details.)
513 * @return first
514 * @stable ICU 4.4
515 */
516 virtual UnicodeString &
517 append(UnicodeString &first,
518 const UnicodeString &second,
519 UErrorCode &errorCode) const;
520
521 /**
4388f060
A
522 * Gets the decomposition mapping of c.
523 * For details see the base class documentation.
524 *
729e4ab9
A
525 * This function is independent of the mode of the Normalizer2.
526 * @param c code point
527 * @param decomposition String object which will be set to c's
528 * decomposition mapping, if there is one.
529 * @return TRUE if c has a decomposition, otherwise FALSE
4388f060 530 * @stable ICU 4.6
729e4ab9
A
531 */
532 virtual UBool
533 getDecomposition(UChar32 c, UnicodeString &decomposition) const;
534
4388f060
A
535 /**
536 * Gets the raw decomposition mapping of c.
537 * For details see the base class documentation.
538 *
539 * This function is independent of the mode of the Normalizer2.
540 * @param c code point
541 * @param decomposition String object which will be set to c's
542 * raw decomposition mapping, if there is one.
543 * @return TRUE if c has a decomposition, otherwise FALSE
51004dcb 544 * @stable ICU 49
4388f060
A
545 */
546 virtual UBool
547 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
548
549 /**
550 * Performs pairwise composition of a & b and returns the composite if there is one.
551 * For details see the base class documentation.
552 *
553 * This function is independent of the mode of the Normalizer2.
554 * @param a A (normalization starter) code point.
555 * @param b Another code point.
556 * @return The non-negative composite code point if there is one; otherwise a negative value.
51004dcb 557 * @stable ICU 49
4388f060
A
558 */
559 virtual UChar32
560 composePair(UChar32 a, UChar32 b) const;
561
562 /**
563 * Gets the combining class of c.
564 * For details see the base class documentation; as stated there, the Normalizer2 default
565 * implementation returns 0 but all standard implementations return the Unicode Canonical_Combining_Class value.
566 * @param c code point
567 * @return c's combining class
51004dcb 568 * @stable ICU 49
4388f060
A
569 */
570 virtual uint8_t
571 getCombiningClass(UChar32 c) const;
572
729e4ab9
A
573 /**
574 * Tests if the string is normalized.
575 * For details see the Normalizer2 base class documentation.
576 * @param s input string
577 * @param errorCode Standard ICU error code. Its input value must
578 * pass the U_SUCCESS() test, or else the function returns
579 * immediately. Check for U_FAILURE() on output or use with
580 * function chaining. (See User Guide for details.)
581 * @return TRUE if s is normalized
582 * @stable ICU 4.4
583 */
584 virtual UBool
585 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
586 /**
587 * Tests if the string is normalized.
588 * For details see the Normalizer2 base class documentation.
589 * @param s input string
590 * @param errorCode Standard ICU error code. Its input value must
591 * pass the U_SUCCESS() test, or else the function returns
592 * immediately. Check for U_FAILURE() on output or use with
593 * function chaining. (See User Guide for details.)
594 * @return UNormalizationCheckResult
595 * @stable ICU 4.4
596 */
597 virtual UNormalizationCheckResult
598 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
599 /**
600 * Returns the end of the normalized substring of the input string.
601 * For details see the Normalizer2 base class documentation.
602 * @param s input string
603 * @param errorCode Standard ICU error code. Its input value must
604 * pass the U_SUCCESS() test, or else the function returns
605 * immediately. Check for U_FAILURE() on output or use with
606 * function chaining. (See User Guide for details.)
607 * @return "yes" span end index
608 * @stable ICU 4.4
609 */
610 virtual int32_t
611 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
612
613 /**
614 * Tests if the character always has a normalization boundary before it,
615 * regardless of context.
616 * For details see the Normalizer2 base class documentation.
617 * @param c character to test
618 * @return TRUE if c has a normalization boundary before it
619 * @stable ICU 4.4
620 */
621 virtual UBool hasBoundaryBefore(UChar32 c) const;
622
623 /**
624 * Tests if the character always has a normalization boundary after it,
625 * regardless of context.
626 * For details see the Normalizer2 base class documentation.
627 * @param c character to test
628 * @return TRUE if c has a normalization boundary after it
629 * @stable ICU 4.4
630 */
631 virtual UBool hasBoundaryAfter(UChar32 c) const;
632
633 /**
634 * Tests if the character is normalization-inert.
635 * For details see the Normalizer2 base class documentation.
636 * @param c character to test
637 * @return TRUE if c is normalization-inert
638 * @stable ICU 4.4
639 */
640 virtual UBool isInert(UChar32 c) const;
641private:
 // Private overload of normalize() that takes an additional USetSpanCondition.
 // NOTE(review): presumably the span condition the filtering starts with; the
 // implementation is not part of this header -- confirm in the .cpp file.
642 UnicodeString &
643 normalize(const UnicodeString &src,
644 UnicodeString &dest,
645 USetSpanCondition spanCondition,
646 UErrorCode &errorCode) const;
647
 // Private overload of normalizeSecondAndAppend() that takes a doNormalize flag.
 // NOTE(review): presumably the shared worker behind the public
 // normalizeSecondAndAppend() and append() -- confirm in the .cpp file.
648 UnicodeString &
649 normalizeSecondAndAppend(UnicodeString &first,
650 const UnicodeString &second,
651 UBool doNormalize,
652 UErrorCode &errorCode) const;
653
 // Wrapped normalizer; a reference bound by the constructor to n2 (aliased, not owned).
654 const Normalizer2 &norm2;
 // Filter set; a reference bound by the constructor to filterSet (aliased, not owned).
655 const UnicodeSet &set;
656};
657
658U_NAMESPACE_END
f3c0d7a5 659#endif // U_SHOW_CPLUSPLUS_API
729e4ab9
A
660
661#endif // !UCONFIG_NO_NORMALIZATION
662#endif // __NORMALIZER2_H__