1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (c) 2001-2007, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/20/2001 aliu Creation.
10 **********************************************************************
15 #include "unicode/utypes.h"
17 #if !UCONFIG_NO_TRANSLITERATION
19 #include "unicode/translit.h"
24 * A transliterator that converts Unicode characters to an escape
25 * form. Examples of escape forms are "U+4E01" and "&#x10FFFF;".
26 * Escape forms have a prefix and suffix, either of which may be
27 * empty, a radix, typically 16 or 10, a minimum digit count,
28 * typically 1, 4, or 8, and a boolean that specifies whether
29 * supplemental characters are handled as 32-bit code points or as two
30 * 16-bit code units. Most escape forms handle 32-bit code points,
31 * but some, such as the Java form, intentionally break them into a
32 * surrogate pair (two 16-bit code units), for backward compatibility.
34 * <p>Some escape forms actually have two different patterns, one for
35 * BMP characters (0..FFFF) and one for supplementals (>FFFF). To
36 * handle this, a second EscapeTransliterator may be defined that
37 * specifies the pattern to be produced for supplementals. An example
38 * of a form that requires this is the C form, which uses "\\uFFFF"
39 * for BMP characters and "\\U0010FFFF" for supplementals.
41 * <p>This class is package private. It registers several standard
42 * variants with the system which are then accessed via their IDs.
46 class EscapeTransliterator
: public Transliterator
{
51 * The prefix of the escape form; may be empty, but usually isn't.
56 * The suffix of the escape form; often empty.
61 * The radix to display the number in. Typically 16 or 10. Must
62 * be in the range 2 to 36.
67 * The minimum number of digits. Typically 1, 4, or 8. Values
68 * less than 1 are equivalent to 1.
73 * If true, supplementals are handled as 32-bit code points. If
74 * false, they are handled as two 16-bit code units.
76 UBool grokSupplementals
;
79 * The form to be used for supplementals. If this is null then
80 * the same form is used for BMP characters and supplementals. If
81 * this is not null and if grokSupplementals is true then the
82 * prefix, suffix, radix, and minDigits of this object are used
83 * for supplementals. This pointer is owned.
85 EscapeTransliterator
* supplementalHandler
;
90 * Registers standard variants with the system. Called by
91 * Transliterator during initialization.
/**
 * Class-level (static) entry point that takes no arguments and returns
 * nothing.
 * NOTE(review): the implementation is not part of this header, so the exact
 * set of IDs it registers cannot be confirmed from here.
 */
93 static void registerIDs();
96 * Constructs an escape transliterator with the given ID and
97 * parameters. See the class member documentation for details.
/**
 * Main constructor.
 *
 * @param ID                 identifier of the transliterator being built
 * @param prefix             prefix of the escape form; may be empty
 * @param suffix             suffix of the escape form; may be empty
 * @param radix              radix used to display the number; the member
 *                           documentation requires the range 2 to 36
 * @param minDigits          minimum number of digits; per the member
 *                           documentation, values below 1 behave like 1
 * @param grokSupplementals  true to handle supplementals as 32-bit code
 *                           points, false to handle them as two 16-bit
 *                           code units
 * @param adoptedSupplementalHandler
 *                           separate form to use for supplementals, or null
 *                           to use this form for all characters.
 *                           NOTE(review): the "adopted" name and the "owned"
 *                           remark on supplementalHandler suggest ownership
 *                           passes to the new object -- confirm in the
 *                           implementation.
 */
99 EscapeTransliterator(const UnicodeString
& ID
,
100 const UnicodeString
& prefix
, const UnicodeString
& suffix
,
101 int32_t radix
, int32_t minDigits
,
102 UBool grokSupplementals
,
103 EscapeTransliterator
* adoptedSupplementalHandler
);
/**
 * Copy constructor.
 * NOTE(review): supplementalHandler is documented as an owned pointer, so the
 * implementation presumably duplicates the handler instead of sharing it --
 * confirm in the implementation file.
 */
108 EscapeTransliterator(const EscapeTransliterator
&);
/**
 * Destructor. Declared virtual, so destruction through a base-class pointer
 * dispatches to this class.
 * NOTE(review): presumably releases the owned supplementalHandler -- confirm
 * in the implementation file.
 */
113 virtual ~EscapeTransliterator();
116 * Transliterator API.
/**
 * Virtual, const member that returns a pointer to a Transliterator.
 * NOTE(review): presumably returns a newly allocated copy of this object that
 * the caller must delete -- confirm against the Transliterator base class.
 */
118 virtual Transliterator
* clone() const;
121 * ICU "poor man's RTTI", returns a UClassID for the actual class.
/**
 * Virtual, const class-ID query: because it is virtual, the call resolves on
 * the object's actual (most-derived) type.
 *
 * @return a UClassID for the actual class of this object
 */
123 virtual UClassID
getDynamicClassID() const;
126 * ICU "poor man's RTTI", returns a UClassID for this class.
/**
 * Static class-ID query; needs no object instance.
 * NOTE(review): U_I18N_API and U_EXPORT2 are defined outside this file;
 * presumably they control symbol export and calling convention -- confirm in
 * the platform headers.
 *
 * @return a UClassID for this class
 */
128 U_I18N_API
static UClassID U_EXPORT2
getStaticClassID();
133 * Implements {@link Transliterator#handleTransliterate}.
/**
 * Virtual, const transliteration hook.
 *
 * @param text           Replaceable passed by non-const reference, so the
 *                       callee is able to modify it
 * @param offset         UTransPosition passed by non-const reference, so the
 *                       callee is able to update it
 * @param isIncremental  boolean flag.
 *                       NOTE(review): presumably marks an incremental call in
 *                       which more text may follow -- confirm against
 *                       Transliterator::handleTransliterate.
 */
135 virtual void handleTransliterate(Replaceable
& text
, UTransPosition
& offset
,
136 UBool isIncremental
) const;
142 #endif /* #if !UCONFIG_NO_TRANSLITERATION */