]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ********************************************************************** | |
3 | * Copyright (c) 2001-2006, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ********************************************************************** | |
6 | * Date Name Description | |
7 | * 11/19/2001 aliu Creation. | |
8 | ********************************************************************** | |
9 | */ | |
10 | ||
11 | #include "unicode/utypes.h" | |
12 | ||
13 | #if !UCONFIG_NO_TRANSLITERATION | |
14 | ||
15 | #include "esctrn.h" | |
16 | #include "util.h" | |
17 | ||
18 | U_NAMESPACE_BEGIN | |
19 | ||
20 | static const UChar UNIPRE[] = {85,43,0}; // "U+" | |
21 | static const UChar BS_u[] = {92,117,0}; // "\\u" | |
22 | static const UChar BS_U[] = {92,85,0}; // "\\U" | |
23 | static const UChar XMLPRE[] = {38,35,120,0}; // "&#x" | |
24 | static const UChar XML10PRE[] = {38,35,0}; // "&#" | |
25 | static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{" | |
26 | static const UChar SEMI[] = {59,0}; // ";" | |
27 | static const UChar RBRACE[] = {125,0}; // "}" | |
28 | static const UChar EMPTY[] = {0}; // "" | |
29 | ||
30 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator) | |
31 | ||
32 | /** | |
33 | * Factory methods | |
34 | */ | |
35 | static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) { | |
36 | // Unicode: "U+10FFFF" hex, min=4, max=6 | |
37 | return new EscapeTransliterator(ID, UNIPRE, EMPTY, 16, 4, TRUE, NULL); | |
38 | } | |
39 | static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) { | |
40 | // Java: "\\uFFFF" hex, min=4, max=4 | |
41 | return new EscapeTransliterator(ID, BS_u, EMPTY, 16, 4, FALSE, NULL); | |
42 | } | |
43 | static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) { | |
44 | // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 | |
45 | return new EscapeTransliterator(ID, BS_u, EMPTY, 16, 4, TRUE, | |
46 | new EscapeTransliterator(EMPTY, BS_U, EMPTY, 16, 8, TRUE, NULL)); | |
47 | } | |
48 | static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) { | |
49 | // XML: "" hex, min=1, max=6 | |
50 | return new EscapeTransliterator(ID, XMLPRE, SEMI, 16, 1, TRUE, NULL); | |
51 | } | |
52 | static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) { | |
53 | // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex") | |
54 | return new EscapeTransliterator(ID, XML10PRE, SEMI, 10, 1, TRUE, NULL); | |
55 | } | |
56 | static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) { | |
57 | // Perl: "\\x{263A}" hex, min=1, max=6 | |
58 | return new EscapeTransliterator(ID, PERLPRE, RBRACE, 16, 1, TRUE, NULL); | |
59 | } | |
60 | ||
61 | /** | |
62 | * Registers standard variants with the system. Called by | |
63 | * Transliterator during initialization. | |
64 | */ | |
65 | void EscapeTransliterator::registerIDs() { | |
66 | Token t = integerToken(0); | |
67 | ||
68 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t); | |
69 | ||
70 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t); | |
71 | ||
72 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t); | |
73 | ||
74 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t); | |
75 | ||
76 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t); | |
77 | ||
78 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t); | |
79 | ||
80 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t); | |
81 | } | |
82 | ||
83 | /** | |
84 | * Constructs an escape transliterator with the given ID and | |
85 | * parameters. See the class member documentation for details. | |
86 | */ | |
87 | EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID, | |
88 | const UnicodeString& _prefix, const UnicodeString& _suffix, | |
89 | int32_t _radix, int32_t _minDigits, | |
90 | UBool _grokSupplementals, | |
91 | EscapeTransliterator* adoptedSupplementalHandler) : | |
92 | Transliterator(newID, NULL) | |
93 | { | |
94 | this->prefix = _prefix; | |
95 | this->suffix = _suffix; | |
96 | this->radix = _radix; | |
97 | this->minDigits = _minDigits; | |
98 | this->grokSupplementals = _grokSupplementals; | |
99 | this->supplementalHandler = adoptedSupplementalHandler; | |
100 | } | |
101 | ||
102 | /** | |
103 | * Copy constructor. | |
104 | */ | |
105 | EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) : | |
106 | Transliterator(o), | |
107 | prefix(o.prefix), | |
108 | suffix(o.suffix), | |
109 | radix(o.radix), | |
110 | minDigits(o.minDigits), | |
111 | grokSupplementals(o.grokSupplementals) { | |
112 | supplementalHandler = (o.supplementalHandler != 0) ? | |
113 | new EscapeTransliterator(*o.supplementalHandler) : NULL; | |
114 | } | |
115 | ||
116 | EscapeTransliterator::~EscapeTransliterator() { | |
117 | delete supplementalHandler; | |
118 | } | |
119 | ||
120 | /** | |
121 | * Transliterator API. | |
122 | */ | |
123 | Transliterator* EscapeTransliterator::clone() const { | |
124 | return new EscapeTransliterator(*this); | |
125 | } | |
126 | ||
127 | /** | |
128 | * Implements {@link Transliterator#handleTransliterate}. | |
129 | */ | |
130 | void EscapeTransliterator::handleTransliterate(Replaceable& text, | |
131 | UTransPosition& pos, | |
132 | UBool /*isIncremental*/) const | |
133 | { | |
134 | /* TODO: Verify that isIncremental can be ignored */ | |
135 | int32_t start = pos.start; | |
136 | int32_t limit = pos.limit; | |
137 | ||
138 | UnicodeString buf(prefix); | |
139 | int32_t prefixLen = prefix.length(); | |
140 | UBool redoPrefix = FALSE; | |
141 | ||
142 | while (start < limit) { | |
143 | int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start); | |
144 | int32_t charLen = grokSupplementals ? UTF_CHAR_LENGTH(c) : 1; | |
145 | ||
146 | if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) { | |
147 | buf.truncate(0); | |
148 | buf.append(supplementalHandler->prefix); | |
149 | ICU_Utility::appendNumber(buf, c, supplementalHandler->radix, | |
150 | supplementalHandler->minDigits); | |
151 | buf.append(supplementalHandler->suffix); | |
152 | redoPrefix = TRUE; | |
153 | } else { | |
154 | if (redoPrefix) { | |
155 | buf.truncate(0); | |
156 | buf.append(prefix); | |
157 | redoPrefix = FALSE; | |
158 | } else { | |
159 | buf.truncate(prefixLen); | |
160 | } | |
161 | ICU_Utility::appendNumber(buf, c, radix, minDigits); | |
162 | buf.append(suffix); | |
163 | } | |
164 | ||
165 | text.handleReplaceBetween(start, start + charLen, buf); | |
166 | start += buf.length(); | |
167 | limit += buf.length() - charLen; | |
168 | } | |
169 | ||
170 | pos.contextLimit += limit - pos.limit; | |
171 | pos.limit = limit; | |
172 | pos.start = start; | |
173 | } | |
174 | ||
175 | U_NAMESPACE_END | |
176 | ||
177 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ | |
178 | ||
179 | //eof |