/*
**********************************************************************
*   Copyright (c) 2001-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/19/2001  aliu        Creation.
**********************************************************************
*/
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_TRANSLITERATION
15 #include "unicode/utf16.h"
21 static const UChar UNIPRE
[] = {85,43,0}; // "U+"
22 static const UChar BS_u
[] = {92,117,0}; // "\\u"
23 static const UChar BS_U
[] = {92,85,0}; // "\\U"
24 static const UChar XMLPRE
[] = {38,35,120,0}; // "&#x"
25 static const UChar XML10PRE
[] = {38,35,0}; // "&#"
26 static const UChar PERLPRE
[] = {92,120,123,0}; // "\\x{"
27 static const UChar SEMI
[] = {59,0}; // ";"
28 static const UChar RBRACE
[] = {125,0}; // "}"
30 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator
)
35 static Transliterator
* _createEscUnicode(const UnicodeString
& ID
, Transliterator::Token
/*context*/) {
36 // Unicode: "U+10FFFF" hex, min=4, max=6
37 return new EscapeTransliterator(ID
, UnicodeString(TRUE
, UNIPRE
, 2), UnicodeString(), 16, 4, TRUE
, NULL
);
39 static Transliterator
* _createEscJava(const UnicodeString
& ID
, Transliterator::Token
/*context*/) {
40 // Java: "\\uFFFF" hex, min=4, max=4
41 return new EscapeTransliterator(ID
, UnicodeString(TRUE
, BS_u
, 2), UnicodeString(), 16, 4, FALSE
, NULL
);
43 static Transliterator
* _createEscC(const UnicodeString
& ID
, Transliterator::Token
/*context*/) {
44 // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
45 return new EscapeTransliterator(ID
, UnicodeString(TRUE
, BS_u
, 2), UnicodeString(), 16, 4, TRUE
,
46 new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE
, BS_U
, 2), UnicodeString(), 16, 8, TRUE
, NULL
));
48 static Transliterator
* _createEscXML(const UnicodeString
& ID
, Transliterator::Token
/*context*/) {
49 // XML: "" hex, min=1, max=6
50 return new EscapeTransliterator(ID
, UnicodeString(TRUE
, XMLPRE
, 3), UnicodeString(SEMI
[0]), 16, 1, TRUE
, NULL
);
52 static Transliterator
* _createEscXML10(const UnicodeString
& ID
, Transliterator::Token
/*context*/) {
53 // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
54 return new EscapeTransliterator(ID
, UnicodeString(TRUE
, XML10PRE
, 2), UnicodeString(SEMI
[0]), 10, 1, TRUE
, NULL
);
56 static Transliterator
* _createEscPerl(const UnicodeString
& ID
, Transliterator::Token
/*context*/) {
57 // Perl: "\\x{263A}" hex, min=1, max=6
58 return new EscapeTransliterator(ID
, UnicodeString(TRUE
, PERLPRE
, 3), UnicodeString(RBRACE
[0]), 16, 1, TRUE
, NULL
);
62 * Registers standard variants with the system. Called by
63 * Transliterator during initialization.
65 void EscapeTransliterator::registerIDs() {
66 Token t
= integerToken(0);
68 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode
, t
);
70 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava
, t
);
72 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC
, t
);
74 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML
, t
);
76 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10
, t
);
78 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl
, t
);
80 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava
, t
);
84 * Constructs an escape transliterator with the given ID and
85 * parameters. See the class member documentation for details.
87 EscapeTransliterator::EscapeTransliterator(const UnicodeString
& newID
,
88 const UnicodeString
& _prefix
, const UnicodeString
& _suffix
,
89 int32_t _radix
, int32_t _minDigits
,
90 UBool _grokSupplementals
,
91 EscapeTransliterator
* adoptedSupplementalHandler
) :
92 Transliterator(newID
, NULL
)
94 this->prefix
= _prefix
;
95 this->suffix
= _suffix
;
97 this->minDigits
= _minDigits
;
98 this->grokSupplementals
= _grokSupplementals
;
99 this->supplementalHandler
= adoptedSupplementalHandler
;
105 EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator
& o
) :
110 minDigits(o
.minDigits
),
111 grokSupplementals(o
.grokSupplementals
) {
112 supplementalHandler
= (o
.supplementalHandler
!= 0) ?
113 new EscapeTransliterator(*o
.supplementalHandler
) : NULL
;
116 EscapeTransliterator::~EscapeTransliterator() {
117 delete supplementalHandler
;
121 * Transliterator API.
123 Transliterator
* EscapeTransliterator::clone() const {
124 return new EscapeTransliterator(*this);
128 * Implements {@link Transliterator#handleTransliterate}.
130 void EscapeTransliterator::handleTransliterate(Replaceable
& text
,
132 UBool
/*isIncremental*/) const
134 /* TODO: Verify that isIncremental can be ignored */
135 int32_t start
= pos
.start
;
136 int32_t limit
= pos
.limit
;
138 UnicodeString
buf(prefix
);
139 int32_t prefixLen
= prefix
.length();
140 UBool redoPrefix
= FALSE
;
142 while (start
< limit
) {
143 int32_t c
= grokSupplementals
? text
.char32At(start
) : text
.charAt(start
);
144 int32_t charLen
= grokSupplementals
? U16_LENGTH(c
) : 1;
146 if ((c
& 0xFFFF0000) != 0 && supplementalHandler
!= NULL
) {
148 buf
.append(supplementalHandler
->prefix
);
149 ICU_Utility::appendNumber(buf
, c
, supplementalHandler
->radix
,
150 supplementalHandler
->minDigits
);
151 buf
.append(supplementalHandler
->suffix
);
159 buf
.truncate(prefixLen
);
161 ICU_Utility::appendNumber(buf
, c
, radix
, minDigits
);
165 text
.handleReplaceBetween(start
, start
+ charLen
, buf
);
166 start
+= buf
.length();
167 limit
+= buf
.length() - charLen
;
170 pos
.contextLimit
+= limit
- pos
.limit
;
177 #endif /* #if !UCONFIG_NO_TRANSLITERATION */