2 *******************************************************************************
4 * Copyright (C) 2003-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_TRANSLITERATION
23 #include "unicode/resbund.h"
24 #include "unicode/uniset.h"
// Out-of-class definition of the class member fgClassID, initialized to 0.
34 const char NamePrepTransform::fgClassID
=0;
// Factory function: allocates a NamePrepTransform with `new`, forwarding
// parseError and status to the constructor, and then tests status for failure.
//
// parseError - passed through to the constructor.
// status     - in/out ICU error code; checked with U_FAILURE after construction.
//
// NOTE(review): the body of the failure branch and the return statement are
// not visible in this view - confirm the new object is deleted when status
// reports a failure, and that a NULL result of `new` is handled.
37 NamePrepTransform
* NamePrepTransform::createInstance(UParseError
& parseError
, UErrorCode
& status
){
38 NamePrepTransform
* transform
= new NamePrepTransform(parseError
, status
);
39 if(U_FAILURE(status
)){
// Constructor: builds the transform from the "idna_rules" resource bundle found
// in the test data returned by IntlTest::loadTestData().
//
// Steps visible here, in order:
//   1. default-construct the three sets (unassigned, prohibited,
//      labelSeparatorSet);
//   2. open the bundle with ures_openDirect;
//   3. build `mapping` from the "MapNoNormalization" rules followed by the
//      "MapNFKC" rules;
//   4. apply the "UnassignedSet", "ProhibitedSet" and "LabelSeparatorSet"
//      patterns to the corresponding sets.
//
// parseError - passed to Transliterator::createFromRules.
// status     - in/out ICU error code, threaded through every call below.
//
// NOTE(review): the declarations of `bundle`, `ruleLen` and `temp`, the bodies
// of the failure branches, and any release of `bundle` are not visible in this
// view.
47 NamePrepTransform::NamePrepTransform(UParseError
& parseError
, UErrorCode
& status
)
48 : unassigned(), prohibited(), labelSeparatorSet(){
// Locate the test data; its name is handed to ures_openDirect below.
54 const char* testDataName
= IntlTest::loadTestData(status
);
56 if(U_FAILURE(status
)){
60 bundle
= ures_openDirect(testDataName
,"idna_rules",&status
);
62 if(bundle
!= NULL
&& U_SUCCESS(status
)){
63 // create the mapping transliterator
65 const UChar
* ruleUChar
= ures_getStringByKey(bundle
, "MapNFKC",&ruleLen
, &status
);
66 int32_t mapRuleLen
= 0;
67 const UChar
*mapRuleUChar
= ures_getStringByKey(bundle
, "MapNoNormalization", &mapRuleLen
, &status
);
// The combined rule text is the "MapNoNormalization" rules with the
// "MapNFKC" rules appended after them.
68 UnicodeString
rule(mapRuleUChar
, mapRuleLen
);
69 rule
.append(ruleUChar
, ruleLen
);
71 mapping
= Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule
,
72 UTRANS_FORWARD
, parseError
,status
);
73 if(U_FAILURE(status
)) {
77 //create the unassigned set
78 int32_t patternLen
=0;
79 const UChar
* pattern
= ures_getStringByKey(bundle
,"UnassignedSet",&patternLen
, &status
);
80 unassigned
.applyPattern(UnicodeString(pattern
, patternLen
), status
);
82 //create prohibited set
// `pattern` and `patternLen` are reused for each of the remaining set lookups.
84 pattern
= ures_getStringByKey(bundle
,"ProhibitedSet",&patternLen
, &status
);
85 UnicodeString
test(pattern
,patternLen
);
86 prohibited
.applyPattern(test
,status
);
// Debug output: report a failure while building the sets.
88 if(U_FAILURE(status
)){
89 printf("Construction of Unicode set failed\n");
// Debug output: on success, report if U+0644 is in the prohibited set and
// print the set's pattern.
92 if(U_SUCCESS(status
)){
93 if(prohibited
.contains((UChar
) 0x644)){
94 printf("The string contains 0x644 ... damn !!\n");
97 prohibited
.toPattern(temp
,TRUE
);
// Each unit of the pattern is cast to char before printing, so any value
// that does not fit in a char is truncated in the output.
99 for(int32_t i
=0;i
<temp
.length();i
++){
100 printf("%c", (char)temp
.charAt(i
));
106 //create label separator set
108 pattern
= ures_getStringByKey(bundle
,"LabelSeparatorSet",&patternLen
, &status
);
109 labelSeparatorSet
.applyPattern(UnicodeString(pattern
,patternLen
),status
);
// Sets status to U_MEMORY_ALLOCATION_ERROR when status is still a success
// and a further condition holds.
// NOTE(review): the rest of that condition is not visible in this view.
112 if(U_SUCCESS(status
) &&
115 status
= U_MEMORY_ALLOCATION_ERROR
;
// Returns a non-zero UBool for every code point other than ASCII_SPACE, and
// zero for ASCII_SPACE itself.
//
// NOTE(review): this does not consult the `prohibited` set, whereas process()
// tests prohibited.contains(ch) directly - confirm that treating every
// non-space code point as prohibited is intended.
125 UBool
NamePrepTransform::isProhibited(UChar32 ch
){
126 return (UBool
)(ch
!= ASCII_SPACE
);
// Destructor.
// NOTE(review): the body is not visible in this view - confirm it releases
// `mapping`, which the constructor obtains from Transliterator::createFromRules.
129 NamePrepTransform::~NamePrepTransform(){
// Runs the mapping transliterator over `src` and writes the result to `dest`.
//
// src, srcLength     - input text; a NULL src or a srcLength below -1 is
//                      rejected with U_ILLEGAL_ARGUMENT_ERROR.
// dest, destCapacity - output buffer; a NULL dest is only accepted together
//                      with a destCapacity of 0.
// allowUnassigned    - when FALSE, the mapped text is scanned and
//                      U_IDNA_UNASSIGNED_ERROR is set if any code point is in
//                      the `unassigned` set.
// (parseError)       - unused here; the parameter name is commented out.
// status             - in/out ICU error code.
//
// Returns the value of u_terminateUChars(dest, destCapacity, bufLen, &status),
// where bufLen is the length of the mapped text.
//
// NOTE(review): the bodies of the early-exit branches and the declarations of
// `bufIndex` and `ch` are not visible in this view.
139 int32_t NamePrepTransform::map(const UChar
* src
, int32_t srcLength
,
140 UChar
* dest
, int32_t destCapacity
,
141 UBool allowUnassigned
,
142 UParseError
* /*parseError*/,
143 UErrorCode
& status
){
145 if(U_FAILURE(status
)){
// Argument validation.
149 if(src
==NULL
|| srcLength
<-1 || (dest
==NULL
&& destCapacity
!=0)) {
150 status
=U_ILLEGAL_ARGUMENT_ERROR
;
// Copy the input into a UnicodeString so it can be transliterated in place.
154 UnicodeString
rsource(src
,srcLength
);
155 // map the code points
156 // transliteration also performs NFKC
157 mapping
->transliterate(rsource
);
159 const UChar
* buffer
= rsource
.getBuffer();
160 int32_t bufLen
= rsource
.length();
161 // check if unassigned
162 if(allowUnassigned
== FALSE
){
// U16_NEXT reads one code point into ch and advances bufIndex.
165 for(;bufIndex
<bufLen
;){
166 U16_NEXT(buffer
, bufIndex
, bufLen
, ch
);
167 if(unassigned
.contains(ch
)){
168 status
= U_IDNA_UNASSIGNED_ERROR
;
173 // check if there is enough room in the output
// The copy is only made when bufLen is strictly less than destCapacity.
174 if(bufLen
< destCapacity
){
175 uprv_memcpy(dest
,buffer
,bufLen
*U_SIZEOF_UCHAR
);
178 return u_terminateUChars(dest
, destCapacity
, bufLen
, &status
);
// Capacity requested for the scratch buffer that process() obtains from
// b1String.getBuffer() before its first call to map().
182 #define MAX_BUFFER_SIZE 300
// Maps `src` with map(), then checks the mapped text for prohibited code
// points and for bidirectional-text violations, and copies it to `dest`.
//
// src, srcLength     - input text; a NULL src or a srcLength below -1 is
//                      rejected with U_ILLEGAL_ARGUMENT_ERROR.
// dest, destCapacity - output buffer; a NULL dest is only accepted together
//                      with a destCapacity of 0.
// allowUnassigned    - forwarded to map().
// parseError         - forwarded to map().
// status             - in/out ICU error code. Values assigned in the visible
//                      code: U_ILLEGAL_ARGUMENT_ERROR, U_IDNA_PROHIBITED_ERROR
//                      and U_IDNA_CHECK_BIDI_ERROR, in addition to whatever
//                      map() reports.
//
// Returns the value of u_terminateUChars(dest, destCapacity, b1Len, &status).
//
// NOTE(review): the declarations of `b1Len`, `b1Index` and `ch`, the bodies of
// several branches, and the loop/branch closing braces are not visible in this
// view, so the comments below do not state where the loop ends.
184 int32_t NamePrepTransform::process( const UChar
* src
, int32_t srcLength
,
185 UChar
* dest
, int32_t destCapacity
,
186 UBool allowUnassigned
,
187 UParseError
* parseError
,
188 UErrorCode
& status
){
189 // check error status
190 if(U_FAILURE(status
)){
// Argument validation (same test as in map()).
195 if(src
==NULL
|| srcLength
<-1 || (dest
==NULL
&& destCapacity
!=0)) {
196 status
=U_ILLEGAL_ARGUMENT_ERROR
;
// Scratch storage for the mapped text, borrowed from a UnicodeString.
200 UnicodeString b1String
;
201 UChar
*b1
= b1String
.getBuffer(MAX_BUFFER_SIZE
);
// U_CHAR_DIRECTION_COUNT is used as the "not set yet" value for both
// direction variables (see the firstCharDir test below).
205 UCharDirection direction
=U_CHAR_DIRECTION_COUNT
, firstCharDir
=U_CHAR_DIRECTION_COUNT
;
206 UBool leftToRight
=FALSE
, rightToLeft
=FALSE
;
208 b1Len
= map(src
, srcLength
, b1
, b1String
.getCapacity(), allowUnassigned
, parseError
, status
);
// NOTE(review): b1 is still read further down after releaseBuffer() has been
// called - confirm this is valid under the UnicodeString
// getBuffer()/releaseBuffer() contract.
209 b1String
.releaseBuffer(b1Len
);
211 if(status
== U_BUFFER_OVERFLOW_ERROR
){
212 // redo processing of string
213 /* we do not have enough room so grow the buffer*/
// b1Len here is the length returned by the first map() call.
214 b1
= b1String
.getBuffer(b1Len
);
215 status
= U_ZERO_ERROR
; // reset error
216 b1Len
= map(src
, srcLength
, b1
, b1String
.getCapacity(), allowUnassigned
, parseError
, status
);
217 b1String
.releaseBuffer(b1Len
);
220 if(U_FAILURE(status
)){
// Walk the mapped text one code point at a time.
226 for(; b1Index
<b1Len
; ){
230 U16_NEXT(b1
, b1Index
, b1Len
, ch
);
// U+0020 is exempt from the prohibited-set test.
232 if(prohibited
.contains(ch
) && ch
!=0x0020){
233 status
= U_IDNA_PROHIBITED_ERROR
;
// Record the direction of the current code point, and of the first one seen.
238 direction
= u_charDirection(ch
);
239 if(firstCharDir
==U_CHAR_DIRECTION_COUNT
){
240 firstCharDir
= direction
;
242 if(direction
== U_LEFT_TO_RIGHT
){
245 if(direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
){
// Text containing both left-to-right and right-to-left code points is
// rejected.
251 if( leftToRight
== TRUE
&& rightToLeft
== TRUE
){
252 status
= U_IDNA_CHECK_BIDI_ERROR
;
// If right-to-left text was seen, both the first code point's direction and
// the most recently read code point's direction must be R or AL; otherwise
// the text is rejected.
258 if( rightToLeft
== TRUE
&&
259 !((firstCharDir
== U_RIGHT_TO_LEFT
|| firstCharDir
== U_RIGHT_TO_LEFT_ARABIC
) &&
260 (direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
))
262 status
= U_IDNA_CHECK_BIDI_ERROR
;
// NOTE(review): this test uses `<=`, while the equivalent test in map() uses
// `<` - confirm which is intended.
266 if(b1Len
<= destCapacity
){
267 uprv_memmove(dest
,b1
, b1Len
*U_SIZEOF_UCHAR
);
271 return u_terminateUChars(dest
, destCapacity
, b1Len
, &status
);
// Reports whether `ch` is a member of labelSeparatorSet.
//
// ch     - code point to test.
// status - ICU error code; checked with U_FAILURE before the lookup.
//
// NOTE(review): the body of the failure branch is not visible in this view.
274 UBool
NamePrepTransform::isLabelSeparator(UChar32 ch
, UErrorCode
& status
){
275 // check error status
276 if(U_FAILURE(status
)){
280 return labelSeparatorSet
.contains(ch
);
283 #endif /* #if !UCONFIG_NO_IDNA */
284 #endif /* #if !UCONFIG_NO_TRANSLITERATION */