2 *******************************************************************************
4 * Copyright (C) 2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_TRANSLITERATION
23 #include "unicode/resbund.h"
24 #include "unicode/uniset.h"
// Out-of-class definition of the NamePrepTransform::fgClassID member, initialized to 0.
34 const char NamePrepTransform::fgClassID
=0;
// Factory function: allocates a NamePrepTransform with new, forwarding parseError and
// status to the constructor, and then tests status with U_FAILURE.
// The body of the failure branch and the return statement are not visible in this excerpt.
37 NamePrepTransform
* NamePrepTransform::createInstance(UParseError
& parseError
, UErrorCode
& status
){
38 NamePrepTransform
* transform
= new NamePrepTransform(parseError
, status
);
// NOTE(review): transform itself is not null-checked in the visible lines; only status is tested.
39 if(U_FAILURE(status
)){
// Constructor: loads the idna_rules resource bundle from the test data and uses it to build
// the mapping transliterator and the unassigned, prohibited and label-separator sets.
// parseError is passed through to Transliterator::createFromRules; status carries the
// error code in and out. Several original lines (closing braces, some declarations such as
// bundle, ruleLen and temp) are not visible in this excerpt.
47 NamePrepTransform::NamePrepTransform(UParseError
& parseError
, UErrorCode
& status
)
// The three set members are default-constructed here and filled from patterns below.
48 : unassigned(), prohibited(), labelSeparatorSet(){
// Locate the test data through the IntlTest helper.
54 const char* testDataName
= IntlTest::loadTestData(status
);
// Failure branch body is not visible in this excerpt.
56 if(U_FAILURE(status
)){
// Open the idna_rules bundle directly from the test data location.
60 bundle
= ures_openDirect(testDataName
,"idna_rules",&status
);
62 if(bundle
!= NULL
&& U_SUCCESS(status
)){
63 // create the mapping transliterator
// Fetch the MapNFKC rule text and its length.
65 const UChar
* ruleUChar
= ures_getStringByKey(bundle
, "MapNFKC",&ruleLen
, &status
);
66 int32_t mapRuleLen
= 0;
// Fetch the MapNoNormalization rule text and its length.
// NOTE(review): no status check is visible between the two lookups.
67 const UChar
*mapRuleUChar
= ures_getStringByKey(bundle
, "MapNoNormalization", &mapRuleLen
, &status
);
// The combined rule string is MapNoNormalization followed by MapNFKC.
68 UnicodeString
rule(mapRuleUChar
, mapRuleLen
);
69 rule
.append(ruleUChar
, ruleLen
);
// Build the forward transliterator from the combined rules and store it in mapping.
71 mapping
= Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule
,
72 UTRANS_FORWARD
, parseError
,status
);
// Failure branch body is not visible in this excerpt.
73 if(U_FAILURE(status
)) {
77 //create the unassigned set
78 int32_t patternLen
=0;
79 const UChar
* pattern
= ures_getStringByKey(bundle
,"UnassignedSet",&patternLen
, &status
);
80 unassigned
.applyPattern(UnicodeString(pattern
, patternLen
), status
);
82 //create prohibited set
// pattern and patternLen are reused for each subsequent lookup.
84 pattern
= ures_getStringByKey(bundle
,"ProhibitedSet",&patternLen
, &status
);
85 UnicodeString
test(pattern
,patternLen
);
86 prohibited
.applyPattern(test
,status
);
// Diagnostic output written to stdout with printf when status reports a failure.
88 if(U_FAILURE(status
)){
89 printf("Construction of Unicode set failed\n");
// Debug output on success: reports whether code point 0x644 is in the prohibited set.
// NOTE(review): the brace nesting of the following statements is not visible here, so it is
// unclear whether the pattern dump runs always or only inside the 0x644 branch.
92 if(U_SUCCESS(status
)){
93 if(prohibited
.contains((UChar
) 0x644)){
94 printf("The string contains 0x644 ... damn !!\n");
// Serialize the prohibited set into temp (declared on a line not visible here).
97 prohibited
.toPattern(temp
,TRUE
);
// Print the pattern one code unit at a time; each unit is cast to char for the %c format.
99 for(int32_t i
=0;i
<temp
.length();i
++){
100 printf("%c", (char)temp
.charAt(i
));
106 //create label separator set
108 pattern
= ures_getStringByKey(bundle
,"LabelSeparatorSet",&patternLen
, &status
);
109 labelSeparatorSet
.applyPattern(UnicodeString(pattern
,patternLen
),status
);
// Sets U_MEMORY_ALLOCATION_ERROR when status is still successful and a second condition holds.
// The second operand of the && is on a line not visible in this excerpt.
112 if(U_SUCCESS(status
) &&
115 status
= U_MEMORY_ALLOCATION_ERROR
;
// Returns a true value for every code point other than ASCII_SPACE.
// NOTE(review): this does not consult the prohibited set, whereas process() tests
// prohibited.contains(ch) && ch!=0x0020. Confirm whether the set lookup was intended here.
125 UBool
NamePrepTransform::isProhibited(UChar32 ch
){
126 return (UBool
)(ch
!= ASCII_SPACE
);
// Destructor. Its body is not visible in this excerpt.
129 NamePrepTransform::~NamePrepTransform(){
// Applies the mapping transliterator to src and writes the result to dest.
//   src, srcLength        input text; srcLength below -1 is rejected
//   dest, destCapacity    output buffer; dest may be NULL only when destCapacity is 0
//   allowUnassigned       when FALSE, any mapped code point found in the unassigned set
//                         sets status to U_IDNA_UNASSIGNED_ERROR
//   (UParseError*)        unnamed and unused in the visible body
//   status                in/out error code
// Returns the value of u_terminateUChars(dest, destCapacity, bufLen, &status) on the
// normal path. The return statements of the early-exit branches are not visible here.
139 int32_t NamePrepTransform::map(const UChar
* src
, int32_t srcLength
,
140 UChar
* dest
, int32_t destCapacity
,
141 UBool allowUnassigned
,
142 UParseError
* /*parseError*/,
143 UErrorCode
& status
){
// Incoming failure check; branch body not visible in this excerpt.
145 if(U_FAILURE(status
)){
// Argument validation.
149 if(src
==NULL
|| srcLength
<-1 || (dest
==NULL
&& destCapacity
!=0)) {
150 status
=U_ILLEGAL_ARGUMENT_ERROR
;
// Copy the input into a UnicodeString so it can be transliterated in place.
154 UnicodeString
rsource(src
,srcLength
);
155 // map the code points
156 // transliteration also performs NFKC
157 mapping
->transliterate(rsource
);
// Read-only view of the mapped text and its length in code units.
159 const UChar
* buffer
= rsource
.getBuffer();
160 int32_t bufLen
= rsource
.length();
161 // check if unassigned
162 if(allowUnassigned
== FALSE
){
// Walk the mapped text one code point at a time; U16_NEXT advances bufIndex.
// bufIndex and ch are declared on lines not visible in this excerpt.
165 for(;bufIndex
<bufLen
;){
166 U16_NEXT(buffer
, bufIndex
, bufLen
, ch
);
167 if(unassigned
.contains(ch
)){
168 status
= U_IDNA_UNASSIGNED_ERROR
;
// NOTE(review): buffer came from the no-argument getBuffer(); confirm that a
// releaseBuffer() call is appropriate for that accessor.
169 rsource
.releaseBuffer();
174 // check if there is enough room in the output
// NOTE(review): this copies only when bufLen is strictly less than destCapacity, while
// process() copies when b1Len <= destCapacity. Confirm which comparison is intended.
175 if(bufLen
< destCapacity
){
176 uprv_memcpy(dest
,buffer
,bufLen
*U_SIZEOF_UCHAR
);
// u_terminateUChars receives dest, the capacity, the mapped length and status.
179 return u_terminateUChars(dest
, destCapacity
, bufLen
, &status
);
// Element count of the b1Stack array in process(), and the initial value of b1Capacity there.
183 #define MAX_BUFFER_SIZE 300
// Runs map() on src, then checks the mapped text for prohibited code points and for
// mixed or badly placed text directions, and finally copies the result to dest.
//   src, srcLength        input text; srcLength below -1 is rejected
//   dest, destCapacity    output buffer; dest may be NULL only when destCapacity is 0
//   allowUnassigned       forwarded to map()
//   parseError            forwarded to map()
//   status                in/out error code; set to U_IDNA_PROHIBITED_ERROR,
//                         U_IDNA_CHECK_BIDI_ERROR, U_MEMORY_ALLOCATION_ERROR or
//                         U_ILLEGAL_ARGUMENT_ERROR by the checks below
// Returns the value of u_terminateUChars(dest, destCapacity, b1Len, &status) on the
// visible return path. Declarations of b1, b1Index and ch, the bodies of several
// branches, and any cleanup of a grown buffer are on lines not visible in this excerpt.
185 int32_t NamePrepTransform::process( const UChar
* src
, int32_t srcLength
,
186 UChar
* dest
, int32_t destCapacity
,
187 UBool allowUnassigned
,
188 UParseError
* parseError
,
189 UErrorCode
& status
){
190 // check error status
191 if(U_FAILURE(status
)){
// Argument validation, same condition as in map().
196 if(src
==NULL
|| srcLength
<-1 || (dest
==NULL
&& destCapacity
!=0)) {
197 status
=U_ILLEGAL_ARGUMENT_ERROR
;
// Fixed-size stack buffer used for the first mapping attempt.
201 UChar b1Stack
[MAX_BUFFER_SIZE
];
// b1Len is assigned by the first map() call below; only b1Capacity is initialized here.
203 int32_t b1Len
,b1Capacity
= MAX_BUFFER_SIZE
;
// U_CHAR_DIRECTION_COUNT serves as the not-yet-set marker for firstCharDir (tested below).
206 UCharDirection direction
=U_CHAR_DIRECTION_COUNT
, firstCharDir
=U_CHAR_DIRECTION_COUNT
;
207 UBool leftToRight
=FALSE
, rightToLeft
=FALSE
;
// First mapping attempt into b1 with capacity b1Capacity.
209 b1Len
= map(src
,srcLength
, b1
, b1Capacity
,allowUnassigned
,parseError
, status
);
211 if(status
== U_BUFFER_OVERFLOW_ERROR
){
212 // redo processing of string
213 /* we do not have enough room so grow the buffer*/
214 if(!u_growBufferFromStatic(b1Stack
,&b1
,&b1Capacity
,b1Len
,0)){
215 status
= U_MEMORY_ALLOCATION_ERROR
;
219 status
= U_ZERO_ERROR
; // reset error
// NOTE(review): the retry passes b1Len rather than b1Capacity as the capacity argument;
// confirm this is intended given the strict bufLen < destCapacity copy guard in map().
221 b1Len
= map(src
,srcLength
, b1
, b1Len
,allowUnassigned
, parseError
, status
);
// Failure branch body is not visible in this excerpt.
225 if(U_FAILURE(status
)){
// Scan the mapped text one code point at a time; U16_NEXT advances b1Index.
230 for(; b1Index
<b1Len
; ){
234 U16_NEXT(b1
, b1Index
, b1Len
, ch
);
// A code point in the prohibited set is an error, except for 0x0020.
236 if(prohibited
.contains(ch
) && ch
!=0x0020){
237 status
= U_IDNA_PROHIBITED_ERROR
;
// direction is overwritten on every iteration, so after the loop it holds the
// direction of the last code point scanned.
241 direction
= u_charDirection(ch
);
// Remember the direction of the first code point.
242 if(firstCharDir
==U_CHAR_DIRECTION_COUNT
){
243 firstCharDir
= direction
;
// Bodies of the next two branches are not visible; presumably they set leftToRight and
// rightToLeft respectively - TODO confirm.
245 if(direction
== U_LEFT_TO_RIGHT
){
248 if(direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
){
// Both flags set is reported as a bidi error.
254 if( leftToRight
== TRUE
&& rightToLeft
== TRUE
){
255 status
= U_IDNA_CHECK_BIDI_ERROR
;
// When rightToLeft is set, firstCharDir and the final value of direction must both be
// U_RIGHT_TO_LEFT or U_RIGHT_TO_LEFT_ARABIC; otherwise a bidi error is reported.
260 if( rightToLeft
== TRUE
&&
261 !((firstCharDir
== U_RIGHT_TO_LEFT
|| firstCharDir
== U_RIGHT_TO_LEFT_ARABIC
) &&
262 (direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
))
264 status
= U_IDNA_CHECK_BIDI_ERROR
;
// Copy the mapped text to dest when it fits (note <= here, unlike the < in map()).
268 if(b1Len
<= destCapacity
){
269 uprv_memmove(dest
,b1
, b1Len
*U_SIZEOF_UCHAR
);
// u_terminateUChars receives dest, the capacity, the mapped length and status.
277 return u_terminateUChars(dest
, destCapacity
, b1Len
, &status
);
// Reports whether ch is a member of labelSeparatorSet.
//   ch       code point to test
//   status   checked on entry; the body of the failure branch is not visible in this excerpt
280 UBool
NamePrepTransform::isLabelSeparator(UChar32 ch
, UErrorCode
& status
){
281 // check error status
282 if(U_FAILURE(status
)){
286 return labelSeparatorSet
.contains(ch
);
289 #endif /* #if !UCONFIG_NO_IDNA */
290 #endif /* #if !UCONFIG_NO_TRANSLITERATION */