2 *******************************************************************************
4 * Copyright (C) 2003-2005, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_TRANSLITERATION
23 #include "unicode/resbund.h"
24 #include "unicode/uniset.h"
// Out-of-class definition of the static member NamePrepTransform::fgClassID,
// initialized to 0.
34 const char NamePrepTransform::fgClassID
=0;
// Factory function: allocates a NamePrepTransform with `new`, forwarding
// parseError and status to the constructor, then tests status for failure.
//
// @param parseError  passed through to the constructor
// @param status      in/out error code; examined right after construction
//
// NOTE(review): the body of the failure branch and the final return are not
// visible in this excerpt (the embedded line numbers jump from 39 to 47).
// Confirm against the full file that the failure path frees `transform`
// before returning.
37 NamePrepTransform
* NamePrepTransform::createInstance(UParseError
& parseError
, UErrorCode
& status
){
38 NamePrepTransform
* transform
= new NamePrepTransform(parseError
, status
);
39 if(U_FAILURE(status
)){
// Constructor: builds the mapping transliterator and the three UnicodeSets
// (unassigned, prohibited, labelSeparatorSet) from the "idna_rules" resource
// bundle located via IntlTest::loadTestData().
//
// @param parseError  handed to Transliterator::createFromRules
// @param status      in/out error code threaded through every call below
//
// NOTE(review): several original lines are missing from this excerpt (the
// embedded line numbers skip), including the bodies of the early-exit
// branches and the declarations of `ruleLen` and `temp`. Comments below
// describe only what is visible.
47 NamePrepTransform::NamePrepTransform(UParseError
& parseError
, UErrorCode
& status
)
48 : unassigned(), prohibited(), labelSeparatorSet(){
// Locate the test data; status is checked immediately afterwards.
54 const char* testDataName
= IntlTest::loadTestData(status
);
56 if(U_FAILURE(status
)){
// Open the "idna_rules" bundle directly from the test data location.
60 bundle
= ures_openDirect(testDataName
,"idna_rules",&status
);
// Everything up to the label separator set is guarded by a successfully
// opened bundle (presumably the guard closes after that set; the closing
// brace is not visible — confirm).
62 if(bundle
!= NULL
&& U_SUCCESS(status
)){
63 // create the mapping transliterator
65 const UChar
* ruleUChar
= ures_getStringByKey(bundle
, "MapNFKC",&ruleLen
, &status
);
66 int32_t mapRuleLen
= 0;
67 const UChar
*mapRuleUChar
= ures_getStringByKey(bundle
, "MapNoNormalization", &mapRuleLen
, &status
);
// The combined rule text is "MapNoNormalization" followed by "MapNFKC".
68 UnicodeString
rule(mapRuleUChar
, mapRuleLen
);
69 rule
.append(ruleUChar
, ruleLen
);
// Build a forward transliterator with ID "NamePrepTransform" from the
// combined rules.
// NOTE(review): no status check is visible between the two resource lookups
// and this call — presumably the callees honor an already-failed status;
// confirm.
71 mapping
= Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule
,
72 UTRANS_FORWARD
, parseError
,status
);
73 if(U_FAILURE(status
)) {
77 //create the unassigned set
78 int32_t patternLen
=0;
79 const UChar
* pattern
= ures_getStringByKey(bundle
,"UnassignedSet",&patternLen
, &status
);
80 unassigned
.applyPattern(UnicodeString(pattern
, patternLen
), status
);
82 //create prohibited set
84 pattern
= ures_getStringByKey(bundle
,"ProhibitedSet",&patternLen
, &status
);
85 UnicodeString
test(pattern
,patternLen
);
86 prohibited
.applyPattern(test
,status
);
// Diagnostic output: reports a failed set construction, or on success notes
// whether U+0644 is in the prohibited set and dumps the set pattern one
// UChar at a time (each narrowed to char for "%c").
// NOTE(review): whether these printf calls sit under a debug-only
// preprocessor guard is not visible here (embedded lines 87 and 101-105 are
// missing) — confirm.
88 if(U_FAILURE(status
)){
89 printf("Construction of Unicode set failed\n");
92 if(U_SUCCESS(status
)){
93 if(prohibited
.contains((UChar
) 0x644)){
94 printf("The string contains 0x644 ... damn !!\n");
97 prohibited
.toPattern(temp
,TRUE
);
99 for(int32_t i
=0;i
<temp
.length();i
++){
100 printf("%c", (char)temp
.charAt(i
));
106 //create label separator set
108 pattern
= ures_getStringByKey(bundle
,"LabelSeparatorSet",&patternLen
, &status
);
109 labelSeparatorSet
.applyPattern(UnicodeString(pattern
,patternLen
),status
);
// Final sanity check: converts an otherwise-successful status into
// U_MEMORY_ALLOCATION_ERROR when a further condition holds.
// NOTE(review): the second operand of the && is not visible (embedded lines
// 113-114 are missing); presumably it tests `mapping` for NULL — confirm.
112 if(U_SUCCESS(status
) &&
115 status
= U_MEMORY_ALLOCATION_ERROR
;
// Returns whether `ch` differs from ASCII_SPACE: the result is TRUE for every
// code point except that one.
//
// @param ch  code point to test
// @return    (UBool)(ch != ASCII_SPACE)
//
// NOTE(review): the `prohibited` set built by the constructor and consulted
// in process() is not used in the visible body — confirm this is intended.
125 UBool
NamePrepTransform::isProhibited(UChar32 ch
){
126 return (UBool
)(ch
!= ASCII_SPACE
);
// Destructor.
// NOTE(review): the body is not visible in this excerpt (embedded lines
// 130-138 are missing). The constructor assigns `mapping` and `bundle`, so
// confirm against the full file that both are released here.
129 NamePrepTransform::~NamePrepTransform(){
// Runs the `mapping` transliterator over the input and copies the result to
// `dest`.
//
// @param src              input UChars; NULL is rejected
// @param srcLength        input length; values below -1 are rejected
// @param dest             output buffer; may be NULL only if destCapacity is 0
// @param destCapacity     capacity of dest in UChars
// @param allowUnassigned  when FALSE, the mapped text is scanned and any code
//                         point in `unassigned` sets U_IDNA_UNASSIGNED_ERROR
// @param (unnamed)        UParseError pointer; unused (name is commented out)
// @param status           in/out error code
// @return                 on the visible path, the value of
//                         u_terminateUChars(dest, destCapacity, bufLen, &status)
//
// NOTE(review): the bodies of the early-exit branches and the declarations of
// `bufIndex` and `ch` are missing from this excerpt (the embedded line
// numbers skip).
139 int32_t NamePrepTransform::map(const UChar
* src
, int32_t srcLength
,
140 UChar
* dest
, int32_t destCapacity
,
141 UBool allowUnassigned
,
142 UParseError
* /*parseError*/,
143 UErrorCode
& status
){
// Bail out if the caller already has an error.
145 if(U_FAILURE(status
)){
// Argument validation.
149 if(src
==NULL
|| srcLength
<-1 || (dest
==NULL
&& destCapacity
!=0)) {
150 status
=U_ILLEGAL_ARGUMENT_ERROR
;
// Work on a UnicodeString built from the input; the transliterator modifies
// it in place.
154 UnicodeString
rsource(src
,srcLength
);
155 // map the code points
156 // transliteration also performs NFKC
157 mapping
->transliterate(rsource
);
159 const UChar
* buffer
= rsource
.getBuffer();
160 int32_t bufLen
= rsource
.length();
161 // check if unassigned
162 if(allowUnassigned
== FALSE
){
// Walk the mapped text one code point at a time via U16_NEXT.
165 for(;bufIndex
<bufLen
;){
166 U16_NEXT(buffer
, bufIndex
, bufLen
, ch
);
167 if(unassigned
.contains(ch
)){
168 status
= U_IDNA_UNASSIGNED_ERROR
;
173 // check if there is enough room in the output
// NOTE(review): the comparison is strict (<), whereas process() copies when
// b1Len <= destCapacity. When bufLen == destCapacity nothing is copied here,
// yet bufLen is still passed to u_terminateUChars below — this looks like an
// off-by-one; confirm.
174 if(bufLen
< destCapacity
){
175 uprv_memcpy(dest
,buffer
,bufLen
*U_SIZEOF_UCHAR
);
178 return u_terminateUChars(dest
, destCapacity
, bufLen
, &status
);
// Size, in UChars, of the stack buffer b1Stack used by process() below.
182 #define MAX_BUFFER_SIZE 300
// Maps the input with map(), then scans the mapped text for prohibited code
// points and inspects character directions, and finally copies the result to
// `dest`.
//
// @param src              input UChars; NULL is rejected
// @param srcLength        input length; values below -1 are rejected
// @param dest             output buffer; may be NULL only if destCapacity is 0
// @param destCapacity     capacity of dest in UChars
// @param allowUnassigned  forwarded to map()
// @param parseError       forwarded to map()
// @param status           in/out error code; the visible code sets
//                         U_ILLEGAL_ARGUMENT_ERROR, U_MEMORY_ALLOCATION_ERROR,
//                         U_IDNA_PROHIBITED_ERROR or U_IDNA_CHECK_BIDI_ERROR
// @return                 on the visible path, the value of
//                         u_terminateUChars(dest, destCapacity, b1Len, &status)
//
// NOTE(review): many original lines are missing from this excerpt (the
// embedded line numbers skip), including the declarations of `b1`, `b1Index`
// and `ch`, the bodies of the early-exit branches, and whatever follows the
// final copy. Presumably `b1` initially points at b1Stack — confirm.
184 int32_t NamePrepTransform::process( const UChar
* src
, int32_t srcLength
,
185 UChar
* dest
, int32_t destCapacity
,
186 UBool allowUnassigned
,
187 UParseError
* parseError
,
188 UErrorCode
& status
){
189 // check error status
190 if(U_FAILURE(status
)){
// Argument validation (same condition as in map()).
195 if(src
==NULL
|| srcLength
<-1 || (dest
==NULL
&& destCapacity
!=0)) {
196 status
=U_ILLEGAL_ARGUMENT_ERROR
;
200 UChar b1Stack
[MAX_BUFFER_SIZE
];
202 int32_t b1Len
,b1Capacity
= MAX_BUFFER_SIZE
;
// U_CHAR_DIRECTION_COUNT serves as the "not yet seen" marker for both
// direction variables.
205 UCharDirection direction
=U_CHAR_DIRECTION_COUNT
, firstCharDir
=U_CHAR_DIRECTION_COUNT
;
206 UBool leftToRight
=FALSE
, rightToLeft
=FALSE
;
// First attempt, into the b1 buffer with the stack capacity.
208 b1Len
= map(src
,srcLength
, b1
, b1Capacity
,allowUnassigned
,parseError
, status
);
210 if(status
== U_BUFFER_OVERFLOW_ERROR
){
211 // redo processing of string
212 /* we do not have enough room so grow the buffer*/
213 if(!u_growBufferFromStatic(b1Stack
,&b1
,&b1Capacity
,b1Len
,0)){
214 status
= U_MEMORY_ALLOCATION_ERROR
;
218 status
= U_ZERO_ERROR
; // reset error
// NOTE(review): the retry passes b1Len, not b1Capacity, as the destination
// capacity. map() copies only when its result length is strictly less than
// the capacity, so if the second mapping again yields b1Len UChars the copy
// would be skipped and b1 left unfilled — confirm.
220 b1Len
= map(src
,srcLength
, b1
, b1Len
,allowUnassigned
, parseError
, status
);
224 if(U_FAILURE(status
)){
// Scan the mapped text one code point at a time.
229 for(; b1Index
<b1Len
; ){
233 U16_NEXT(b1
, b1Index
, b1Len
, ch
);
// U+0020 is exempt from the prohibited check even if the set contains it.
235 if(prohibited
.contains(ch
) && ch
!=0x0020){
236 status
= U_IDNA_PROHIBITED_ERROR
;
240 direction
= u_charDirection(ch
);
// Remember the direction of the first code point.
241 if(firstCharDir
==U_CHAR_DIRECTION_COUNT
){
242 firstCharDir
= direction
;
// The bodies of the next two branches are not visible; presumably they set
// leftToRight and rightToLeft respectively — confirm.
244 if(direction
== U_LEFT_TO_RIGHT
){
247 if(direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
){
// Text with both left-to-right and right-to-left characters is rejected.
253 if( leftToRight
== TRUE
&& rightToLeft
== TRUE
){
254 status
= U_IDNA_CHECK_BIDI_ERROR
;
// If any right-to-left character was seen, both firstCharDir and `direction`
// must be U_RIGHT_TO_LEFT or U_RIGHT_TO_LEFT_ARABIC. `direction` holds the
// value assigned for the most recently scanned code point.
259 if( rightToLeft
== TRUE
&&
260 !((firstCharDir
== U_RIGHT_TO_LEFT
|| firstCharDir
== U_RIGHT_TO_LEFT_ARABIC
) &&
261 (direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
))
263 status
= U_IDNA_CHECK_BIDI_ERROR
;
// Copy out only when the result fits (non-strict comparison, unlike map()).
267 if(b1Len
<= destCapacity
){
268 uprv_memmove(dest
,b1
, b1Len
*U_SIZEOF_UCHAR
);
// NOTE(review): no release of a buffer grown by u_growBufferFromStatic is
// visible (embedded lines 269-275 are missing) — confirm the full file frees
// it on every exit path.
276 return u_terminateUChars(dest
, destCapacity
, b1Len
, &status
);
// Reports whether `ch` is a member of labelSeparatorSet.
//
// @param ch      code point to test
// @param status  in/out error code; tested for failure before the lookup
// @return        labelSeparatorSet.contains(ch) on the visible path
//
// NOTE(review): the body of the failure branch is not visible in this excerpt
// (embedded lines 282-284 are missing), so the value returned when status
// already indicates failure cannot be confirmed from here.
279 UBool
NamePrepTransform::isLabelSeparator(UChar32 ch
, UErrorCode
& status
){
280 // check error status
281 if(U_FAILURE(status
)){
285 return labelSeparatorSet
.contains(ch
);
288 #endif /* #if !UCONFIG_NO_IDNA */
289 #endif /* #if !UCONFIG_NO_TRANSLITERATION */