1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2003-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: nptrans.h
12 * tab size: 8 (not used)
15 * created on: 2003feb1
16 * created by: Ram Viswanadha
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_TRANSLITERATION
25 #include "unicode/resbund.h"
26 #include "unicode/uniset.h"
// Out-of-class definition of the static data member fgClassID, initialised to 0.
// NOTE(review): how the member is consumed is not visible in this excerpt.
36 const char NamePrepTransform::fgClassID
=0;
// Factory function: builds a NamePrepTransform on the heap.
//   parseError - forwarded unchanged to the constructor.
//   status     - in/out ICU error code; forwarded to the constructor and
//                inspected afterwards.
// NOTE(review): the statements inside the failure branch and the final return
// are not present in this excerpt, so the cleanup/return contract cannot be
// confirmed from here.
39 NamePrepTransform
* NamePrepTransform::createInstance(UParseError
& parseError
, UErrorCode
& status
){
// Allocate with plain new; the constructor reports problems through status.
40 NamePrepTransform
* transform
= new NamePrepTransform(parseError
, status
);
// Construction outcome is checked here (branch body not visible).
41 if(U_FAILURE(status
)){
// Constructor: loads the "idna_rules" resource bundle from the test data
// location and uses its entries to build
//   - mapping           (a Transliterator compiled from two rule strings),
//   - unassigned        (set pattern stored under "UnassignedSet"),
//   - prohibited        (set pattern stored under "ProhibitedSet"),
//   - labelSeparatorSet (set pattern stored under "LabelSeparatorSet").
//   parseError - passed to Transliterator::createFromRules.
//   status     - in/out ICU error code shared by every call below.
// NOTE(review): several original lines (the declarations of ruleLen and temp,
// branch bodies, closing braces) are not present in this excerpt.
49 NamePrepTransform::NamePrepTransform(UParseError
& parseError
, UErrorCode
& status
)
// The three set members are default-constructed and filled in further down.
50 : unassigned(), prohibited(), labelSeparatorSet(){
// Ask the test framework where the test data lives.
56 const char* testDataName
= IntlTest::loadTestData(status
);
58 if(U_FAILURE(status
)){
// Open the "idna_rules" bundle from that location.
62 bundle
= ures_openDirect(testDataName
,"idna_rules",&status
);
// Everything below depends on the bundle having been opened successfully.
64 if(bundle
!= NULL
&& U_SUCCESS(status
)){
65 // create the mapping transliterator
67 const UChar
* ruleUChar
= ures_getStringByKey(bundle
, "MapNFKC",&ruleLen
, &status
);
68 int32_t mapRuleLen
= 0;
69 const UChar
*mapRuleUChar
= ures_getStringByKey(bundle
, "MapNoNormalization", &mapRuleLen
, &status
);
// Combined rule text: "MapNoNormalization" first, then "MapNFKC" appended.
70 UnicodeString
rule(mapRuleUChar
, mapRuleLen
);
71 rule
.append(ruleUChar
, ruleLen
);
// Compile the combined rules into a forward transliterator with the ID
// "NamePrepTransform".
73 mapping
= Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule
,
74 UTRANS_FORWARD
, parseError
,status
);
// First visible status check since the two string lookups above.
75 if(U_FAILURE(status
)) {
79 //create the unassigned set
80 int32_t patternLen
=0;
81 const UChar
* pattern
= ures_getStringByKey(bundle
,"UnassignedSet",&patternLen
, &status
);
82 unassigned
.applyPattern(UnicodeString(pattern
, patternLen
), status
);
84 //create prohibited set
86 pattern
= ures_getStringByKey(bundle
,"ProhibitedSet",&patternLen
, &status
);
87 UnicodeString
test(pattern
,patternLen
);
88 prohibited
.applyPattern(test
,status
);
// Diagnostic output: report a failed set construction, or on success report
// whether 0x644 is in the prohibited set and print the set's pattern one
// character at a time (each code unit is narrowed to char).
// NOTE(review): any preprocessor guard around these printf calls is not
// visible in this excerpt.
90 if(U_FAILURE(status
)){
91 printf("Construction of Unicode set failed\n");
94 if(U_SUCCESS(status
)){
95 if(prohibited
.contains((UChar
) 0x644)){
96 printf("The string contains 0x644 ... !!\n");
99 prohibited
.toPattern(temp
,TRUE
);
101 for(int32_t i
=0;i
<temp
.length();i
++){
102 printf("%c", (char)temp
.charAt(i
));
108 //create label separator set
110 pattern
= ures_getStringByKey(bundle
,"LabelSeparatorSet",&patternLen
, &status
);
111 labelSeparatorSet
.applyPattern(UnicodeString(pattern
,patternLen
),status
);
// Final sanity check: with status still successful and a second condition
// (its text is not visible in this excerpt) satisfied, the constructor
// reports U_MEMORY_ALLOCATION_ERROR.
114 if(U_SUCCESS(status
) &&
117 status
= U_MEMORY_ALLOCATION_ERROR
;
// Returns whether ch differs from ASCII_SPACE (a constant defined outside this
// excerpt); the comparison result is cast to UBool.
// NOTE(review): the prohibited set built by the constructor is not consulted
// here, so every code point other than ASCII_SPACE yields true - confirm that
// this is intended.
127 UBool
NamePrepTransform::isProhibited(UChar32 ch
){
128 return (UBool
)(ch
!= ASCII_SPACE
);
// Destructor.
// NOTE(review): the body is not present in this excerpt; the constructor
// assigns the members mapping and bundle, so check that both are released.
131 NamePrepTransform::~NamePrepTransform(){
// Runs the mapping transliterator over src and writes the result to dest.
//   src, srcLength     - input UTF-16 text; srcLength below -1 is rejected.
//   dest, destCapacity - output buffer; dest may be NULL only when
//                        destCapacity is 0.
//   allowUnassigned    - when FALSE, any mapped code point contained in the
//                        unassigned set sets U_IDNA_UNASSIGNED_ERROR.
//   (UParseError*)     - unnamed and unused by this function.
//   status             - in/out ICU error code.
// Returns the value produced by u_terminateUChars(dest, destCapacity, bufLen,
// &status) on the path that reaches the end of the function.
// NOTE(review): the early-return statements and the declarations of bufIndex
// and ch are not present in this excerpt.
141 int32_t NamePrepTransform::map(const UChar
* src
, int32_t srcLength
,
142 UChar
* dest
, int32_t destCapacity
,
143 UBool allowUnassigned
,
144 UParseError
* /*parseError*/,
145 UErrorCode
& status
){
// Honour an error already recorded by the caller (branch body not visible).
147 if(U_FAILURE(status
)){
// Argument validation.
151 if(src
==NULL
|| srcLength
<-1 || (dest
==NULL
&& destCapacity
!=0)) {
152 status
=U_ILLEGAL_ARGUMENT_ERROR
;
// Work on a UnicodeString built from the input; transliterate() rewrites it
// in place.
156 UnicodeString
rsource(src
,srcLength
);
157 // map the code points
158 // transliteration also performs NFKC
159 mapping
->transliterate(rsource
);
// Read-only view of the mapped text and its length in UTF-16 code units.
161 const UChar
* buffer
= rsource
.getBuffer();
162 int32_t bufLen
= rsource
.length();
163 // check if unassigned
164 if(allowUnassigned
== FALSE
){
// Walk the mapped text one code point at a time (U16_NEXT advances bufIndex).
167 for(;bufIndex
<bufLen
;){
168 U16_NEXT(buffer
, bufIndex
, bufLen
, ch
);
169 if(unassigned
.contains(ch
)){
170 status
= U_IDNA_UNASSIGNED_ERROR
;
175 // check if there is enough room in the output
// NOTE(review): the copy is skipped when bufLen == destCapacity, whereas
// process() in this file copies when b1Len <= destCapacity. Confirm whether
// the exact-fit case is really meant to leave dest unwritten.
176 if(bufLen
< destCapacity
){
177 u_memcpy(dest
, buffer
, bufLen
);
180 return u_terminateUChars(dest
, destCapacity
, bufLen
, &status
);
// Initial capacity (in UChars) requested for the scratch buffer in process().
184 #define MAX_BUFFER_SIZE 300
// Maps src through map() into a scratch UnicodeString, then checks the mapped
// text for prohibited code points and for bidirectional consistency before
// copying it to dest.
//   src, srcLength     - input UTF-16 text; srcLength below -1 is rejected.
//   dest, destCapacity - output buffer; dest may be NULL only when
//                        destCapacity is 0.
//   allowUnassigned    - forwarded to map().
//   parseError         - forwarded to map().
//   status             - in/out ICU error code; this function can set
//                        U_ILLEGAL_ARGUMENT_ERROR, U_IDNA_PROHIBITED_ERROR and
//                        U_IDNA_CHECK_BIDI_ERROR in addition to whatever map()
//                        reports.
// Returns the value produced by u_terminateUChars(dest, destCapacity, b1Len,
// &status) on the path that reaches the end of the function.
// NOTE(review): the declarations of b1Len, b1Index and ch, several branch
// bodies and all closing braces are not present in this excerpt, so loop and
// branch boundaries cannot be confirmed from here.
186 int32_t NamePrepTransform::process( const UChar
* src
, int32_t srcLength
,
187 UChar
* dest
, int32_t destCapacity
,
188 UBool allowUnassigned
,
189 UParseError
* parseError
,
190 UErrorCode
& status
){
191 // check error status
192 if(U_FAILURE(status
)){
// Argument validation (same conditions as map()).
197 if(src
==NULL
|| srcLength
<-1 || (dest
==NULL
&& destCapacity
!=0)) {
198 status
=U_ILLEGAL_ARGUMENT_ERROR
;
// Scratch string; its writable buffer is requested with MAX_BUFFER_SIZE units.
// NOTE(review): the pointer returned by getBuffer() is not checked for NULL in
// the visible lines.
202 UnicodeString b1String
;
203 UChar
*b1
= b1String
.getBuffer(MAX_BUFFER_SIZE
);
// U_CHAR_DIRECTION_COUNT serves as the "not yet set" value: firstCharDir is
// compared against it below to capture the first direction seen.
207 UCharDirection direction
=U_CHAR_DIRECTION_COUNT
, firstCharDir
=U_CHAR_DIRECTION_COUNT
;
208 UBool leftToRight
=FALSE
, rightToLeft
=FALSE
;
// First mapping attempt into the scratch buffer.
210 b1Len
= map(src
, srcLength
, b1
, b1String
.getCapacity(), allowUnassigned
, parseError
, status
);
211 b1String
.releaseBuffer(b1Len
);
// If the scratch buffer was too small, request one of the reported length,
// clear the error and map again.
213 if(status
== U_BUFFER_OVERFLOW_ERROR
){
214 // redo processing of string
215 /* we do not have enough room so grow the buffer*/
216 b1
= b1String
.getBuffer(b1Len
);
217 status
= U_ZERO_ERROR
; // reset error
218 b1Len
= map(src
, srcLength
, b1
, b1String
.getCapacity(), allowUnassigned
, parseError
, status
);
219 b1String
.releaseBuffer(b1Len
);
222 if(U_FAILURE(status
)){
// Scan the mapped text code point by code point.
// NOTE(review): b1 is read here after releaseBuffer() was called on b1String;
// confirm against the UnicodeString documentation that this is permitted.
228 for(; b1Index
<b1Len
; ){
232 U16_NEXT(b1
, b1Index
, b1Len
, ch
);
// A code point in the prohibited set is an error, except U+0020.
234 if(prohibited
.contains(ch
) && ch
!=0x0020){
235 status
= U_IDNA_PROHIBITED_ERROR
;
// Record this code point's direction, and remember the first one seen.
240 direction
= u_charDirection(ch
);
241 if(firstCharDir
==U_CHAR_DIRECTION_COUNT
){
242 firstCharDir
= direction
;
// Presumably these two tests set leftToRight / rightToLeft; the branch bodies
// are not visible in this excerpt.
244 if(direction
== U_LEFT_TO_RIGHT
){
247 if(direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
){
// Text flagged as both left-to-right and right-to-left is rejected.
253 if( leftToRight
== TRUE
&& rightToLeft
== TRUE
){
254 status
= U_IDNA_CHECK_BIDI_ERROR
;
// When right-to-left content was seen, both firstCharDir and direction must be
// U_RIGHT_TO_LEFT or U_RIGHT_TO_LEFT_ARABIC. direction holds the value most
// recently assigned by the scan above.
260 if( rightToLeft
== TRUE
&&
261 !((firstCharDir
== U_RIGHT_TO_LEFT
|| firstCharDir
== U_RIGHT_TO_LEFT_ARABIC
) &&
262 (direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
))
264 status
= U_IDNA_CHECK_BIDI_ERROR
;
// Copy out only when the mapped text fits in dest.
268 if(b1Len
<= destCapacity
){
269 u_memmove(dest
, b1
, b1Len
);
273 return u_terminateUChars(dest
, destCapacity
, b1Len
, &status
);
// Reports whether ch is a member of labelSeparatorSet, the set the constructor
// fills from the "LabelSeparatorSet" resource entry.
//   ch     - code point to test.
//   status - in/out ICU error code; checked before the lookup.
// NOTE(review): the body of the failure branch is not present in this excerpt,
// so the value returned for an already-failed status cannot be confirmed.
276 UBool
NamePrepTransform::isLabelSeparator(UChar32 ch
, UErrorCode
& status
){
277 // check error status
278 if(U_FAILURE(status
)){
282 return labelSeparatorSet
.contains(ch
);
285 #endif /* #if !UCONFIG_NO_IDNA */
286 #endif /* #if !UCONFIG_NO_TRANSLITERATION */