// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 *
 *   Copyright (C) 2003-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  nptrans.cpp
 *   tab size:   8 (not used)
 *
 *   created on: 2003feb1
 *   created by: Ram Viswanadha
 */
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_TRANSLITERATION
25 #include "unicode/resbund.h"
26 #include "unicode/uniset.h"
36 const char NamePrepTransform::fgClassID
=0;
39 NamePrepTransform
* NamePrepTransform::createInstance(UParseError
& parseError
, UErrorCode
& status
){
40 NamePrepTransform
* transform
= new NamePrepTransform(parseError
, status
);
41 if(U_FAILURE(status
)){
49 NamePrepTransform::NamePrepTransform(UParseError
& parseError
, UErrorCode
& status
)
50 : mapping(nullptr), unassigned(), prohibited(), labelSeparatorSet(), bundle(nullptr) {
52 LocalPointer
<Transliterator
> lmapping
;
53 LocalUResourceBundlePointer lbundle
;
55 const char* testDataName
= IntlTest::loadTestData(status
);
57 if(U_FAILURE(status
)){
61 lbundle
.adoptInstead(ures_openDirect(testDataName
,"idna_rules",&status
));
63 if(lbundle
.isValid() && U_SUCCESS(status
)){
64 // create the mapping transliterator
66 const UChar
* ruleUChar
= ures_getStringByKey(lbundle
.getAlias(), "MapNFKC",&ruleLen
, &status
);
67 int32_t mapRuleLen
= 0;
68 const UChar
*mapRuleUChar
= ures_getStringByKey(lbundle
.getAlias(), "MapNoNormalization", &mapRuleLen
, &status
);
69 UnicodeString
rule(mapRuleUChar
, mapRuleLen
);
70 rule
.append(ruleUChar
, ruleLen
);
72 lmapping
.adoptInstead( Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule
,
73 UTRANS_FORWARD
, parseError
,status
));
74 if(U_FAILURE(status
)) {
78 //create the unassigned set
79 int32_t patternLen
=0;
80 const UChar
* pattern
= ures_getStringByKey(lbundle
.getAlias(),"UnassignedSet",&patternLen
, &status
);
81 unassigned
.applyPattern(UnicodeString(pattern
, patternLen
), status
);
83 //create prohibited set
85 pattern
= ures_getStringByKey(lbundle
.getAlias(),"ProhibitedSet",&patternLen
, &status
);
86 UnicodeString
test(pattern
,patternLen
);
87 prohibited
.applyPattern(test
,status
);
89 if(U_FAILURE(status
)){
90 printf("Construction of Unicode set failed\n");
93 if(U_SUCCESS(status
)){
94 if(prohibited
.contains((UChar
) 0x644)){
95 printf("The string contains 0x644 ... !!\n");
98 prohibited
.toPattern(temp
,TRUE
);
100 for(int32_t i
=0;i
<temp
.length();i
++){
101 printf("%c", (char)temp
.charAt(i
));
107 //create label separator set
109 pattern
= ures_getStringByKey(lbundle
.getAlias(), "LabelSeparatorSet", &patternLen
, &status
);
110 labelSeparatorSet
.applyPattern(UnicodeString(pattern
,patternLen
),status
);
113 if(U_SUCCESS(status
) && (lmapping
.isNull())) {
114 status
= U_MEMORY_ALLOCATION_ERROR
;
116 if (U_FAILURE(status
)) {
119 mapping
= lmapping
.orphan();
120 bundle
= lbundle
.orphan();
124 UBool
NamePrepTransform::isProhibited(UChar32 ch
){
125 return (UBool
)(ch
!= ASCII_SPACE
);
128 NamePrepTransform::~NamePrepTransform(){
138 int32_t NamePrepTransform::map(const UChar
* src
, int32_t srcLength
,
139 UChar
* dest
, int32_t destCapacity
,
140 UBool allowUnassigned
,
141 UParseError
* /*parseError*/,
142 UErrorCode
& status
){
144 if(U_FAILURE(status
)){
148 if(src
==NULL
|| srcLength
<-1 || (dest
==NULL
&& destCapacity
!=0)) {
149 status
=U_ILLEGAL_ARGUMENT_ERROR
;
153 UnicodeString
rsource(src
,srcLength
);
154 // map the code points
155 // transliteration also performs NFKC
156 mapping
->transliterate(rsource
);
158 const UChar
* buffer
= rsource
.getBuffer();
159 int32_t bufLen
= rsource
.length();
160 // check if unassigned
161 if(allowUnassigned
== FALSE
){
164 for(;bufIndex
<bufLen
;){
165 U16_NEXT(buffer
, bufIndex
, bufLen
, ch
);
166 if(unassigned
.contains(ch
)){
167 status
= U_IDNA_UNASSIGNED_ERROR
;
172 // check if there is enough room in the output
173 if(bufLen
< destCapacity
){
174 u_memcpy(dest
, buffer
, bufLen
);
177 return u_terminateUChars(dest
, destCapacity
, bufLen
, &status
);
181 #define MAX_BUFFER_SIZE 300
183 int32_t NamePrepTransform::process( const UChar
* src
, int32_t srcLength
,
184 UChar
* dest
, int32_t destCapacity
,
185 UBool allowUnassigned
,
186 UParseError
* parseError
,
187 UErrorCode
& status
){
188 // check error status
189 if(U_FAILURE(status
)){
194 if(src
==NULL
|| srcLength
<-1 || (dest
==NULL
&& destCapacity
!=0)) {
195 status
=U_ILLEGAL_ARGUMENT_ERROR
;
199 UnicodeString b1String
;
200 UChar
*b1
= b1String
.getBuffer(MAX_BUFFER_SIZE
);
204 UCharDirection direction
=U_CHAR_DIRECTION_COUNT
, firstCharDir
=U_CHAR_DIRECTION_COUNT
;
205 UBool leftToRight
=FALSE
, rightToLeft
=FALSE
;
207 b1Len
= map(src
, srcLength
, b1
, b1String
.getCapacity(), allowUnassigned
, parseError
, status
);
208 b1String
.releaseBuffer(b1Len
);
210 if(status
== U_BUFFER_OVERFLOW_ERROR
){
211 // redo processing of string
212 /* we do not have enough room so grow the buffer*/
213 b1
= b1String
.getBuffer(b1Len
);
214 status
= U_ZERO_ERROR
; // reset error
215 b1Len
= map(src
, srcLength
, b1
, b1String
.getCapacity(), allowUnassigned
, parseError
, status
);
216 b1String
.releaseBuffer(b1Len
);
219 if(U_FAILURE(status
)){
225 for(; b1Index
<b1Len
; ){
229 U16_NEXT(b1
, b1Index
, b1Len
, ch
);
231 if(prohibited
.contains(ch
) && ch
!=0x0020){
232 status
= U_IDNA_PROHIBITED_ERROR
;
237 direction
= u_charDirection(ch
);
238 if(firstCharDir
==U_CHAR_DIRECTION_COUNT
){
239 firstCharDir
= direction
;
241 if(direction
== U_LEFT_TO_RIGHT
){
244 if(direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
){
250 if( leftToRight
== TRUE
&& rightToLeft
== TRUE
){
251 status
= U_IDNA_CHECK_BIDI_ERROR
;
257 if( rightToLeft
== TRUE
&&
258 !((firstCharDir
== U_RIGHT_TO_LEFT
|| firstCharDir
== U_RIGHT_TO_LEFT_ARABIC
) &&
259 (direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
))
261 status
= U_IDNA_CHECK_BIDI_ERROR
;
265 if(b1Len
<= destCapacity
){
266 u_memmove(dest
, b1
, b1Len
);
270 return u_terminateUChars(dest
, destCapacity
, b1Len
, &status
);
273 UBool
NamePrepTransform::isLabelSeparator(UChar32 ch
, UErrorCode
& status
){
274 // check error status
275 if(U_FAILURE(status
)){
279 return labelSeparatorSet
.contains(ch
);
282 #endif /* #if !UCONFIG_NO_IDNA */
283 #endif /* #if !UCONFIG_NO_TRANSLITERATION */