1 // © 2016 and later: Unicode, Inc. and others. 
   2 // License & terms of use: http://www.unicode.org/copyright.html 
   4 ********************************************************************** 
   5 *   Copyright (C) 2002-2016, International Business Machines 
   6 *   Corporation and others.  All Rights Reserved. 
   7 ********************************************************************** 
  10 *   tab size:   8 (not used) 
  13 *   created on: 2002jul01 
  14 *   created by: Markus W. Scherer 
  16 *   UTF-7 converter implementation. Used to be in ucnv_utf.c. 
  19 #include "unicode/utypes.h" 
  21 #if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 
  24 #include "unicode/ucnv.h" 
  29 /* UTF-7 -------------------------------------------------------------------- */ 
  32  * UTF-7 is a stateful encoding of Unicode. 
  33  * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt) 
  34  * It was intended for use in Internet email systems, using in its bytewise 
  35  * encoding only a subset of 7-bit US-ASCII. 
  36  * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still 
   39  * When converting Unicode to UTF-7, the RFC allows some US-ASCII 
   40  * characters to be encoded either directly or in base64. In particular, the 
   41  * characters in set O as defined in the RFC (see below) may be encoded directly 
   42  * but are not allowed in, e.g., email headers. 
  43  * By default, the ICU UTF-7 converter encodes set O directly. 
  44  * By choosing the option "version=1", set O will be escaped instead. 
  46  *     utf7Converter=ucnv_open("UTF-7,version=1"); 
  48  * For details about email headers see RFC 2047. 
  52  * Tests for US-ASCII characters belonging to character classes 
  55  * Set D (directly encoded characters) consists of the following 
  56  * characters: the upper and lower case letters A through Z 
  57  * and a through z, the 10 digits 0-9, and the following nine special 
  58  * characters (note that "+" and "=" are omitted): 
  61  * Set O (optional direct characters) consists of the following 
  62  * characters (note that "\" and "~" are omitted): 
  63  *     !"#$%&*;<=>@[]^_`{|} 
  65  * According to the rules in RFC 2152, the byte values for the following 
  66  * US-ASCII characters are not used in UTF-7 and are therefore illegal: 
  67  * - all C0 control codes except for CR LF TAB 
  71  * - all codes beyond US-ASCII, i.e. all >127 
  74     ((uint8_t)((c)-97)<26 || (uint8_t)((c)-65)<26 || /* letters */ \ 
  75      (uint8_t)((c)-48)<10 ||    /* digits */ \ 
  76      (uint8_t)((c)-39)<3 ||     /* '() */ \ 
  77      (uint8_t)((c)-44)<4 ||     /* ,-./ */ \ 
  78      (c)==58 || (c)==63         /* :? */ \ 
  82     ((uint8_t)((c)-33)<6 ||         /* !"#$%& */ \ 
  83      (uint8_t)((c)-59)<4 ||         /* ;<=> */ \ 
  84      (uint8_t)((c)-93)<4 ||         /* ]^_` */ \ 
  85      (uint8_t)((c)-123)<3 ||        /* {|} */ \ 
  86      (c)==42 || (c)==64 || (c)==91  /* *@[ */ \ 
  89 #define isCRLFTAB(c) ((c)==13 || (c)==10 || (c)==9) 
  90 #define isCRLFSPTAB(c) ((c)==32 || (c)==13 || (c)==10 || (c)==9) 
   97 /* legal byte values: all US-ASCII graphic characters from space to before tilde except backslash, and CR LF TAB */ 
  98 #define isLegalUTF7(c) (((uint8_t)((c)-32)<94 && (c)!=BACKSLASH) || isCRLFTAB(c)) 
  100 /* encode directly sets D and O and CR LF SP TAB
      *
      * Lookup table indexed by US-ASCII code 0..0x7f, one row per 16 codes;
      * a non-zero entry means the character is written as itself.
      * The zero entries are the C0 controls other than TAB (0x09), LF (0x0a)
      * and CR (0x0d), plus sign (0x2b), backslash (0x5c), tilde (0x7e)
      * and DEL (0x7f).
      */ 
  101 static const UBool encodeDirectlyMaximum
[128]={ 
  102  /* 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */ 
  103     0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 
  104     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
  106     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 
  107     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
  109     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
  110     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 
  112     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
  113     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 
  116 /* encode directly set D and CR LF SP TAB but not set O
      *
      * Lookup table indexed by US-ASCII code 0..0x7f, one row per 16 codes;
      * a non-zero entry means the character is written as itself.
      * Non-zero entries: TAB (0x09), LF (0x0a), CR (0x0d), space, the letters
      * and digits, and the punctuation ' ( ) , - . / : ?
      * Everything else is zero, including all set O characters, plus sign,
      * backslash, tilde and DEL.
      */ 
  117 static const UBool encodeDirectlyRestricted
[128]={ 
  118  /* 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */ 
  119     0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 
  120     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
  122     1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 
  123     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 
  125     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
  126     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 
  128     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
  129     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 
 135     65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 
 136     78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 
 138     97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 
 139     110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 
 141     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 
 148     /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */ 
 149     -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3, 
 150     -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, 
 152     /* general punctuation with + and / and a special value (-2) for - */ 
 153     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63, 
 155     52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, 
 158     -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 
 159     15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1, 
 162     -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 
 163     41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3 
 167  * converter status values: 
 170  *     24 inDirectMode (boolean) 
 171  * 23..16 base64Counter (-1..7) 
 172  * 15..0  bits (up to 14 bits incoming base64) 
 175  * 31..28 version (0: set O direct  1: set O escaped) 
 176  *     24 inDirectMode (boolean) 
 177  * 23..16 base64Counter (0..2) 
 178  *  7..0  bits (6 bits outgoing base64) 
 /*
  * Put the converter's per-direction state words back to their start value.
  *
  * cnv:    converter whose toUnicodeStatus and/or fromUnicodeStatus is rewritten.
  * choice: selects the direction(s). The toUnicode state is reset when
  *         choice<=UCNV_RESET_TO_UNICODE, the fromUnicode state when
  *         choice!=UCNV_RESET_TO_UNICODE.
  *
  * The value written, 0x1000000, has only bit 24 (inDirectMode) set, so the
  * base64Counter and bits fields of the status word become 0.
  * NOTE(review): lines between the two if-statements are not visible in this
  * listing; additional per-direction resets may exist there - confirm.
  */
 183 static void U_CALLCONV
 
 184 _UTF7Reset(UConverter 
*cnv
, UConverterResetChoice choice
) { 
 185     if(choice
<=UCNV_RESET_TO_UNICODE
) { 
 186         /* reset toUnicode */ 
 187         cnv
->toUnicodeStatus
=0x1000000; /* inDirectMode=TRUE */ 
 190     if(choice
!=UCNV_RESET_TO_UNICODE
) { 
 191         /* reset fromUnicode */ 
 192         cnv
->fromUnicodeStatus
=(cnv
->fromUnicodeStatus
&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ 
 /*
  * Open callback: validate the requested converter version and initialize
  * the state words.
  *
  * cnv:        converter being opened; its version is read with UCNV_GET_VERSION().
  * pArgs:      load arguments; not referenced by the lines shown here.
  * pErrorCode: receives U_ILLEGAL_ARGUMENT_ERROR (see note below).
  *
  * For a version of 0 or 1 the version number is stored in bits 31..28 of
  * fromUnicodeStatus and both conversion directions are reset.
  */
 196 static void U_CALLCONV
 
 197 _UTF7Open(UConverter 
*cnv
, 
 198           UConverterLoadArgs 
*pArgs
, 
 199           UErrorCode 
*pErrorCode
) { 
 201     if(UCNV_GET_VERSION(cnv
)<=1) { 
 202         /* TODO(markus): Should just use cnv->options rather than copying the version number. */ 
 203         cnv
->fromUnicodeStatus
=UCNV_GET_VERSION(cnv
)<<28; 
 204         _UTF7Reset(cnv
, UCNV_RESET_BOTH
); 
         /* NOTE(review): the line between the reset call and the assignment below is
          * not visible; presumably this is the else branch for versions >1 - confirm. */
 206         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 210 static void U_CALLCONV
 
 211 _UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs 
*pArgs
, 
 212                           UErrorCode 
*pErrorCode
) { 
 214     const uint8_t *source
, *sourceLimit
; 
 216     const UChar 
*targetLimit
; 
 222     int32_t length
, targetCapacity
; 
 226     int8_t base64Counter
; 
 231     int32_t sourceIndex
, nextSourceIndex
; 
 234     /* set up the local pointers */ 
 235     cnv
=pArgs
->converter
; 
 237     source
=(const uint8_t *)pArgs
->source
; 
 238     sourceLimit
=(const uint8_t *)pArgs
->sourceLimit
; 
 239     target
=pArgs
->target
; 
 240     targetLimit
=pArgs
->targetLimit
; 
 241     offsets
=pArgs
->offsets
; 
 242     /* get the state machine state */ 
 244         uint32_t status
=cnv
->toUnicodeStatus
; 
 245         inDirectMode
=(UBool
)((status
>>24)&1); 
 246         base64Counter
=(int8_t)(status
>>16); 
 247         bits
=(uint16_t)status
; 
 250     byteIndex
=cnv
->toULength
; 
 252     /* sourceIndex=-1 if the current character began in the previous buffer */ 
 253     sourceIndex
=byteIndex
==0 ? 0 : -1; 
 259          * In Direct Mode, most US-ASCII characters are encoded directly, i.e., 
 260          * with their US-ASCII byte values. 
 261          * Backslash and Tilde and most control characters are not allowed in UTF-7. 
 262          * A plus sign starts Unicode (or "escape") Mode. 
 264          * In Direct Mode, only the sourceIndex is used. 
 267         length
=(int32_t)(sourceLimit
-source
); 
 268         targetCapacity
=(int32_t)(targetLimit
-target
); 
 269         if(length
>targetCapacity
) { 
 270             length
=targetCapacity
; 
 274             if(!isLegalUTF7(b
)) { 
 278                 *pErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
 281                 /* write directly encoded character */ 
 284                     *offsets
++=sourceIndex
++; 
 287                 /* switch to Unicode mode */ 
 288                 nextSourceIndex
=++sourceIndex
; 
 297         if(source
<sourceLimit 
&& target
>=targetLimit
) { 
 299             *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 304          * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded. 
 305          * The base64 sequence ends with any character that is not in the base64 alphabet. 
 306          * A terminating minus sign is consumed. 
 308          * In Unicode Mode, the sourceIndex has the index to the start of the current 
 309          * base64 bytes, while nextSourceIndex is precisely parallel to source, 
 310          * keeping the index to the following byte. 
 311          * Note that in 2 out of 3 cases, UChars overlap within a base64 byte. 
 313         while(source
<sourceLimit
) { 
 314             if(target
<targetLimit
) { 
 315                 bytes
[byteIndex
++]=b
=*source
++; 
 317                 base64Value 
= -3; /* initialize as illegal */ 
 318                 if(b
>=126 || (base64Value
=fromBase64
[b
])==-3 || base64Value
==-1) { 
 320                      * base64Value==-1 for any legal character except base64 and minus sign, or 
 321                      * base64Value==-3 for illegal characters: 
 322                      * 1. In either case, leave Unicode mode. 
 323                      * 2.1. If we ended with an incomplete UChar or none after the +, then 
 324                      *      generate an error for the preceding erroneous sequence and deal with 
 325                      *      the current (possibly illegal) character next time through. 
 326                      * 2.2. Else the current char comes after a complete UChar, which was already 
 327                      *      pushed to the output buf, so: 
 328                      * 2.2.1. If the current char is legal, just save it for processing next time. 
 329                      *        It may be for example, a plus which we need to deal with in direct mode. 
 330                      * 2.2.2. Else if the current char is illegal, we might as well deal with it here. 
 333                     if(base64Counter
==-1) { 
 334                         /* illegal: + immediately followed by something other than base64 or minus sign */ 
 335                         /* include the plus sign in the reported sequence, but not the subsequent char */ 
 339                         *pErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
 342                         /* bits are illegally left over, a UChar is incomplete */ 
 343                         /* don't include current char (legal or illegal) in error seq */ 
 346                         *pErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
 349                         /* previous UChar was complete */ 
 350                         if(base64Value
==-3) { 
 351                             /* current character is illegal, deal with it here */ 
 352                             *pErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
 355                             /* un-read the current character in case it is a plus sign */ 
 357                             sourceIndex
=nextSourceIndex
-1; 
 361                 } else if(base64Value
>=0) { 
 362                     /* collect base64 bytes into UChars */ 
 363                     switch(base64Counter
) { 
 364                     case -1: /* -1 is immediately after the + */ 
 373                         bits
=(uint16_t)((bits
<<6)|base64Value
); 
 377                         *target
++=(UChar
)((bits
<<4)|(base64Value
>>2)); 
 379                             *offsets
++=sourceIndex
; 
 380                             sourceIndex
=nextSourceIndex
-1; 
 382                         bytes
[0]=b
; /* keep this byte in case an error occurs */ 
 384                         bits
=(uint16_t)(base64Value
&3); 
 388                         *target
++=(UChar
)((bits
<<2)|(base64Value
>>4)); 
 390                             *offsets
++=sourceIndex
; 
 391                             sourceIndex
=nextSourceIndex
-1; 
 393                         bytes
[0]=b
; /* keep this byte in case an error occurs */ 
 395                         bits
=(uint16_t)(base64Value
&15); 
 399                         *target
++=(UChar
)((bits
<<6)|base64Value
); 
 401                             *offsets
++=sourceIndex
; 
 402                             sourceIndex
=nextSourceIndex
; 
 409                         /* will never occur */ 
 412                 } else /*base64Value==-2*/ { 
 413                     /* minus sign terminates the base64 sequence */ 
 415                     if(base64Counter
==-1) { 
 416                         /* +- i.e. a minus immediately following a plus */ 
 419                             *offsets
++=sourceIndex
-1; 
 422                         /* absorb the minus and leave the Unicode Mode */ 
 424                             /* bits are illegally left over, a UChar is incomplete */ 
 425                             *pErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
 429                     sourceIndex
=nextSourceIndex
; 
 434                 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 440     if(U_SUCCESS(*pErrorCode
) && pArgs
->flush 
&& source
==sourceLimit 
&& bits
==0) { 
 442          * if we are in Unicode mode, then the byteIndex might not be 0, 
 443          * but that is ok if bits==0 
 444          * -> we set byteIndex=0 at the end of the stream to avoid a truncated error 
 445          * (not true for IMAP-mailbox-name where we must end in direct mode) 
 450     /* set the converter state back into UConverter */ 
 451     cnv
->toUnicodeStatus
=((uint32_t)inDirectMode
<<24)|((uint32_t)((uint8_t)base64Counter
)<<16)|(uint32_t)bits
; 
 452     cnv
->toULength
=byteIndex
; 
 454     /* write back the updated pointers */ 
 455     pArgs
->source
=(const char *)source
; 
 456     pArgs
->target
=target
; 
 457     pArgs
->offsets
=offsets
; 
 461 static void U_CALLCONV
 
 462 _UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs 
*pArgs
, 
 463                             UErrorCode 
*pErrorCode
) { 
 465     const UChar 
*source
, *sourceLimit
; 
 466     uint8_t *target
, *targetLimit
; 
 469     int32_t length
, targetCapacity
, sourceIndex
; 
 473     const UBool 
*encodeDirectly
; 
 475     int8_t base64Counter
; 
 478     /* set up the local pointers */ 
 479     cnv
=pArgs
->converter
; 
 481     /* set up the local pointers */ 
 482     source
=pArgs
->source
; 
 483     sourceLimit
=pArgs
->sourceLimit
; 
 484     target
=(uint8_t *)pArgs
->target
; 
 485     targetLimit
=(uint8_t *)pArgs
->targetLimit
; 
 486     offsets
=pArgs
->offsets
; 
 488     /* get the state machine state */ 
 490         uint32_t status
=cnv
->fromUnicodeStatus
; 
 491         encodeDirectly
= status
<0x10000000 ? encodeDirectlyMaximum 
: encodeDirectlyRestricted
; 
 492         inDirectMode
=(UBool
)((status
>>24)&1); 
 493         base64Counter
=(int8_t)(status
>>16); 
 494         bits
=(uint8_t)status
; 
 495         U_ASSERT(bits
<=UPRV_LENGTHOF(toBase64
)); 
 498     /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ 
 503         length
=(int32_t)(sourceLimit
-source
); 
 504         targetCapacity
=(int32_t)(targetLimit
-target
); 
 505         if(length
>targetCapacity
) { 
 506             length
=targetCapacity
; 
 510             /* currently always encode CR LF SP TAB directly */ 
 511             if(c
<=127 && encodeDirectly
[c
]) { 
 512                 /* encode directly */ 
 513                 *target
++=(uint8_t)c
; 
 515                     *offsets
++=sourceIndex
++; 
 518                 /* output +- for + */ 
 520                 if(target
<targetLimit
) { 
 523                         *offsets
++=sourceIndex
; 
 524                         *offsets
++=sourceIndex
++; 
 526                     /* realign length and targetCapacity */ 
 530                         *offsets
++=sourceIndex
++; 
 532                     cnv
->charErrorBuffer
[0]=MINUS
; 
 533                     cnv
->charErrorBufferLength
=1; 
 534                     *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 538                 /* un-read this character and switch to Unicode Mode */ 
 542                     *offsets
++=sourceIndex
; 
 550         if(source
<sourceLimit 
&& target
>=targetLimit
) { 
 552             *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 556         while(source
<sourceLimit
) { 
 557             if(target
<targetLimit
) { 
 559                 if(c
<=127 && encodeDirectly
[c
]) { 
 560                     /* encode directly */ 
 563                     /* trick: back out this character to make this easier */ 
 566                     /* terminate the base64 sequence */ 
 567                     if(base64Counter
!=0) { 
 568                         /* write remaining bits for the previous character */ 
 569                         *target
++=toBase64
[bits
]; 
 571                             *offsets
++=sourceIndex
-1; 
 574                     if(fromBase64
[c
]!=-1) { 
 575                         /* need to terminate with a minus */ 
 576                         if(target
<targetLimit
) { 
 579                                 *offsets
++=sourceIndex
-1; 
 582                             cnv
->charErrorBuffer
[0]=MINUS
; 
 583                             cnv
->charErrorBufferLength
=1; 
 584                             *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 591                      * base64 this character: 
 592                      * Output 2 or 3 base64 bytes for the remaining bits of the previous character 
 593                      * and the bits of this character, each implicitly in UTF-16BE. 
 595                      * Here, bits is an 8-bit variable because only 6 bits need to be kept from one 
 596                      * character to the next. The actual 2 or 4 bits are shifted to the left edge 
 597                      * of the 6-bits field 5..0 to make the termination of the base64 sequence easier. 
 599                     switch(base64Counter
) { 
 601                         *target
++=toBase64
[c
>>10]; 
 602                         if(target
<targetLimit
) { 
 603                             *target
++=toBase64
[(c
>>4)&0x3f]; 
 605                                 *offsets
++=sourceIndex
; 
 606                                 *offsets
++=sourceIndex
++; 
 610                                 *offsets
++=sourceIndex
++; 
 612                             cnv
->charErrorBuffer
[0]=toBase64
[(c
>>4)&0x3f]; 
 613                             cnv
->charErrorBufferLength
=1; 
 614                             *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 616                         bits
=(uint8_t)((c
&15)<<2); 
 620                         *target
++=toBase64
[bits
|(c
>>14)]; 
 621                         if(target
<targetLimit
) { 
 622                             *target
++=toBase64
[(c
>>8)&0x3f]; 
 623                             if(target
<targetLimit
) { 
 624                                 *target
++=toBase64
[(c
>>2)&0x3f]; 
 626                                     *offsets
++=sourceIndex
; 
 627                                     *offsets
++=sourceIndex
; 
 628                                     *offsets
++=sourceIndex
++; 
 632                                     *offsets
++=sourceIndex
; 
 633                                     *offsets
++=sourceIndex
++; 
 635                                 cnv
->charErrorBuffer
[0]=toBase64
[(c
>>2)&0x3f]; 
 636                                 cnv
->charErrorBufferLength
=1; 
 637                                 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 641                                 *offsets
++=sourceIndex
++; 
 643                             cnv
->charErrorBuffer
[0]=toBase64
[(c
>>8)&0x3f]; 
 644                             cnv
->charErrorBuffer
[1]=toBase64
[(c
>>2)&0x3f]; 
 645                             cnv
->charErrorBufferLength
=2; 
 646                             *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 648                         bits
=(uint8_t)((c
&3)<<4); 
 652                         *target
++=toBase64
[bits
|(c
>>12)]; 
 653                         if(target
<targetLimit
) { 
 654                             *target
++=toBase64
[(c
>>6)&0x3f]; 
 655                             if(target
<targetLimit
) { 
 656                                 *target
++=toBase64
[c
&0x3f]; 
 658                                     *offsets
++=sourceIndex
; 
 659                                     *offsets
++=sourceIndex
; 
 660                                     *offsets
++=sourceIndex
++; 
 664                                     *offsets
++=sourceIndex
; 
 665                                     *offsets
++=sourceIndex
++; 
 667                                 cnv
->charErrorBuffer
[0]=toBase64
[c
&0x3f]; 
 668                                 cnv
->charErrorBufferLength
=1; 
 669                                 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 673                                 *offsets
++=sourceIndex
++; 
 675                             cnv
->charErrorBuffer
[0]=toBase64
[(c
>>6)&0x3f]; 
 676                             cnv
->charErrorBuffer
[1]=toBase64
[c
&0x3f]; 
 677                             cnv
->charErrorBufferLength
=2; 
 678                             *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 684                         /* will never occur */ 
 690                 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 696     if(pArgs
->flush 
&& source
>=sourceLimit
) { 
 697         /* flush remaining bits to the target */ 
 699             if (base64Counter
!=0) { 
 700                 if(target
<targetLimit
) { 
 701                     *target
++=toBase64
[bits
]; 
 703                         *offsets
++=sourceIndex
-1; 
 706                     cnv
->charErrorBuffer
[cnv
->charErrorBufferLength
++]=toBase64
[bits
]; 
 707                     *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 710             /* Add final MINUS to terminate unicodeMode */ 
 711             if(target
<targetLimit
) { 
 714                     *offsets
++=sourceIndex
-1; 
 717                 cnv
->charErrorBuffer
[cnv
->charErrorBufferLength
++]=MINUS
; 
 718                 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 721         /* reset the state for the next conversion */ 
 722         cnv
->fromUnicodeStatus
=(cnv
->fromUnicodeStatus
&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ 
 724         /* set the converter state back into UConverter */ 
 725         cnv
->fromUnicodeStatus
= 
 726             (cnv
->fromUnicodeStatus
&0xf0000000)|    /* keep version*/ 
 727             ((uint32_t)inDirectMode
<<24)|((uint32_t)base64Counter
<<16)|(uint32_t)bits
; 
 730     /* write back the updated pointers */ 
 731     pArgs
->source
=source
; 
 732     pArgs
->target
=(char *)target
; 
 733     pArgs
->offsets
=offsets
; 
 /*
  * Return the converter name that corresponds to the version field,
  * i.e. the top four bits (31..28) of cnv->fromUnicodeStatus.
  *
  * cnv: converter to query; only fromUnicodeStatus is read.
  * Returns a string literal; "UTF-7,version=1" is one of the possible results.
  * NOTE(review): the case labels and the remaining return statement(s) are
  * not visible in this listing - confirm which version maps to which name.
  */
 737 static const char * U_CALLCONV
 
 738 _UTF7GetName(const UConverter 
*cnv
) { 
 739     switch(cnv
->fromUnicodeStatus
>>28) { 
 741         return "UTF-7,version=1"; 
 748 static const UConverterImpl _UTF7Impl
={ 
 758     _UTF7ToUnicodeWithOffsets
, 
 759     _UTF7ToUnicodeWithOffsets
, 
 760     _UTF7FromUnicodeWithOffsets
, 
 761     _UTF7FromUnicodeWithOffsets
, 
 766     NULL
, /* we don't need writeSub() because we never call a callback at fromUnicode() */ 
 768     ucnv_getCompleteUnicodeSet
, 
 774 static const UConverterStaticData _UTF7StaticData
={ 
 775     sizeof(UConverterStaticData
), 
 777     0, /* TODO CCSID for UTF-7 */ 
 780     { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */ 
 784     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 
 787 const UConverterSharedData _UTF7Data
= 
 788         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF7StaticData
, &_UTF7Impl
); 
 790 /* IMAP mailbox name encoding ----------------------------------------------- */ 
 793  * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1 
 794  * http://www.ietf.org/rfc/rfc2060.txt 
 796  * 5.1.3.  Mailbox International Naming Convention 
 798  * By convention, international mailbox names are specified using a 
 799  * modified version of the UTF-7 encoding described in [UTF-7].  The 
 800  * purpose of these modifications is to correct the following problems 
 803  *    1) UTF-7 uses the "+" character for shifting; this conflicts with 
 804  *       the common use of "+" in mailbox names, in particular USENET 
 807  *    2) UTF-7's encoding is BASE64 which uses the "/" character; this 
 808  *       conflicts with the use of "/" as a popular hierarchy delimiter. 
 810  *    3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with 
 811  *       the use of "\" as a popular hierarchy delimiter. 
 813  *    4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with 
 814  *       the use of "~" in some servers as a home directory indicator. 
 816  *    5) UTF-7 permits multiple alternate forms to represent the same 
  817  *       string; in particular, printable US-ASCII characters can be 
 818  *       represented in encoded form. 
 820  * In modified UTF-7, printable US-ASCII characters except for "&" 
 821  * represent themselves; that is, characters with octet values 0x20-0x25 
 822  * and 0x27-0x7e.  The character "&" (0x26) is represented by the two- 
 823  * octet sequence "&-". 
 825  * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all 
 826  * Unicode 16-bit octets) are represented in modified BASE64, with a 
 827  * further modification from [UTF-7] that "," is used instead of "/". 
 828  * Modified BASE64 MUST NOT be used to represent any printing US-ASCII 
 829  * character which can represent itself. 
 831  * "&" is used to shift to modified BASE64 and "-" to shift back to US- 
 832  * ASCII.  All names start in US-ASCII, and MUST end in US-ASCII (that 
 833  * is, a name that ends with a Unicode 16-bit octet MUST end with a "- 
 836  * For example, here is a mailbox name which mixes English, Japanese, 
 837  * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw- 
 841  * Tests for US-ASCII characters belonging to character classes 
 844  * Set D (directly encoded characters) consists of the following 
 845  * characters: the upper and lower case letters A through Z 
 846  * and a through z, the 10 digits 0-9, and the following nine special 
 847  * characters (note that "+" and "=" are omitted): 
 850  * Set O (optional direct characters) consists of the following 
 851  * characters (note that "\" and "~" are omitted): 
 852  *     !"#$%&*;<=>@[]^_`{|} 
 854  * According to the rules in RFC 2152, the byte values for the following 
 855  * US-ASCII characters are not used in UTF-7 and are therefore illegal: 
 856  * - all C0 control codes except for CR LF TAB 
 860  * - all codes beyond US-ASCII, i.e. all >127 
 /* uses '&' not '+' to start a base64 sequence */
#define AMPERSAND 0x26

/* legal byte values: all US-ASCII graphic characters 0x20..0x7e */
#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e)

/*
 * direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26
 *
 * c: character code to test; evaluated more than once, so it must not have
 *    side effects.
 * Yields non-zero when c is a legal IMAP byte other than the ampersand.
 * The parameter is parenthesized at every use so that an expression argument
 * (for example a conditional expression) is compared as a whole.
 */
#define inSetDIMAP(c) (isLegalIMAP(c) && (c)!=AMPERSAND)
 874 #define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA) 
 875 #define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[c]) 
 878  * converter status values: 
 881  *     24 inDirectMode (boolean) 
 882  * 23..16 base64Counter (-1..7) 
 883  * 15..0  bits (up to 14 bits incoming base64) 
 886  *     24 inDirectMode (boolean) 
 887  * 23..16 base64Counter (0..2) 
 888  *  7..0  bits (6 bits outgoing base64) 
 894 static void U_CALLCONV
 
 895 _IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs 
*pArgs
, 
 896                           UErrorCode 
*pErrorCode
) { 
 898     const uint8_t *source
, *sourceLimit
; 
 900     const UChar 
*targetLimit
; 
 906     int32_t length
, targetCapacity
; 
 910     int8_t base64Counter
; 
 915     int32_t sourceIndex
, nextSourceIndex
; 
 920     /* set up the local pointers */ 
 921     cnv
=pArgs
->converter
; 
 923     source
=(const uint8_t *)pArgs
->source
; 
 924     sourceLimit
=(const uint8_t *)pArgs
->sourceLimit
; 
 925     target
=pArgs
->target
; 
 926     targetLimit
=pArgs
->targetLimit
; 
 927     offsets
=pArgs
->offsets
; 
 928     /* get the state machine state */ 
 930         uint32_t status
=cnv
->toUnicodeStatus
; 
 931         inDirectMode
=(UBool
)((status
>>24)&1); 
 932         base64Counter
=(int8_t)(status
>>16); 
 933         bits
=(uint16_t)status
; 
 936     byteIndex
=cnv
->toULength
; 
 938     /* sourceIndex=-1 if the current character began in the previous buffer */ 
 939     sourceIndex
=byteIndex
==0 ? 0 : -1; 
 945          * In Direct Mode, US-ASCII characters are encoded directly, i.e., 
 946          * with their US-ASCII byte values. 
 947          * An ampersand starts Unicode (or "escape") Mode. 
 949          * In Direct Mode, only the sourceIndex is used. 
 952         length
=(int32_t)(sourceLimit
-source
); 
 953         targetCapacity
=(int32_t)(targetLimit
-target
); 
 954         if(length
>targetCapacity
) { 
 955             length
=targetCapacity
; 
 959             if(!isLegalIMAP(b
)) { 
 963                 *pErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
 965             } else if(b
!=AMPERSAND
) { 
 966                 /* write directly encoded character */ 
 969                     *offsets
++=sourceIndex
++; 
 971             } else /* AMPERSAND */ { 
 972                 /* switch to Unicode mode */ 
 973                 nextSourceIndex
=++sourceIndex
; 
 982         if(source
<sourceLimit 
&& target
>=targetLimit
) { 
 984             *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 989          * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded. 
 990          * The base64 sequence ends with any character that is not in the base64 alphabet. 
 991          * A terminating minus sign is consumed. 
 992          * US-ASCII must not be base64-ed. 
 994          * In Unicode Mode, the sourceIndex has the index to the start of the current 
 995          * base64 bytes, while nextSourceIndex is precisely parallel to source, 
 996          * keeping the index to the following byte. 
 997          * Note that in 2 out of 3 cases, UChars overlap within a base64 byte. 
 999         while(source
<sourceLimit
) { 
1000             if(target
<targetLimit
) { 
1001                 bytes
[byteIndex
++]=b
=*source
++; 
1004                     /* illegal - test other illegal US-ASCII values by base64Value==-3 */ 
1006                     *pErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
1008                 } else if((base64Value
=FROM_BASE64_IMAP(b
))>=0) { 
1009                     /* collect base64 bytes into UChars */ 
1010                     switch(base64Counter
) { 
1011                     case -1: /* -1 is immediately after the & */ 
1020                         bits
=(uint16_t)((bits
<<6)|base64Value
); 
1024                         c
=(UChar
)((bits
<<4)|(base64Value
>>2)); 
1025                         if(isLegalIMAP(c
)) { 
1028                             *pErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
1033                             *offsets
++=sourceIndex
; 
1034                             sourceIndex
=nextSourceIndex
-1; 
1036                         bytes
[0]=b
; /* keep this byte in case an error occurs */ 
1038                         bits
=(uint16_t)(base64Value
&3); 
1042                         c
=(UChar
)((bits
<<2)|(base64Value
>>4)); 
1043                         if(isLegalIMAP(c
)) { 
1046                             *pErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
1051                             *offsets
++=sourceIndex
; 
1052                             sourceIndex
=nextSourceIndex
-1; 
1054                         bytes
[0]=b
; /* keep this byte in case an error occurs */ 
1056                         bits
=(uint16_t)(base64Value
&15); 
1060                         c
=(UChar
)((bits
<<6)|base64Value
); 
1061                         if(isLegalIMAP(c
)) { 
1064                             *pErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
1069                             *offsets
++=sourceIndex
; 
1070                             sourceIndex
=nextSourceIndex
; 
1077                         /* will never occur */ 
1080                 } else if(base64Value
==-2) { 
1081                     /* minus sign terminates the base64 sequence */ 
1083                     if(base64Counter
==-1) { 
1084                         /* &- i.e. a minus immediately following an ampersand */ 
1085                         *target
++=AMPERSAND
; 
1087                             *offsets
++=sourceIndex
-1; 
1090                         /* absorb the minus and leave the Unicode Mode */ 
1091                         if(bits
!=0 || (base64Counter
!=0 && base64Counter
!=3 && base64Counter
!=6)) { 
1092                             /* bits are illegally left over, a UChar is incomplete */ 
1093                             /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */ 
1094                             *pErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
1098                     sourceIndex
=nextSourceIndex
; 
1101                     if(base64Counter
==-1) { 
1102                         /* illegal: & immediately followed by something other than base64 or minus sign */ 
1103                         /* include the ampersand in the reported sequence */ 
1109                     /* base64Value==-1 for characters that are illegal only in Unicode mode */ 
1110                     /* base64Value==-3 for illegal characters */ 
1113                     *pErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
1117                 /* target is full */ 
1118                 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
1126      * the end of the input stream and detection of truncated input 
1127      * are handled by the framework, but here we must check if we are in Unicode 
1128      * mode and byteIndex==0 because we must end in direct mode 
1132      *   in Unicode mode and byteIndex==0 
1133      *   end of input and no truncated input 
1135     if( U_SUCCESS(*pErrorCode
) && 
1136         !inDirectMode 
&& byteIndex
==0 && 
1137         pArgs
->flush 
&& source
>=sourceLimit
 
1139         if(base64Counter
==-1) { 
1140             /* & at the very end of the input */ 
1141             /* make the ampersand the reported sequence */ 
1145         /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */ 
1147         inDirectMode
=TRUE
; /* avoid looping */ 
1148         *pErrorCode
=U_TRUNCATED_CHAR_FOUND
; 
1151     /* set the converter state back into UConverter */ 
1152     cnv
->toUnicodeStatus
=((uint32_t)inDirectMode
<<24)|((uint32_t)((uint8_t)base64Counter
)<<16)|(uint32_t)bits
; 
1153     cnv
->toULength
=byteIndex
; 
1155     /* write back the updated pointers */ 
1156     pArgs
->source
=(const char *)source
; 
1157     pArgs
->target
=target
; 
1158     pArgs
->offsets
=offsets
; 
1162 static void U_CALLCONV
 
1163 _IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs 
*pArgs
, 
1164                             UErrorCode 
*pErrorCode
) { 
1166     const UChar 
*source
, *sourceLimit
; 
1167     uint8_t *target
, *targetLimit
; 
1170     int32_t length
, targetCapacity
, sourceIndex
; 
1176     int8_t base64Counter
; 
1179     /* set up the local pointers */ 
1180     cnv
=pArgs
->converter
; 
1182     /* set up the local pointers */ 
1183     source
=pArgs
->source
; 
1184     sourceLimit
=pArgs
->sourceLimit
; 
1185     target
=(uint8_t *)pArgs
->target
; 
1186     targetLimit
=(uint8_t *)pArgs
->targetLimit
; 
1187     offsets
=pArgs
->offsets
; 
1189     /* get the state machine state */ 
1191         uint32_t status
=cnv
->fromUnicodeStatus
; 
1192         inDirectMode
=(UBool
)((status
>>24)&1); 
1193         base64Counter
=(int8_t)(status
>>16); 
1194         bits
=(uint8_t)status
; 
1197     /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ 
1202         length
=(int32_t)(sourceLimit
-source
); 
1203         targetCapacity
=(int32_t)(targetLimit
-target
); 
1204         if(length
>targetCapacity
) { 
1205             length
=targetCapacity
; 
1209             /* encode 0x20..0x7e except '&' directly */ 
1211                 /* encode directly */ 
1212                 *target
++=(uint8_t)c
; 
1214                     *offsets
++=sourceIndex
++; 
1216             } else if(c
==AMPERSAND
) { 
1217                 /* output &- for & */ 
1218                 *target
++=AMPERSAND
; 
1219                 if(target
<targetLimit
) { 
1222                         *offsets
++=sourceIndex
; 
1223                         *offsets
++=sourceIndex
++; 
1225                     /* realign length and targetCapacity */ 
1229                         *offsets
++=sourceIndex
++; 
1231                     cnv
->charErrorBuffer
[0]=MINUS
; 
1232                     cnv
->charErrorBufferLength
=1; 
1233                     *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
1237                 /* un-read this character and switch to Unicode Mode */ 
1239                 *target
++=AMPERSAND
; 
1241                     *offsets
++=sourceIndex
; 
1249         if(source
<sourceLimit 
&& target
>=targetLimit
) { 
1250             /* target is full */ 
1251             *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
1255         while(source
<sourceLimit
) { 
1256             if(target
<targetLimit
) { 
1258                 if(isLegalIMAP(c
)) { 
1259                     /* encode directly */ 
1262                     /* trick: back out this character to make this easier */ 
1265                     /* terminate the base64 sequence */ 
1266                     if(base64Counter
!=0) { 
1267                         /* write remaining bits for the previous character */ 
1268                         *target
++=TO_BASE64_IMAP(bits
); 
1270                             *offsets
++=sourceIndex
-1; 
1273                     /* need to terminate with a minus */ 
1274                     if(target
<targetLimit
) { 
1277                             *offsets
++=sourceIndex
-1; 
1280                         cnv
->charErrorBuffer
[0]=MINUS
; 
1281                         cnv
->charErrorBufferLength
=1; 
1282                         *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
1288                      * base64 this character: 
1289                      * Output 2 or 3 base64 bytes for the remaining bits of the previous character 
1290                      * and the bits of this character, each implicitly in UTF-16BE. 
1292                      * Here, bits is an 8-bit variable because only 6 bits need to be kept from one 
1293                      * character to the next. The actual 2 or 4 bits are shifted to the left edge 
1294                      * of the 6-bits field 5..0 to make the termination of the base64 sequence easier. 
1296                     switch(base64Counter
) { 
1299                         *target
++=TO_BASE64_IMAP(b
); 
1300                         if(target
<targetLimit
) { 
1301                             b
=(uint8_t)((c
>>4)&0x3f); 
1302                             *target
++=TO_BASE64_IMAP(b
); 
1304                                 *offsets
++=sourceIndex
; 
1305                                 *offsets
++=sourceIndex
++; 
1309                                 *offsets
++=sourceIndex
++; 
1311                             b
=(uint8_t)((c
>>4)&0x3f); 
1312                             cnv
->charErrorBuffer
[0]=TO_BASE64_IMAP(b
); 
1313                             cnv
->charErrorBufferLength
=1; 
1314                             *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
1316                         bits
=(uint8_t)((c
&15)<<2); 
1320                         b
=(uint8_t)(bits
|(c
>>14)); 
1321                         *target
++=TO_BASE64_IMAP(b
); 
1322                         if(target
<targetLimit
) { 
1323                             b
=(uint8_t)((c
>>8)&0x3f); 
1324                             *target
++=TO_BASE64_IMAP(b
); 
1325                             if(target
<targetLimit
) { 
1326                                 b
=(uint8_t)((c
>>2)&0x3f); 
1327                                 *target
++=TO_BASE64_IMAP(b
); 
1329                                     *offsets
++=sourceIndex
; 
1330                                     *offsets
++=sourceIndex
; 
1331                                     *offsets
++=sourceIndex
++; 
1335                                     *offsets
++=sourceIndex
; 
1336                                     *offsets
++=sourceIndex
++; 
1338                                 b
=(uint8_t)((c
>>2)&0x3f); 
1339                                 cnv
->charErrorBuffer
[0]=TO_BASE64_IMAP(b
); 
1340                                 cnv
->charErrorBufferLength
=1; 
1341                                 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
1345                                 *offsets
++=sourceIndex
++; 
1347                             b
=(uint8_t)((c
>>8)&0x3f); 
1348                             cnv
->charErrorBuffer
[0]=TO_BASE64_IMAP(b
); 
1349                             b
=(uint8_t)((c
>>2)&0x3f); 
1350                             cnv
->charErrorBuffer
[1]=TO_BASE64_IMAP(b
); 
1351                             cnv
->charErrorBufferLength
=2; 
1352                             *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
1354                         bits
=(uint8_t)((c
&3)<<4); 
1358                         b
=(uint8_t)(bits
|(c
>>12)); 
1359                         *target
++=TO_BASE64_IMAP(b
); 
1360                         if(target
<targetLimit
) { 
1361                             b
=(uint8_t)((c
>>6)&0x3f); 
1362                             *target
++=TO_BASE64_IMAP(b
); 
1363                             if(target
<targetLimit
) { 
1364                                 b
=(uint8_t)(c
&0x3f); 
1365                                 *target
++=TO_BASE64_IMAP(b
); 
1367                                     *offsets
++=sourceIndex
; 
1368                                     *offsets
++=sourceIndex
; 
1369                                     *offsets
++=sourceIndex
++; 
1373                                     *offsets
++=sourceIndex
; 
1374                                     *offsets
++=sourceIndex
++; 
1376                                 b
=(uint8_t)(c
&0x3f); 
1377                                 cnv
->charErrorBuffer
[0]=TO_BASE64_IMAP(b
); 
1378                                 cnv
->charErrorBufferLength
=1; 
1379                                 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
1383                                 *offsets
++=sourceIndex
++; 
1385                             b
=(uint8_t)((c
>>6)&0x3f); 
1386                             cnv
->charErrorBuffer
[0]=TO_BASE64_IMAP(b
); 
1387                             b
=(uint8_t)(c
&0x3f); 
1388                             cnv
->charErrorBuffer
[1]=TO_BASE64_IMAP(b
); 
1389                             cnv
->charErrorBufferLength
=2; 
1390                             *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
1396                         /* will never occur */ 
1401                 /* target is full */ 
1402                 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
1408     if(pArgs
->flush 
&& source
>=sourceLimit
) { 
1409         /* flush remaining bits to the target */ 
1411             if(base64Counter
!=0) { 
1412                 if(target
<targetLimit
) { 
1413                     *target
++=TO_BASE64_IMAP(bits
); 
1415                         *offsets
++=sourceIndex
-1; 
1418                     cnv
->charErrorBuffer
[cnv
->charErrorBufferLength
++]=TO_BASE64_IMAP(bits
); 
1419                     *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
1422             /* need to terminate with a minus */ 
1423             if(target
<targetLimit
) { 
1426                     *offsets
++=sourceIndex
-1; 
1429                 cnv
->charErrorBuffer
[cnv
->charErrorBufferLength
++]=MINUS
; 
1430                 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
1433         /* reset the state for the next conversion */ 
1434         cnv
->fromUnicodeStatus
=(cnv
->fromUnicodeStatus
&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ 
1436         /* set the converter state back into UConverter */ 
1437         cnv
->fromUnicodeStatus
= 
1438             (cnv
->fromUnicodeStatus
&0xf0000000)|    /* keep version*/ 
1439             ((uint32_t)inDirectMode
<<24)|((uint32_t)base64Counter
<<16)|(uint32_t)bits
; 
1442     /* write back the updated pointers */ 
1443     pArgs
->source
=source
; 
1444     pArgs
->target
=(char *)target
; 
1445     pArgs
->offsets
=offsets
; 
1450 static const UConverterImpl _IMAPImpl
={ 
1460     _IMAPToUnicodeWithOffsets
, 
1461     _IMAPToUnicodeWithOffsets
, 
1462     _IMAPFromUnicodeWithOffsets
, 
1463     _IMAPFromUnicodeWithOffsets
, 
1468     NULL
, /* we don't need writeSub() because we never call a callback at fromUnicode() */ 
1470     ucnv_getCompleteUnicodeSet
, 
1475 static const UConverterStaticData _IMAPStaticData
={ 
1476     sizeof(UConverterStaticData
), 
1477     "IMAP-mailbox-name", 
1478     0, /* TODO CCSID for IMAP-mailbox-name */ 
1479     UCNV_IBM
, UCNV_IMAP_MAILBOX
, 
1481     { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */ 
1485     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 
1488 const UConverterSharedData _IMAPData
= 
1489         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_IMAPStaticData
, &_IMAPImpl
);