icuSources/common/punycode.h

   1 /*
   2 *******************************************************************************
   3 *
   4 *   Copyright (C) 2002-2003, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 *******************************************************************************
   8 *   file name:  punycode.h
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 2002jan31
  14 *   created by: Markus W. Scherer
  15 */
  16
  17 /* This ICU code derived from: */
  18 /*
  19 punycode.c 0.4.0 (2001-Nov-17-Sat)
  20 http://www.cs.berkeley.edu/~amc/idn/
  21 Adam M. Costello
  22 http://www.nicemice.net/amc/
  23 */
  24
  25 #ifndef __PUNYCODE_H__
  26 #define __PUNYCODE_H__
  27
  28 #include "unicode/utypes.h"
  29
  30 #if !UCONFIG_NO_IDNA
  31
  32 /**
  33  * u_strToPunycode() converts Unicode to Punycode.
  34  *
  35  * The input string must not contain single, unpaired surrogates.
  36  * The output will be represented as an array of ASCII code points.
  37  *
  38  * The output string is NUL-terminated according to normal ICU
  39  * string output rules.
  40  *
  41  * @param src Input Unicode string.
  42  *            This function handles a limited number of code points
  43  *            (the limit is >=64).
  44  *            U_INDEX_OUTOFBOUNDS_ERROR is set if the limit is exceeded.
  45  * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  46  * @param dest Output Punycode array.
  47  * @param destCapacity Size of dest in number of UChars.
  48  * @param caseFlags Vector of boolean values, one per input UChar,
  49  *                  indicating whether the corresponding character is to be
  50  *                  marked for the decoder to optionally
  51  *                  uppercase (TRUE) or lowercase (FALSE)
  52  *                  the character.
  53  *                  ASCII characters are output directly in the case as marked.
  54  *                  Flags corresponding to trail surrogates are ignored.
  55  *                  If caseFlags==NULL then input characters are not
  56  *                  case-mapped.
  57  * @param pErrorCode ICU in/out error code parameter.
  58  *                   U_INVALID_CHAR_FOUND if src contains
  59  *                   unmatched single surrogates.
  60  *                   U_INDEX_OUTOFBOUNDS_ERROR if src contains
  61  *                   too many code points.
  62  * @return Number of ASCII characters in dest.
  63  *
  64  * @see u_strFromPunycode
  65  */
  66 U_CFUNC int32_t
  67 u_strToPunycode(const UChar *src, int32_t srcLength,
  68                 UChar *dest, int32_t destCapacity,
  69                 const UBool *caseFlags,
  70                 UErrorCode *pErrorCode);
  71
  72 /**
  73  * u_strFromPunycode() converts Punycode to Unicode.
  74  * The Unicode string will be at most as long (in UChars)
  75  * as the Punycode string (in chars).
  76  *
  77  * @param src Input Punycode string.
  78  * @param srcLength Length of src, or -1 if NUL-terminated.
  79  * @param dest Output Unicode string buffer.
  80  * @param destCapacity Size of dest in number of UChars,
  81  *                     and of caseFlags in number of UBools.
  82  * @param caseFlags Output array for case flags as
  83  *                  defined by the Punycode string.
  84  *                  The caller should uppercase (TRUE) or lowercase (FALSE)
  85  *                  the corresponding character in dest.
  86  *                  For supplementary characters, only the lead surrogate
  87  *                  is marked, and FALSE is stored for the trail surrogate.
  88  *                  This is redundant and not necessary for ASCII characters
  89  *                  because they are already in the case indicated.
  90  *                  Can be NULL if the case flags are not needed.
  91  * @param pErrorCode ICU in/out error code parameter.
  92  *                   U_INVALID_CHAR_FOUND if a non-ASCII character
  93  *                   precedes the last delimiter ('-'),
  94  *                   or if an invalid character (not a-zA-Z0-9) is found
  95  *                   after the last delimiter.
  96  *                   U_ILLEGAL_CHAR_FOUND if the delta sequence is ill-formed.
  97  * @return Number of UChars written to dest.
  98  *
  99  * @see u_strToPunycode
 100  */
 101 U_CFUNC int32_t
 102 u_strFromPunycode(const UChar *src, int32_t srcLength,
 103                   UChar *dest, int32_t destCapacity,
 104                   UBool *caseFlags,
 105                   UErrorCode *pErrorCode);
 106
 107 #endif /* #if !UCONFIG_NO_IDNA */
 108
 109 #endif
 110
 111 /*
 112  * Hey, Emacs, please set the following:
 113  *
 114  * Local Variables:
 115  * indent-tabs-mode: nil
 116  * End:
 117  *
 118  */