ICU-57166.0.1.tar.gz

[apple/icu.git] / icuSources / common / unicode / uloc.h
diff --git a/icuSources/common/unicode/uloc.h b/icuSources/common/unicode/uloc.h

index 2912f6770ef73f8d8317854e6233d085f20d47a9..a96b1cd8096ff4ada5e1d11d89878010b3e63784 100644 (file)
--- a/icuSources/common/unicode/uloc.h
+++ b/icuSources/common/unicode/uloc.h
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 1997-2007, International Business Machines
+*   Copyright (C) 1997-2016, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *
@@ -256,10 +256,10 @@
  #define ULOC_COUNTRY_CAPACITY 4
  /**
   * Useful constant for the maximum size of the whole locale ID
- * (including the terminating NULL).
+ * (including the terminating NULL and all keywords).
   * @stable ICU 2.0
   */
-#define ULOC_FULLNAME_CAPACITY 56
+#define ULOC_FULLNAME_CAPACITY 157
  
  /**
   * Useful constant for the maximum size of the script part of a locale ID
@@ -272,31 +272,53 @@
   * Useful constant for the maximum size of keywords in a locale
   * @stable ICU 2.8
   */
-#define ULOC_KEYWORDS_CAPACITY 50
+#define ULOC_KEYWORDS_CAPACITY 96
  
  /**
- * Useful constant for the maximum SIZE of keywords in a locale
+ * Useful constant for the maximum total size of keywords and their values in a locale
   * @stable ICU 2.8
   */
  #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
  
  /**
- * Character separating keywords from the locale string
- * different for EBCDIC - TODO
+ * Invariant character separating keywords from the locale string
   * @stable ICU 2.8
   */
  #define ULOC_KEYWORD_SEPARATOR '@'
+
+/**
+  * Unicode code point for '@' separating keywords from the locale string.
+  * @see ULOC_KEYWORD_SEPARATOR
+  * @stable ICU 4.6
+  */
+#define ULOC_KEYWORD_SEPARATOR_UNICODE 0x40
+
  /**
- * Character for assigning value to a keyword
+ * Invariant character for assigning value to a keyword
   * @stable ICU 2.8
   */
  #define ULOC_KEYWORD_ASSIGN '='
+
+/**
+  * Unicode code point for '=' for assigning value to a keyword.
+  * @see ULOC_KEYWORD_ASSIGN
+  * @stable ICU 4.6 
+  */
+#define ULOC_KEYWORD_ASSIGN_UNICODE 0x3D
+
  /**
- * Character separating keywords
+ * Invariant character separating keywords
   * @stable ICU 2.8
   */
  #define ULOC_KEYWORD_ITEM_SEPARATOR ';'
  
+/**
+  * Unicode code point for ';' separating keywords
+  * @see ULOC_KEYWORD_ITEM_SEPARATOR
+  * @stable ICU 4.6
+  */
+#define ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE 0x3B
+
  /**
   * Constants for *_getLocale()
   * Allow user to select whether she wants information on 
@@ -331,7 +353,7 @@ typedef enum {
    ULOC_DATA_LOCALE_TYPE_LIMIT = 3
  } ULocDataLocaleType ;
  
-
+#ifndef U_HIDE_SYSTEM_API
  /**
   * Gets ICU's default locale.  
   * The returned string is a snapshot in time, and will remain valid
@@ -366,6 +388,7 @@ uloc_getDefault(void);
  U_STABLE void U_EXPORT2
  uloc_setDefault(const char* localeID,
          UErrorCode*       status);
+#endif  /* U_HIDE_SYSTEM_API */
  
  /**
   * Gets the language code for the specified locale.
@@ -565,6 +588,8 @@ uloc_getDisplayScript(const char* locale,
  
  /**
   * Gets the country name suitable for display for the specified locale.
+ * Warning: this is for the region part of a valid locale ID; it cannot just be the region code (like "FR").
+ * To get the display name for a region alone, or for other options, use ULocaleDisplayNames instead.
   *
   * @param locale the locale to get the displayable country code with. NULL may be used to specify the default.
   * @param displayLocale Specifies the locale to be used to display the name.  In other words,
@@ -730,7 +755,9 @@ U_STABLE int32_t U_EXPORT2 uloc_countAvailable(void);
  
  /**
   *
- * Gets a list of all available language codes defined in ISO 639.  This is a pointer
+ * Gets a list of all available 2-letter language codes defined in ISO 639,
+ * plus additional 3-letter codes determined to be useful for locale generation as
+ * defined by Unicode CLDR. This is a pointer
   * to an array of pointers to arrays of char.  All of these pointers are owned
   * by ICU-- do not delete them, and do not write through them.  The array is
   * terminated with a null pointer.
@@ -742,10 +769,11 @@ uloc_getISOLanguages(void);
  
  /**
   *
- * Gets a list of all available 2-letter country codes defined in ISO 639.  This is a
- * pointer to an array of pointers to arrays of char.  All of these pointers are
- * owned by ICU-- do not delete them, and do not write through them.  The array is
- * terminated with a null pointer.
+ * Gets a list of all available 2-letter country codes which are valid regular
+ * region codes in CLDR; these are based on the non-deprecated alpha-2 region
+ * codes in ISO 3166-1. The return value is a pointer to an array of pointers
+ * C strings.  All of these pointers are owned by ICU; do not delete them, and
+ * do not write through them.  The array is terminated with a null pointer.
   * @return a list of all available country codes
   * @stable ICU 2.0
   */
@@ -775,12 +803,15 @@ uloc_getParent(const char*    localeID,
  
  
  /**
- * Gets the full name for the specified locale.
+ * Gets the full name for the specified locale, like uloc_getName(),
+ * but without keywords.
+ *
   * Note: This has the effect of 'canonicalizing' the string to
   * a certain extent. Upper and lower case are set as needed,
   * and if the components were in 'POSIX' format they are changed to
   * ICU format.  It does NOT map aliased names in any way.
   * See the top of this header file.
+ *
   * This API strips off the keyword part, so "de_DE\@collation=phonebook" 
   * will become "de_DE". 
   * This API supports preflighting.
@@ -831,13 +862,16 @@ uloc_getKeywordValue(const char* localeID,
  
  
  /**
- * Set the value of the specified keyword.
+ * Sets or removes the value of the specified keyword.
+ *
+ * For removing all keywords, use uloc_getBaseName().
+ *
   * NOTE: Unlike almost every other ICU function which takes a
   * buffer, this function will NOT truncate the output text. If a
   * BUFFER_OVERFLOW_ERROR is received, it means that the original
   * buffer is untouched. This is done to prevent incorrect or possibly
   * even malformed locales from being generated and used.
- * 
+ *
   * @param keywordName name of the keyword to be set. Case insensitive.
   * @param keywordValue value of the keyword to be set. If 0-length or
   *  NULL, will result in the keyword being removed. No error is given if 
@@ -855,6 +889,60 @@ uloc_setKeywordValue(const char* keywordName,
                       char* buffer, int32_t bufferCapacity,
                       UErrorCode* status);
  
+/**
+ * Returns whether the locale's script is written right-to-left.
+ * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags().
+ * If no likely script is known, then FALSE is returned.
+ *
+ * A script is right-to-left according to the CLDR script metadata
+ * which corresponds to whether the script's letters have Bidi_Class=R or AL.
+ *
+ * Returns TRUE for "ar" and "en-Hebr", FALSE for "zh" and "fa-Cyrl".
+ *
+ * @param locale input locale ID
+ * @return TRUE if the locale's script is written right-to-left
+ * @stable ICU 54
+ */
+U_STABLE UBool U_EXPORT2
+uloc_isRightToLeft(const char *locale);
+
+/**
+ * enums for the  return value for the character and line orientation
+ * functions.
+ * @stable ICU 4.0
+ */
+typedef enum {
+  ULOC_LAYOUT_LTR   = 0,  /* left-to-right. */
+  ULOC_LAYOUT_RTL    = 1,  /* right-to-left. */
+  ULOC_LAYOUT_TTB    = 2,  /* top-to-bottom. */
+  ULOC_LAYOUT_BTT    = 3,   /* bottom-to-top. */
+  ULOC_LAYOUT_UNKNOWN
+} ULayoutType;
+
+/**
+ * Get the layout character orientation for the specified locale.
+ * 
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for characters.
+ * @stable ICU 4.0
+ */
+U_STABLE ULayoutType U_EXPORT2
+uloc_getCharacterOrientation(const char* localeId,
+                             UErrorCode *status);
+
+/**
+ * Get the layout line orientation for the specified locale.
+ * 
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for lines.
+ * @stable ICU 4.0
+ */
+U_STABLE ULayoutType U_EXPORT2
+uloc_getLineOrientation(const char* localeId,
+                        UErrorCode *status);
+
  /**
   * enums for the 'outResult' parameter return value
   * @see uloc_acceptLanguageFromHTTP
@@ -920,12 +1008,247 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
   * @param status an error is returned if the LCID is unrecognized or the output buffer
   *  is too small
   * @return actual the actual size of the locale ID, not including NUL-termination 
- * @draft ICU 3.8
+ * @stable ICU 3.8
   */
-U_DRAFT int32_t U_EXPORT2
-uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
+U_STABLE int32_t U_EXPORT2
+uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity,
                      UErrorCode *status);
  
-#endif /*_ULOC*/
  
+/**
+ * Add the likely subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ *   http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the maximal form, or there is no data available
+ * for maximization, it will be copied to the output buffer.  For example,
+ * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
+ *
+ * Examples:
+ *
+ * "en" maximizes to "en_Latn_US"
+ *
+ * "de" maximizes to "de_Latn_US"
+ *
+ * "sr" maximizes to "sr_Cyrl_RS"
+ *
+ * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+ *
+ * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+ *
+ * @param localeID The locale to maximize
+ * @param maximizedLocaleID The maximized locale
+ * @param maximizedLocaleIDCapacity The capacity of the maximizedLocaleID buffer
+ * @param err Error information if maximizing the locale failed.  If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual buffer size needed for the maximized locale.  If it's
+ * greater than maximizedLocaleIDCapacity, the returned ID will be truncated.
+ * On error, the return value is -1.
+ * @stable ICU 4.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_addLikelySubtags(const char*    localeID,
+         char* maximizedLocaleID,
+         int32_t maximizedLocaleIDCapacity,
+         UErrorCode* err);
+
+
+/**
+ * Minimize the subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ *   http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the minimal form, or there is no data available
+ * for minimization, it will be copied to the output buffer.  Since the
+ * minimization algorithm relies on proper maximization, see the comments
+ * for uloc_addLikelySubtags for reasons why there might not be any data.
+ *
+ * Examples:
+ *
+ * "en_Latn_US" minimizes to "en"
+ *
+ * "de_Latn_US" minimizes to "de"
+ *
+ * "sr_Cyrl_RS" minimizes to "sr"
+ *
+ * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
+ * script, and minimizing to "zh" would imply "zh_Hans_CN".)
+ *
+ * @param localeID The locale to minimize
+ * @param minimizedLocaleID The minimized locale
+ * @param minimizedLocaleIDCapacity The capacity of the minimizedLocaleID buffer
+ * @param err Error information if minimizing the locale failed.  If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual buffer size needed for the minimized locale.  If it's
+ * greater than minimizedLocaleIDCapacity, the returned ID will be truncated.
+ * On error, the return value is -1.
+ * @stable ICU 4.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_minimizeSubtags(const char*    localeID,
+         char* minimizedLocaleID,
+         int32_t minimizedLocaleIDCapacity,
+         UErrorCode* err);
+
+/**
+ * Returns a locale ID for the specified BCP47 language tag string.
+ * If the specified language tag contains any ill-formed subtags,
+ * the first such subtag and all following subtags are ignored.
+ * <p> 
+ * This implements the 'Language-Tag' production of BCP47, and so
+ * supports grandfathered (regular and irregular) as well as private
+ * use language tags.  Private use tags are represented as 'x-whatever',
+ * and grandfathered tags are converted to their canonical replacements
+ * where they exist.  Note that a few grandfathered tags have no modern
+ * replacement, these will be converted using the fallback described in
+ * the first paragraph, so some information might be lost.
+ * @param langtag   the input BCP47 language tag.
+ * @param localeID  the output buffer receiving a locale ID for the
+ *                  specified BCP47 language tag.
+ * @param localeIDCapacity  the size of the locale ID output buffer.
+ * @param parsedLength  if not NULL, successfully parsed length
+ *                      for the input language tag is set.
+ * @param err       error information if receiving the locald ID
+ *                  failed.
+ * @return          the length of the locale ID.
+ * @stable ICU 4.2
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_forLanguageTag(const char* langtag,
+                    char* localeID,
+                    int32_t localeIDCapacity,
+                    int32_t* parsedLength,
+                    UErrorCode* err);
  
+/**
+ * Returns a well-formed language tag for this locale ID. 
+ * <p> 
+ * <b>Note</b>: When <code>strict</code> is FALSE, any locale
+ * fields which do not satisfy the BCP47 syntax requirement will
+ * be omitted from the result.  When <code>strict</code> is
+ * TRUE, this function sets U_ILLEGAL_ARGUMENT_ERROR to the
+ * <code>err</code> if any locale fields do not satisfy the
+ * BCP47 syntax requirement.
+ * @param localeID  the input locale ID
+ * @param langtag   the output buffer receiving BCP47 language
+ *                  tag for the locale ID.
+ * @param langtagCapacity   the size of the BCP47 language tag
+ *                          output buffer.
+ * @param strict    boolean value indicating if the function returns
+ *                  an error for an ill-formed input locale ID.
+ * @param err       error information if receiving the language
+ *                  tag failed.
+ * @return          The length of the BCP47 language tag.
+ * @stable ICU 4.2
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_toLanguageTag(const char* localeID,
+                   char* langtag,
+                   int32_t langtagCapacity,
+                   UBool strict,
+                   UErrorCode* err);
+
+/**
+ * Converts the specified keyword (legacy key, or BCP 47 Unicode locale
+ * extension key) to the equivalent BCP 47 Unicode locale extension key.
+ * For example, BCP 47 Unicode locale extension key "co" is returned for
+ * the input keyword "collation".
+ * <p>
+ * When the specified keyword is unknown, but satisfies the BCP syntax,
+ * then the pointer to the input keyword itself will be returned.
+ * For example,
+ * <code>uloc_toUnicodeLocaleKey("ZZ")</code> returns "ZZ".
+ * 
+ * @param keyword       the input locale keyword (either legacy key
+ *                      such as "collation" or BCP 47 Unicode locale extension
+ *                      key such as "co").
+ * @return              the well-formed BCP 47 Unicode locale extension key,
+ *                      or NULL if the specified locale keyword cannot be
+ *                      mapped to a well-formed BCP 47 Unicode locale extension
+ *                      key. 
+ * @see uloc_toLegacyKey
+ * @stable ICU 54
+ */
+U_STABLE const char* U_EXPORT2
+uloc_toUnicodeLocaleKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (legacy type, or BCP 47
+ * Unicode locale extension type) to the well-formed BCP 47 Unicode locale
+ * extension type for the specified keyword (category). For example, BCP 47
+ * Unicode locale extension type "phonebk" is returned for the input
+ * keyword value "phonebook", with the keyword "collation" (or "co").
+ * <p>
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of the BCP 47 Unicode locale extension type,
+ * or when the specified keyword allows 'variable' type and the specified
+ * value satisfies the syntax,  then the pointer to the input type value itself
+ * will be returned.
+ * For example,
+ * <code>uloc_toUnicodeLocaleType("Foo", "Bar")</code> returns "Bar",
+ * <code>uloc_toUnicodeLocaleType("variableTop", "00A4")</code> returns "00A4".
+ * 
+ * @param keyword       the locale keyword (either legacy key such as
+ *                      "collation" or BCP 47 Unicode locale extension
+ *                      key such as "co").
+ * @param value         the locale keyword value (either legacy type
+ *                      such as "phonebook" or BCP 47 Unicode locale extension
+ *                      type such as "phonebk").
+ * @return              the well-formed BCP47 Unicode locale extension type,
+ *                      or NULL if the locale keyword value cannot be mapped to
+ *                      a well-formed BCP 47 Unicode locale extension type.
+ * @see uloc_toLegacyType
+ * @stable ICU 54
+ */
+U_STABLE const char* U_EXPORT2
+uloc_toUnicodeLocaleType(const char* keyword, const char* value);
+
+/**
+ * Converts the specified keyword (BCP 47 Unicode locale extension key, or
+ * legacy key) to the legacy key. For example, legacy key "collation" is
+ * returned for the input BCP 47 Unicode locale extension key "co".
+ * 
+ * @param keyword       the input locale keyword (either BCP 47 Unicode locale
+ *                      extension key or legacy key).
+ * @return              the well-formed legacy key, or NULL if the specified
+ *                      keyword cannot be mapped to a well-formed legacy key.
+ * @see toUnicodeLocaleKey
+ * @stable ICU 54
+ */
+U_STABLE const char* U_EXPORT2
+uloc_toLegacyKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (BCP 47 Unicode locale extension type,
+ * or legacy type or type alias) to the canonical legacy type. For example,
+ * the legacy type "phonebook" is returned for the input BCP 47 Unicode
+ * locale extension type "phonebk" with the keyword "collation" (or "co").
+ * <p>
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of legacy key, or when the specified keyword
+ * allows 'variable' type and the specified value satisfies the syntax,
+ * then the pointer to the input type value itself will be returned.
+ * For example,
+ * <code>uloc_toLegacyType("Foo", "Bar")</code> returns "Bar",
+ * <code>uloc_toLegacyType("vt", "00A4")</code> returns "00A4".
+ *
+ * @param keyword       the locale keyword (either legacy keyword such as
+ *                      "collation" or BCP 47 Unicode locale extension
+ *                      key such as "co").
+ * @param value         the locale keyword value (either BCP 47 Unicode locale
+ *                      extension type such as "phonebk" or legacy keyword value
+ *                      such as "phonebook").
+ * @return              the well-formed legacy type, or NULL if the specified
+ *                      keyword value cannot be mapped to a well-formed legacy
+ *                      type.
+ * @see toUnicodeLocaleType
+ * @stable ICU 54
+ */
+U_STABLE const char* U_EXPORT2
+uloc_toLegacyType(const char* keyword, const char* value);
+
+#endif /*_ULOC*/