/*
**********************************************************************
-* Copyright (C) 1997-2007, International Business Machines
+* Copyright (C) 1997-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
#define ULOC_COUNTRY_CAPACITY 4
/**
* Useful constant for the maximum size of the whole locale ID
- * (including the terminating NULL).
+ * (including the terminating NUL and all keywords).
* @stable ICU 2.0
*/
-#define ULOC_FULLNAME_CAPACITY 56
+#define ULOC_FULLNAME_CAPACITY 157
/**
* Useful constant for the maximum size of the script part of a locale ID
* Useful constant for the maximum size of keywords in a locale
* @stable ICU 2.8
*/
-#define ULOC_KEYWORDS_CAPACITY 50
+#define ULOC_KEYWORDS_CAPACITY 96
/**
- * Useful constant for the maximum SIZE of keywords in a locale
+ * Useful constant for the maximum total size of keywords and their values in a locale
* @stable ICU 2.8
*/
#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
/**
- * Character separating keywords from the locale string
- * different for EBCDIC - TODO
+ * Invariant character separating keywords from the locale string
* @stable ICU 2.8
*/
#define ULOC_KEYWORD_SEPARATOR '@'
+
+/**
+ * Unicode code point for '@' separating keywords from the locale string.
+ * @see ULOC_KEYWORD_SEPARATOR
+ * @stable ICU 4.6
+ */
+#define ULOC_KEYWORD_SEPARATOR_UNICODE 0x40
+
/**
- * Character for assigning value to a keyword
+ * Invariant character for assigning value to a keyword
* @stable ICU 2.8
*/
#define ULOC_KEYWORD_ASSIGN '='
+
+/**
+ * Unicode code point for '=' for assigning value to a keyword.
+ * @see ULOC_KEYWORD_ASSIGN
+ * @stable ICU 4.6
+ */
+#define ULOC_KEYWORD_ASSIGN_UNICODE 0x3D
+
/**
- * Character separating keywords
+ * Invariant character separating keywords
* @stable ICU 2.8
*/
#define ULOC_KEYWORD_ITEM_SEPARATOR ';'
+/**
+ * Unicode code point for ';' separating keywords
+ * @see ULOC_KEYWORD_ITEM_SEPARATOR
+ * @stable ICU 4.6
+ */
+#define ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE 0x3B
+
/**
* Constants for *_getLocale()
* Allow user to select whether she wants information on
ULOC_DATA_LOCALE_TYPE_LIMIT = 3
} ULocDataLocaleType ;
-
+#ifndef U_HIDE_SYSTEM_API
/**
* Gets ICU's default locale.
* The returned string is a snapshot in time, and will remain valid
U_STABLE void U_EXPORT2
uloc_setDefault(const char* localeID,
UErrorCode* status);
+#endif /* U_HIDE_SYSTEM_API */
/**
* Gets the language code for the specified locale.
/**
* Gets the country name suitable for display for the specified locale.
+ * Warning: this is for the region part of a valid locale ID; it cannot just be the region code (like "FR").
+ * To get the display name for a region alone, or for other options, use ULocaleDisplayNames instead.
*
* @param locale the locale to get the displayable country code with. NULL may be used to specify the default.
* @param displayLocale Specifies the locale to be used to display the name. In other words,
/**
*
- * Gets a list of all available language codes defined in ISO 639. This is a pointer
+ * Gets a list of all available 2-letter language codes defined in ISO 639,
+ * plus additional 3-letter codes determined to be useful for locale generation as
+ * defined by Unicode CLDR. This is a pointer
* to an array of pointers to arrays of char. All of these pointers are owned
* by ICU-- do not delete them, and do not write through them. The array is
* terminated with a null pointer.
/**
*
- * Gets a list of all available 2-letter country codes defined in ISO 639. This is a
- * pointer to an array of pointers to arrays of char. All of these pointers are
- * owned by ICU-- do not delete them, and do not write through them. The array is
- * terminated with a null pointer.
+ * Gets a list of all available 2-letter country codes which are valid regular
+ * region codes in CLDR; these are based on the non-deprecated alpha-2 region
+ * codes in ISO 3166-1. The return value is a pointer to an array of pointers
+ * to C strings. All of these pointers are owned by ICU; do not delete them, and
+ * do not write through them. The array is terminated with a null pointer.
* @return a list of all available country codes
* @stable ICU 2.0
*/
/**
- * Gets the full name for the specified locale.
+ * Gets the full name for the specified locale, like uloc_getName(),
+ * but without keywords.
+ *
* Note: This has the effect of 'canonicalizing' the string to
* a certain extent. Upper and lower case are set as needed,
* and if the components were in 'POSIX' format they are changed to
* ICU format. It does NOT map aliased names in any way.
* See the top of this header file.
+ *
* This API strips off the keyword part, so "de_DE\@collation=phonebook"
* will become "de_DE".
* This API supports preflighting.
/**
- * Set the value of the specified keyword.
+ * Sets or removes the value of the specified keyword.
+ *
+ * For removing all keywords, use uloc_getBaseName().
+ *
* NOTE: Unlike almost every other ICU function which takes a
* buffer, this function will NOT truncate the output text. If a
* BUFFER_OVERFLOW_ERROR is received, it means that the original
* buffer is untouched. This is done to prevent incorrect or possibly
* even malformed locales from being generated and used.
- *
+ *
* @param keywordName name of the keyword to be set. Case insensitive.
* @param keywordValue value of the keyword to be set. If 0-length or
* NULL, will result in the keyword being removed. No error is given if
char* buffer, int32_t bufferCapacity,
UErrorCode* status);
+/**
+ * Returns whether the locale's script is written right-to-left.
+ * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags().
+ * If no likely script is known, then FALSE is returned.
+ *
+ * A script is right-to-left according to the CLDR script metadata
+ * which corresponds to whether the script's letters have Bidi_Class=R or AL.
+ *
+ * Returns TRUE for "ar" and "en-Hebr", FALSE for "zh" and "fa-Cyrl".
+ *
+ * @param locale input locale ID
+ * @return TRUE if the locale's script is written right-to-left
+ * @stable ICU 54
+ */
+U_STABLE UBool U_EXPORT2
+uloc_isRightToLeft(const char *locale);
+
+/**
+ * enums for the return value for the character and line orientation
+ * functions.
+ * @stable ICU 4.0
+ */
+typedef enum {
+ ULOC_LAYOUT_LTR = 0, /* left-to-right. */
+ ULOC_LAYOUT_RTL = 1, /* right-to-left. */
+ ULOC_LAYOUT_TTB = 2, /* top-to-bottom. */
+ ULOC_LAYOUT_BTT = 3, /* bottom-to-top. */
+ ULOC_LAYOUT_UNKNOWN
+} ULayoutType;
+
+/**
+ * Get the layout character orientation for the specified locale.
+ *
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for characters.
+ * @stable ICU 4.0
+ */
+U_STABLE ULayoutType U_EXPORT2
+uloc_getCharacterOrientation(const char* localeId,
+ UErrorCode *status);
+
+/**
+ * Get the layout line orientation for the specified locale.
+ *
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for lines.
+ * @stable ICU 4.0
+ */
+U_STABLE ULayoutType U_EXPORT2
+uloc_getLineOrientation(const char* localeId,
+ UErrorCode *status);
+
/**
* enums for the 'outResult' parameter return value
* @see uloc_acceptLanguageFromHTTP
* @param status an error is returned if the LCID is unrecognized or the output buffer
* is too small
* @return actual the actual size of the locale ID, not including NUL-termination
- * @draft ICU 3.8
+ * @stable ICU 3.8
*/
-U_DRAFT int32_t U_EXPORT2
-uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
+U_STABLE int32_t U_EXPORT2
+uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity,
UErrorCode *status);
-#endif /*_ULOC*/
+/**
+ * Add the likely subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the maximal form, or there is no data available
+ * for maximization, it will be copied to the output buffer. For example,
+ * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
+ *
+ * Examples:
+ *
+ * "en" maximizes to "en_Latn_US"
+ *
+ * "de" maximizes to "de_Latn_DE"
+ *
+ * "sr" maximizes to "sr_Cyrl_RS"
+ *
+ * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+ *
+ * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+ *
+ * @param localeID The locale to maximize
+ * @param maximizedLocaleID The maximized locale
+ * @param maximizedLocaleIDCapacity The capacity of the maximizedLocaleID buffer
+ * @param err Error information if maximizing the locale failed. If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual buffer size needed for the maximized locale. If it's
+ * greater than maximizedLocaleIDCapacity, the returned ID will be truncated.
+ * On error, the return value is -1.
+ * @stable ICU 4.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_addLikelySubtags(const char* localeID,
+ char* maximizedLocaleID,
+ int32_t maximizedLocaleIDCapacity,
+ UErrorCode* err);
+
+
+/**
+ * Minimize the subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the minimal form, or there is no data available
+ * for minimization, it will be copied to the output buffer. Since the
+ * minimization algorithm relies on proper maximization, see the comments
+ * for uloc_addLikelySubtags for reasons why there might not be any data.
+ *
+ * Examples:
+ *
+ * "en_Latn_US" minimizes to "en"
+ *
+ * "de_Latn_DE" minimizes to "de"
+ *
+ * "sr_Cyrl_RS" minimizes to "sr"
+ *
+ * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
+ * script, and minimizing to "zh" would imply "zh_Hans_CN".)
+ *
+ * @param localeID The locale to minimize
+ * @param minimizedLocaleID The minimized locale
+ * @param minimizedLocaleIDCapacity The capacity of the minimizedLocaleID buffer
+ * @param err Error information if minimizing the locale failed. If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual buffer size needed for the minimized locale. If it's
+ * greater than minimizedLocaleIDCapacity, the returned ID will be truncated.
+ * On error, the return value is -1.
+ * @stable ICU 4.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_minimizeSubtags(const char* localeID,
+ char* minimizedLocaleID,
+ int32_t minimizedLocaleIDCapacity,
+ UErrorCode* err);
+
+/**
+ * Returns a locale ID for the specified BCP47 language tag string.
+ * If the specified language tag contains any ill-formed subtags,
+ * the first such subtag and all following subtags are ignored.
+ * <p>
+ * This implements the 'Language-Tag' production of BCP47, and so
+ * supports grandfathered (regular and irregular) as well as private
+ * use language tags. Private use tags are represented as 'x-whatever',
+ * and grandfathered tags are converted to their canonical replacements
+ * where they exist. Note that a few grandfathered tags have no modern
+ * replacement; these will be converted using the fallback described in
+ * the first paragraph, so some information might be lost.
+ * @param langtag the input BCP47 language tag.
+ * @param localeID the output buffer receiving a locale ID for the
+ * specified BCP47 language tag.
+ * @param localeIDCapacity the size of the locale ID output buffer.
+ * @param parsedLength if not NULL, receives the length of the successfully
+ * parsed portion of the input language tag.
+ * @param err error information if receiving the locale ID
+ * failed.
+ * @return the length of the locale ID.
+ * @stable ICU 4.2
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_forLanguageTag(const char* langtag,
+ char* localeID,
+ int32_t localeIDCapacity,
+ int32_t* parsedLength,
+ UErrorCode* err);
+/**
+ * Returns a well-formed language tag for this locale ID.
+ * <p>
+ * <b>Note</b>: When <code>strict</code> is FALSE, any locale
+ * fields which do not satisfy the BCP47 syntax requirement will
+ * be omitted from the result. When <code>strict</code> is
+ * TRUE, this function sets U_ILLEGAL_ARGUMENT_ERROR to the
+ * <code>err</code> if any locale fields do not satisfy the
+ * BCP47 syntax requirement.
+ * @param localeID the input locale ID
+ * @param langtag the output buffer receiving BCP47 language
+ * tag for the locale ID.
+ * @param langtagCapacity the size of the BCP47 language tag
+ * output buffer.
+ * @param strict boolean value indicating if the function returns
+ * an error for an ill-formed input locale ID.
+ * @param err error information if receiving the language
+ * tag failed.
+ * @return The length of the BCP47 language tag.
+ * @stable ICU 4.2
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_toLanguageTag(const char* localeID,
+ char* langtag,
+ int32_t langtagCapacity,
+ UBool strict,
+ UErrorCode* err);
+
+/**
+ * Converts the specified keyword (legacy key, or BCP 47 Unicode locale
+ * extension key) to the equivalent BCP 47 Unicode locale extension key.
+ * For example, BCP 47 Unicode locale extension key "co" is returned for
+ * the input keyword "collation".
+ * <p>
+ * When the specified keyword is unknown, but satisfies the BCP 47 syntax,
+ * then the pointer to the input keyword itself will be returned.
+ * For example,
+ * <code>uloc_toUnicodeLocaleKey("ZZ")</code> returns "ZZ".
+ *
+ * @param keyword the input locale keyword (either legacy key
+ * such as "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @return the well-formed BCP 47 Unicode locale extension key,
+ * or NULL if the specified locale keyword cannot be
+ * mapped to a well-formed BCP 47 Unicode locale extension
+ * key.
+ * @see uloc_toLegacyKey
+ * @stable ICU 54
+ */
+U_STABLE const char* U_EXPORT2
+uloc_toUnicodeLocaleKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (legacy type, or BCP 47
+ * Unicode locale extension type) to the well-formed BCP 47 Unicode locale
+ * extension type for the specified keyword (category). For example, BCP 47
+ * Unicode locale extension type "phonebk" is returned for the input
+ * keyword value "phonebook", with the keyword "collation" (or "co").
+ * <p>
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of the BCP 47 Unicode locale extension type,
+ * or when the specified keyword allows 'variable' type and the specified
+ * value satisfies the syntax, then the pointer to the input type value itself
+ * will be returned.
+ * For example,
+ * <code>uloc_toUnicodeLocaleType("Foo", "Bar")</code> returns "Bar",
+ * <code>uloc_toUnicodeLocaleType("variableTop", "00A4")</code> returns "00A4".
+ *
+ * @param keyword the locale keyword (either legacy key such as
+ * "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @param value the locale keyword value (either legacy type
+ * such as "phonebook" or BCP 47 Unicode locale extension
+ * type such as "phonebk").
+ * @return the well-formed BCP47 Unicode locale extension type,
+ * or NULL if the locale keyword value cannot be mapped to
+ * a well-formed BCP 47 Unicode locale extension type.
+ * @see uloc_toLegacyType
+ * @stable ICU 54
+ */
+U_STABLE const char* U_EXPORT2
+uloc_toUnicodeLocaleType(const char* keyword, const char* value);
+
+/**
+ * Converts the specified keyword (BCP 47 Unicode locale extension key, or
+ * legacy key) to the legacy key. For example, legacy key "collation" is
+ * returned for the input BCP 47 Unicode locale extension key "co".
+ *
+ * @param keyword the input locale keyword (either BCP 47 Unicode locale
+ * extension key or legacy key).
+ * @return the well-formed legacy key, or NULL if the specified
+ * keyword cannot be mapped to a well-formed legacy key.
+ * @see uloc_toUnicodeLocaleKey
+ * @stable ICU 54
+ */
+U_STABLE const char* U_EXPORT2
+uloc_toLegacyKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (BCP 47 Unicode locale extension type,
+ * or legacy type or type alias) to the canonical legacy type. For example,
+ * the legacy type "phonebook" is returned for the input BCP 47 Unicode
+ * locale extension type "phonebk" with the keyword "collation" (or "co").
+ * <p>
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of legacy key, or when the specified keyword
+ * allows 'variable' type and the specified value satisfies the syntax,
+ * then the pointer to the input type value itself will be returned.
+ * For example,
+ * <code>uloc_toLegacyType("Foo", "Bar")</code> returns "Bar",
+ * <code>uloc_toLegacyType("vt", "00A4")</code> returns "00A4".
+ *
+ * @param keyword the locale keyword (either legacy keyword such as
+ * "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @param value the locale keyword value (either BCP 47 Unicode locale
+ * extension type such as "phonebk" or legacy keyword value
+ * such as "phonebook").
+ * @return the well-formed legacy type, or NULL if the specified
+ * keyword value cannot be mapped to a well-formed legacy
+ * type.
+ * @see uloc_toUnicodeLocaleType
+ * @stable ICU 54
+ */
+U_STABLE const char* U_EXPORT2
+uloc_toLegacyType(const char* keyword, const char* value);
+
+#endif /*_ULOC*/