1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
5 // created: 2019may08 Markus W. Scherer
7 #ifndef __LOCALEMATCHER_H__
8 #define __LOCALEMATCHER_H__
10 #include "unicode/utypes.h"
12 #if U_SHOW_CPLUSPLUS_API
14 #include "unicode/locid.h"
15 #include "unicode/stringpiece.h"
16 #include "unicode/uobject.h"
20 * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
23 #ifndef U_HIDE_DRAFT_API
26 * Builder option for whether the language subtag or the script subtag is most important.
28 * @see LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag)
31 enum ULocMatchFavorSubtag
{
33 * Language differences are most important, then script differences, then region differences.
34 * (This is the default behavior.)
38 ULOCMATCH_FAVOR_LANGUAGE
,
40 * Makes script differences matter relatively more than language differences.
44 ULOCMATCH_FAVOR_SCRIPT
47 typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag
;
51 * Builder option for whether all desired locales are treated equally or
52 * earlier ones are preferred.
54 * @see LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion)
57 enum ULocMatchDemotion
{
59 * All desired locales are treated equally.
63 ULOCMATCH_DEMOTION_NONE
,
65 * Earlier desired locales are preferred.
67 * <p>From each desired locale to the next,
68 * the distance to any supported locale is increased by an additional amount
69 * which is at least as large as most region mismatches.
70 * A later desired locale has to have a better match with some supported locale
71 * due to more than merely having the same region subtag.
73 * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
74 * yields <code>Result(en-GB, en)</code> because
75 * with the demotion of sv its perfect match is no better than
76 * the region distance between the earlier desired locale en-GB and en=en-US.
80 * <li>In some cases, language and/or script differences can be as small as
81 * the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
82 * <li>It is possible for certain region differences to be larger than usual,
83 * and larger than the demotion.
84 * (As of CLDR 35 there is no such case, but
85 * this is possible in future versions of the data.)
90 ULOCMATCH_DEMOTION_REGION
93 typedef enum ULocMatchDemotion ULocMatchDemotion
;
102 class LocaleDistance
;
103 class LocaleLsrIterator
;
105 class XLikelySubtags
;
108 * Immutable class that picks the best match between a user's desired locales and
109 * an application's supported locales.
110 * Movable but not copyable.
114 * UErrorCode errorCode = U_ZERO_ERROR;
115 * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocalesFromListString("fr, en-GB, en").build(errorCode);
116 * const Locale *bestSupported = matcher.getBestMatch(Locale::getUS(), errorCode); // "en"
119 * <p>A matcher takes into account when languages are close to one another,
120 * such as Danish and Norwegian,
121 * and when regional variants are close, like en-GB and en-AU as opposed to en-US.
123 * <p>If there are multiple supported locales with the same (language, script, region)
124 * likely subtags, then the current implementation returns the first of those locales.
125 * It ignores variant subtags (except for pseudolocale variants) and extensions.
126 * This may change in future versions.
128 * <p>For example, the current implementation does not distinguish between
129 * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
131 * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
132 * or place it earlier in the list of supported locales.
134 * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
135 * The current implementation compares each desired locale with supported locales
136 * in the following order:
137 * 1. Default locale, if supported;
138 * 2. CLDR "paradigm locales" like en-GB and es-419;
139 * 3. other supported locales.
140 * This may change in future versions.
142 * <p>Often a product will just need one matcher instance, built with the languages
143 * that it supports. However, it may want multiple instances with different
144 * default languages based on additional information, such as the domain.
146 * <p>This class is not intended for public subclassing.
150 class U_COMMON_API LocaleMatcher
: public UMemory
{
153 * Data for the best-matching pair of a desired and a supported locale.
154 * Movable but not copyable.
158 class U_COMMON_API Result
: public UMemory
{
161 * Move constructor; might modify the source.
162 * This object will have the same contents that the source object had.
164 * @param src Result to move contents from.
167 Result(Result
&&src
) U_NOEXCEPT
;
177 * Move assignment; might modify the source.
178 * This object will have the same contents that the source object had.
180 * @param src Result to move contents from.
183 Result
&operator=(Result
&&src
) U_NOEXCEPT
;
186 * Returns the best-matching desired locale.
187 * nullptr if the list of desired locales is empty or if none matched well enough.
189 * @return the best-matching desired locale, or nullptr.
192 inline const Locale
*getDesiredLocale() const { return desiredLocale
; }
195 * Returns the best-matching supported locale.
196 * If none matched well enough, this is the default locale.
197 * The default locale is nullptr if the list of supported locales is empty and
198 * no explicit default locale is set.
200 * @return the best-matching supported locale, or nullptr.
203 inline const Locale
*getSupportedLocale() const { return supportedLocale
; }
206 * Returns the index of the best-matching desired locale in the input Iterable order.
207 * -1 if the list of desired locales is empty or if none matched well enough.
209 * @return the index of the best-matching desired locale, or -1.
212 inline int32_t getDesiredIndex() const { return desiredIndex
; }
215 * Returns the index of the best-matching supported locale in the
216 * constructor’s or builder’s input order (“set” Collection plus “added” locales).
217 * If the matcher was built from a locale list string, then the iteration order is that
218 * of a LocalePriorityList built from the same string.
219 * -1 if the list of supported locales is empty or if none matched well enough.
221 * @return the index of the best-matching supported locale, or -1.
224 inline int32_t getSupportedIndex() const { return supportedIndex
; }
227 * Takes the best-matching supported locale and adds relevant fields of the
228 * best-matching desired locale, such as the -t- and -u- extensions.
229 * May replace some fields of the supported locale.
230 * The result is the locale that should be used for date and number formatting, collation, etc.
231 * Returns the root locale if getSupportedLocale() returns nullptr.
233 * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
235 * @return a locale combining the best-matching desired and supported locales.
238 Locale
makeResolvedLocale(UErrorCode
&errorCode
) const;
241 Result(const Locale
*desired
, const Locale
*supported
,
242 int32_t desIndex
, int32_t suppIndex
, UBool owned
) :
243 desiredLocale(desired
), supportedLocale(supported
),
244 desiredIndex(desIndex
), supportedIndex(suppIndex
),
245 desiredIsOwned(owned
) {}
247 Result(const Result
&other
) = delete;
248 Result
&operator=(const Result
&other
) = delete;
250 const Locale
*desiredLocale
;
251 const Locale
*supportedLocale
;
252 int32_t desiredIndex
;
253 int32_t supportedIndex
;
254 UBool desiredIsOwned
;
256 friend class LocaleMatcher
;
260 * LocaleMatcher builder.
261 * Movable but not copyable.
263 * @see LocaleMatcher#builder()
266 class U_COMMON_API Builder
: public UMemory
{
269 * Constructs a builder used in chaining parameters for building a LocaleMatcher.
271 * @return a new Builder object
277 * Move constructor; might modify the source.
278 * This builder will have the same contents that the source builder had.
280 * @param src Builder to move contents from.
283 Builder(Builder
&&src
) U_NOEXCEPT
;
293 * Move assignment; might modify the source.
294 * This builder will have the same contents that the source builder had.
296 * @param src Builder to move contents from.
299 Builder
&operator=(Builder
&&src
) U_NOEXCEPT
;
302 * Parses an Accept-Language string
303 * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
304 * such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
305 * Allows whitespace in more places but does not allow "*".
306 * Clears any previously set/added supported locales first.
308 * @param locales the Accept-Language string of locales to set
309 * @return this Builder object
312 Builder
&setSupportedLocalesFromListString(StringPiece locales
);
315 * Copies the supported locales, preserving iteration order.
316 * Clears any previously set/added supported locales first.
317 * Duplicates are allowed, and are not removed.
319 * @param locales the list of locales
320 * @return this Builder object
323 Builder
&setSupportedLocales(Locale::Iterator
&locales
);
326 * Copies the supported locales from the begin/end range, preserving iteration order.
327 * Clears any previously set/added supported locales first.
328 * Duplicates are allowed, and are not removed.
330 * Each of the iterator parameter values must be an
331 * input iterator whose value is convertible to const Locale &.
333 * @param begin Start of range.
334 * @param end Exclusive end of range.
335 * @return this Builder object
338 template<typename Iter
>
339 Builder
&setSupportedLocales(Iter begin
, Iter end
) {
340 if (U_FAILURE(errorCode_
)) { return *this; }
341 clearSupportedLocales();
342 while (begin
!= end
) {
343 addSupportedLocale(*begin
++);
349 * Copies the supported locales from the begin/end range, preserving iteration order.
350 * Calls the converter to convert each *begin to a Locale or const Locale &.
351 * Clears any previously set/added supported locales first.
352 * Duplicates are allowed, and are not removed.
354 * Each of the iterator parameter values must be an
355 * input iterator whose value can be passed to the converter.
357 * @param begin Start of range.
358 * @param end Exclusive end of range.
359 * @param converter Converter from *begin to const Locale & or compatible.
360 * @return this Builder object
363 template<typename Iter
, typename Conv
>
364 Builder
&setSupportedLocalesViaConverter(Iter begin
, Iter end
, Conv converter
) {
365 if (U_FAILURE(errorCode_
)) { return *this; }
366 clearSupportedLocales();
367 while (begin
!= end
) {
368 addSupportedLocale(converter(*begin
++));
374 * Adds another supported locale.
375 * Duplicates are allowed, and are not removed.
377 * @param locale another locale
378 * @return this Builder object
381 Builder
&addSupportedLocale(const Locale
&locale
);
384 * Sets the default locale; if nullptr, or if it is not set explicitly,
385 * then the first supported locale is used as the default locale.
387 * @param defaultLocale the default locale (will be copied)
388 * @return this Builder object
391 Builder
&setDefaultLocale(const Locale
*defaultLocale
);
394 * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script differences.
396 * This is used in situations (such as maps) where
397 * it is better to fall back to the same script than a similar language.
399 * @param subtag the subtag to favor
400 * @return this Builder object
403 Builder
&setFavorSubtag(ULocMatchFavorSubtag subtag
);
406 * Option for whether all desired locales are treated equally or
407 * earlier ones are preferred (this is the default).
409 * @param demotion the demotion per desired locale to set.
410 * @return this Builder object
413 Builder
&setDemotionPerDesiredLocale(ULocMatchDemotion demotion
);
416 * Sets the UErrorCode if an error occurred while setting parameters.
417 * Preserves older error codes in the outErrorCode.
419 * @param outErrorCode Set to an error code if it does not contain one already
420 * and an error occurred while setting parameters.
421 * Otherwise unchanged.
422 * @return TRUE if U_FAILURE(outErrorCode)
425 UBool
copyErrorTo(UErrorCode
&outErrorCode
) const;
428 * Builds and returns a new locale matcher.
429 * This builder can continue to be used.
431 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
432 * or else the function returns immediately. Check for U_FAILURE()
433 * on output or use with function chaining. (See User Guide for details.)
434 * @return new LocaleMatcher.
437 LocaleMatcher
build(UErrorCode
&errorCode
) const;
440 friend class LocaleMatcher
;
442 Builder(const Builder
&other
) = delete;
443 Builder
&operator=(const Builder
&other
) = delete;
445 void clearSupportedLocales();
446 bool ensureSupportedLocaleVector();
448 UErrorCode errorCode_
= U_ZERO_ERROR
;
449 UVector
*supportedLocales_
= nullptr;
450 int32_t thresholdDistance_
= -1;
451 ULocMatchDemotion demotion_
= ULOCMATCH_DEMOTION_REGION
;
452 Locale
*defaultLocale_
= nullptr;
453 ULocMatchFavorSubtag favor_
= ULOCMATCH_FAVOR_LANGUAGE
;
456 // FYI No public LocaleMatcher constructors in C++; use the Builder.
459 * Move constructor; might modify the source.
460 * This matcher will have the same settings that the source matcher had.
461 * @param src source matcher
464 LocaleMatcher(LocaleMatcher
&&src
) U_NOEXCEPT
;
473 * Move assignment operator; might modify the source.
474 * This matcher will have the same settings that the source matcher had.
475 * The behavior is undefined if *this and src are the same object.
476 * @param src source matcher
480 LocaleMatcher
&operator=(LocaleMatcher
&&src
) U_NOEXCEPT
;
483 * Returns the supported locale which best matches the desired locale.
485 * @param desiredLocale Typically a user's language.
486 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
487 * or else the function returns immediately. Check for U_FAILURE()
488 * on output or use with function chaining. (See User Guide for details.)
489 * @return the best-matching supported locale.
492 const Locale
*getBestMatch(const Locale
&desiredLocale
, UErrorCode
&errorCode
) const;
495 * Returns the supported locale which best matches one of the desired locales.
497 * @param desiredLocales Typically a user's languages, in order of preference (descending).
498 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
499 * or else the function returns immediately. Check for U_FAILURE()
500 * on output or use with function chaining. (See User Guide for details.)
501 * @return the best-matching supported locale.
504 const Locale
*getBestMatch(Locale::Iterator
&desiredLocales
, UErrorCode
&errorCode
) const;
507 * Parses an Accept-Language string
508 * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
509 * such as "af, en, fr;q=0.9",
510 * and returns the supported locale which best matches one of the desired locales.
511 * Allows whitespace in more places but does not allow "*".
513 * @param desiredLocaleList Typically a user's languages, as an Accept-Language string.
514 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
515 * or else the function returns immediately. Check for U_FAILURE()
516 * on output or use with function chaining. (See User Guide for details.)
517 * @return the best-matching supported locale.
520 const Locale
*getBestMatchForListString(StringPiece desiredLocaleList
, UErrorCode
&errorCode
) const;
523 * Returns the best match between the desired locale and the supported locales.
524 * If the result's desired locale is not nullptr, then it is the address of the input locale.
525 * It has not been cloned.
527 * @param desiredLocale Typically a user's language.
528 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
529 * or else the function returns immediately. Check for U_FAILURE()
530 * on output or use with function chaining. (See User Guide for details.)
531 * @return the best-matching pair of the desired and a supported locale.
534 Result
getBestMatchResult(const Locale
&desiredLocale
, UErrorCode
&errorCode
) const;
537 * Returns the best match between the desired and supported locales.
538 * If the result's desired locale is not nullptr, then it is a clone of
539 * the best-matching desired locale. The Result object owns the clone.
541 * @param desiredLocales Typically a user's languages, in order of preference (descending).
542 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
543 * or else the function returns immediately. Check for U_FAILURE()
544 * on output or use with function chaining. (See User Guide for details.)
545 * @return the best-matching pair of a desired and a supported locale.
548 Result
getBestMatchResult(Locale::Iterator
&desiredLocales
, UErrorCode
&errorCode
) const;
550 #ifndef U_HIDE_INTERNAL_API
552 * Returns a fraction between 0 and 1, where 1 means that the languages are a
553 * perfect match, and 0 means that they are completely different.
555 * <p>This is mostly an implementation detail, and the precise values may change over time.
556 * The implementation may use either the maximized forms or the other ones, or both.
557 * The implementation may or may not rely on the forms to be consistent with each other.
559 * <p>Callers should construct and use a matcher rather than match pairs of locales directly.
561 * @param desired Desired locale.
562 * @param supported Supported locale.
563 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
564 * or else the function returns immediately. Check for U_FAILURE()
565 * on output or use with function chaining. (See User Guide for details.)
566 * @return value between 0 and 1, inclusive.
567 * @internal (has a known user)
569 double internalMatch(const Locale
&desired
, const Locale
&supported
, UErrorCode
&errorCode
) const;
570 #endif // U_HIDE_INTERNAL_API
573 LocaleMatcher(const Builder
&builder
, UErrorCode
&errorCode
);
574 LocaleMatcher(const LocaleMatcher
&other
) = delete;
575 LocaleMatcher
&operator=(const LocaleMatcher
&other
) = delete;
577 int32_t getBestSuppIndex(LSR desiredLSR
, LocaleLsrIterator
*remainingIter
, UErrorCode
&errorCode
) const;
579 const XLikelySubtags
&likelySubtags
;
580 const LocaleDistance
&localeDistance
;
581 int32_t thresholdDistance
;
582 int32_t demotionPerDesiredLocale
;
583 ULocMatchFavorSubtag favorSubtag
;
585 // These are in input order.
586 const Locale
** supportedLocales
;
588 int32_t supportedLocalesLength
;
589 // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
590 UHashtable
*supportedLsrToIndex
; // Map<LSR, Integer> stores index+1 because 0 is "not found"
591 // Array versions of the supportedLsrToIndex keys and values.
592 // The distance lookup loops over the supportedLSRs and returns the index of the best match.
593 const LSR
**supportedLSRs
;
594 int32_t *supportedIndexes
;
595 int32_t supportedLSRsLength
;
596 Locale
*ownedDefaultLocale
;
597 const Locale
*defaultLocale
;
598 int32_t defaultLocaleIndex
;
603 #endif // U_HIDE_DRAFT_API
604 #endif // U_SHOW_CPLUSPLUS_API
605 #endif // __LOCALEMATCHER_H__