1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
5 // created: 2019may08 Markus W. Scherer
7 #ifndef __LOCDISTANCE_H__
8 #define __LOCDISTANCE_H__
10 #include "unicode/utypes.h"
11 #include "unicode/bytestrie.h"
12 #include "unicode/localematcher.h"
13 #include "unicode/locid.h"
14 #include "unicode/uobject.h"
// Forward declaration only: this header refers to LocaleDistanceData solely
// by const reference (as the LocaleDistance constructor argument).
struct LocaleDistanceData;
/**
 * Offline-built data for LocaleMatcher.
 * Mostly but not only the data for mapping locales to their maximized forms.
 */
25 class LocaleDistance final
: public UMemory
{
27 static const LocaleDistance
*getSingleton(UErrorCode
&errorCode
);
30 * Finds the supported LSR with the smallest distance from the desired one.
31 * Equivalent LSR subtags must be normalized into a canonical form.
33 * <p>Returns the index of the lowest-distance supported LSR in bits 31..8
34 * (negative if none has a distance below the threshold),
35 * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
37 int32_t getBestIndexAndDistance(const LSR
&desired
,
38 const LSR
**supportedLSRs
, int32_t supportedLSRsLength
,
39 int32_t threshold
, ULocMatchFavorSubtag favorSubtag
) const;
41 int32_t getParadigmLSRsLength() const { return paradigmLSRsLength
; }
43 UBool
isParadigmLSR(const LSR
&lsr
) const;
45 int32_t getDefaultScriptDistance() const {
46 return defaultScriptDistance
;
49 int32_t getDefaultDemotionPerDesiredLocale() const {
50 return defaultDemotionPerDesiredLocale
;
54 LocaleDistance(const LocaleDistanceData
&data
);
55 LocaleDistance(const LocaleDistance
&other
) = delete;
56 LocaleDistance
&operator=(const LocaleDistance
&other
) = delete;
58 static void initLocaleDistance(UErrorCode
&errorCode
);
60 static int32_t getDesSuppScriptDistance(BytesTrie
&iter
, uint64_t startState
,
61 const char *desired
, const char *supported
);
63 static int32_t getRegionPartitionsDistance(
64 BytesTrie
&iter
, uint64_t startState
,
65 const char *desiredPartitions
, const char *supportedPartitions
,
68 static int32_t getFallbackRegionDistance(BytesTrie
&iter
, uint64_t startState
);
70 static int32_t trieNext(BytesTrie
&iter
, const char *s
, bool wantValue
);
72 const char *partitionsForRegion(const LSR
&lsr
) const {
73 // ill-formed region -> one non-matching string
74 int32_t pIndex
= regionToPartitionsIndex
[lsr
.regionIndex
];
75 return partitionArrays
[pIndex
];
78 int32_t getDefaultRegionDistance() const {
79 return defaultRegionDistance
;
82 // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
83 // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
84 // There is also a trie value for each subsequence of whole subtags.
85 // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
/**
 * Maps each region to zero or more single-character partitions.
 *
 * regionToPartitionsIndex is indexed by an LSR's regionIndex and yields an
 * index into partitionArrays (see partitionsForRegion()).
 */
const uint8_t *regionToPartitionsIndex;
const char **partitionArrays;
95 * Used to get the paradigm region for a cluster, if there is one.
97 const LSR
*paradigmLSRs
;
98 int32_t paradigmLSRsLength
;
// Distance parameters. The script, region and per-desired-locale demotion
// values are exposed through the corresponding getDefault...() accessors.
int32_t defaultLanguageDistance;
int32_t defaultScriptDistance;
int32_t defaultRegionDistance;
int32_t minRegionDistance;
int32_t defaultDemotionPerDesiredLocale;
109 #endif // __LOCDISTANCE_H__