]>
Commit | Line | Data |
---|---|---|
340931cb A |
1 | // © 2019 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | ||
4 | // locdistance.h | |
5 | // created: 2019may08 Markus W. Scherer | |
6 | ||
7 | #ifndef __LOCDISTANCE_H__ | |
8 | #define __LOCDISTANCE_H__ | |
9 | ||
10 | #include "unicode/utypes.h" | |
11 | #include "unicode/bytestrie.h" | |
12 | #include "unicode/localematcher.h" | |
13 | #include "unicode/locid.h" | |
14 | #include "unicode/uobject.h" | |
15 | #include "lsr.h" | |
16 | ||
17 | U_NAMESPACE_BEGIN | |
18 | ||
19 | struct LocaleDistanceData; /* forward declaration: this header only uses the type by const reference */ | |
20 | ||
21 | /** | |
22 | * Offline-built data for LocaleMatcher. | |
23 | * Holds the tables and lookups used to compute the distance between a desired LSR and supported LSRs. | |
24 | */ | |
25 | class LocaleDistance final : public UMemory { | |
26 | public: | |
27 | static const LocaleDistance *getSingleton(UErrorCode &errorCode); | |
28 | ||
29 | /** | |
30 | * Finds the supported LSR with the smallest distance from the desired one. | |
31 | * Equivalent LSR subtags must be normalized into a canonical form. | |
32 | * | |
33 | * <p>Returns one packed int32_t: the index of the lowest-distance supported LSR in bits 31..8 | |
34 | * (negative if none has a distance below the threshold), | |
35 | * and its distance (0..ABOVE_THRESHOLD) in bits 7..0. | |
36 | */ | |
37 | int32_t getBestIndexAndDistance(const LSR &desired, | |
38 | const LSR **supportedLSRs, int32_t supportedLSRsLength, | |
39 | int32_t threshold, ULocMatchFavorSubtag favorSubtag) const; | |
40 | ||
41 | int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; } | |
42 | ||
43 | UBool isParadigmLSR(const LSR &lsr) const; | |
44 | ||
45 | int32_t getDefaultScriptDistance() const { | |
46 | return defaultScriptDistance; | |
47 | } | |
48 | ||
49 | int32_t getDefaultDemotionPerDesiredLocale() const { | |
50 | return defaultDemotionPerDesiredLocale; | |
51 | } | |
52 | ||
53 | private: | |
54 | LocaleDistance(const LocaleDistanceData &data); | |
55 | LocaleDistance(const LocaleDistance &other) = delete; /* not copy-constructible */ | |
56 | LocaleDistance &operator=(const LocaleDistance &other) = delete; /* not copy-assignable */ | |
57 | ||
58 | static void initLocaleDistance(UErrorCode &errorCode); | |
59 | ||
60 | static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState, | |
61 | const char *desired, const char *supported); | |
62 | ||
63 | static int32_t getRegionPartitionsDistance( | |
64 | BytesTrie &iter, uint64_t startState, | |
65 | const char *desiredPartitions, const char *supportedPartitions, | |
66 | int32_t threshold); | |
67 | ||
68 | static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState); | |
69 | ||
70 | static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue); | |
71 | ||
72 | const char *partitionsForRegion(const LSR &lsr) const { | |
73 | // Two-step lookup: lsr.regionIndex selects a partitions index, which selects the partitions string. An ill-formed region -> one non-matching string | |
74 | int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex]; | |
75 | return partitionArrays[pIndex]; | |
76 | } | |
77 | ||
78 | int32_t getDefaultRegionDistance() const { | |
79 | return defaultRegionDistance; | |
80 | } | |
81 | ||
82 | // The trie maps each dlang+slang+dscript+sscript+dregion+sregion | |
83 | // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance. | |
84 | // There is also a trie value for each subsequence of whole subtags. | |
85 | // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"". | |
86 | BytesTrie trie; | |
87 | ||
88 | /** | |
89 | * Maps each region to zero or more single-character partitions: regionToPartitionsIndex is indexed by an LSR's regionIndex and yields an index into partitionArrays. | |
90 | */ | |
91 | const uint8_t *regionToPartitionsIndex; | |
92 | const char **partitionArrays; | |
93 | ||
94 | /** | |
95 | * Used to get the paradigm region for a cluster, if there is one. | |
96 | */ | |
97 | const LSR *paradigmLSRs; | |
98 | int32_t paradigmLSRsLength; | |
99 | ||
100 | int32_t defaultLanguageDistance; | |
101 | int32_t defaultScriptDistance; | |
102 | int32_t defaultRegionDistance; | |
103 | int32_t minRegionDistance; | |
104 | int32_t defaultDemotionPerDesiredLocale; | |
105 | }; | |
106 | ||
107 | U_NAMESPACE_END | |
108 | ||
109 | #endif // __LOCDISTANCE_H__ |