]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/locdistance.h
ICU-66108.tar.gz
[apple/icu.git] / icuSources / common / locdistance.h
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3
4 // locdistance.h
5 // created: 2019may08 Markus W. Scherer
6
7 #ifndef __LOCDISTANCE_H__
8 #define __LOCDISTANCE_H__
9
10 #include "unicode/utypes.h"
11 #include "unicode/bytestrie.h"
12 #include "unicode/localematcher.h"
13 #include "unicode/locid.h"
14 #include "unicode/uobject.h"
15 #include "lsr.h"
16
17 U_NAMESPACE_BEGIN
18
19 struct LocaleDistanceData;
20
21 /**
22 * Offline-built data for LocaleMatcher.
23 * Mostly but not only the data for mapping locales to their maximized forms.
24 */
25 class LocaleDistance final : public UMemory {
26 public:
27 static const LocaleDistance *getSingleton(UErrorCode &errorCode);
28
29 /**
30 * Finds the supported LSR with the smallest distance from the desired one.
31 * Equivalent LSR subtags must be normalized into a canonical form.
32 *
33 * <p>Returns the index of the lowest-distance supported LSR in bits 31..8
34 * (negative if none has a distance below the threshold),
35 * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
36 */
37 int32_t getBestIndexAndDistance(const LSR &desired,
38 const LSR **supportedLSRs, int32_t supportedLSRsLength,
39 int32_t threshold, ULocMatchFavorSubtag favorSubtag) const;
40
41 int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; }
42
43 UBool isParadigmLSR(const LSR &lsr) const;
44
45 int32_t getDefaultScriptDistance() const {
46 return defaultScriptDistance;
47 }
48
49 int32_t getDefaultDemotionPerDesiredLocale() const {
50 return defaultDemotionPerDesiredLocale;
51 }
52
53 private:
54 LocaleDistance(const LocaleDistanceData &data);
55 LocaleDistance(const LocaleDistance &other) = delete;
56 LocaleDistance &operator=(const LocaleDistance &other) = delete;
57
58 static void initLocaleDistance(UErrorCode &errorCode);
59
60 static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
61 const char *desired, const char *supported);
62
63 static int32_t getRegionPartitionsDistance(
64 BytesTrie &iter, uint64_t startState,
65 const char *desiredPartitions, const char *supportedPartitions,
66 int32_t threshold);
67
68 static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
69
70 static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
71
72 const char *partitionsForRegion(const LSR &lsr) const {
73 // ill-formed region -> one non-matching string
74 int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
75 return partitionArrays[pIndex];
76 }
77
78 int32_t getDefaultRegionDistance() const {
79 return defaultRegionDistance;
80 }
81
82 // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
83 // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
84 // There is also a trie value for each subsequence of whole subtags.
85 // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
86 BytesTrie trie;
87
88 /**
89 * Maps each region to zero or more single-character partitions.
90 */
91 const uint8_t *regionToPartitionsIndex;
92 const char **partitionArrays;
93
94 /**
95 * Used to get the paradigm region for a cluster, if there is one.
96 */
97 const LSR *paradigmLSRs;
98 int32_t paradigmLSRsLength;
99
100 int32_t defaultLanguageDistance;
101 int32_t defaultScriptDistance;
102 int32_t defaultRegionDistance;
103 int32_t minRegionDistance;
104 int32_t defaultDemotionPerDesiredLocale;
105 };
106
107 U_NAMESPACE_END
108
109 #endif // __LOCDISTANCE_H__