]>
Commit | Line | Data |
---|---|---|
0f5d89e8 A |
1 | // © 2018 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | #include "unicode/utypes.h" | |
5 | ||
6 | #if !UCONFIG_NO_FORMATTING | |
7 | ||
8 | // Allow implicit conversion from char16_t* to UnicodeString for this file: | |
9 | // Helpful in toString methods and elsewhere. | |
10 | #define UNISTR_FROM_STRING_EXPLICIT | |
11 | ||
12 | #include "numparse_types.h" | |
13 | #include "numparse_currency.h" | |
14 | #include "ucurrimp.h" | |
15 | #include "unicode/errorcode.h" | |
16 | #include "numparse_utils.h" | |
340931cb | 17 | #include "string_segment.h" |
0f5d89e8 A |
18 | |
19 | using namespace icu; | |
20 | using namespace icu::numparse; | |
21 | using namespace icu::numparse::impl; | |
22 | ||
23 | ||
24 | CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs, | |
25 | parse_flags_t parseFlags, UErrorCode& status) | |
26 | : fCurrency1(currencySymbols.getCurrencySymbol(status)), | |
27 | fCurrency2(currencySymbols.getIntlCurrencySymbol(status)), | |
28 | fUseFullCurrencyData(0 == (parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY)), | |
c5116b9f | 29 | fCurrencyTrails(0 != (parseFlags & PARSE_FLAG_HAS_TRAIL_CURRENCY)), // Apple <rdar://problem/51938595> |
0f5d89e8 A |
30 | afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)), |
31 | beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)), | |
32 | fLocaleName(dfs.getLocale().getName(), -1, status) { | |
33 | utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode()); | |
34 | ||
35 | // Pre-load the long names for the current locale and currency | |
36 | // if we are parsing without the full currency data. | |
37 | if (!fUseFullCurrencyData) { | |
38 | for (int32_t i=0; i<StandardPlural::COUNT; i++) { | |
39 | auto plural = static_cast<StandardPlural::Form>(i); | |
40 | fLocalLongNames[i] = currencySymbols.getPluralName(plural, status); | |
41 | } | |
42 | } | |
43 | ||
44 | // TODO: Figure out how to make this faster and re-enable. | |
45 | // Computing the "lead code points" set for fastpathing is too slow to use in production. | |
46 | // See http://bugs.icu-project.org/trac/ticket/13584 | |
47 | // // Compute the full set of characters that could be the first in a currency to allow for | |
48 | // // efficient smoke test. | |
49 | // fLeadCodePoints.add(fCurrency1.char32At(0)); | |
50 | // fLeadCodePoints.add(fCurrency2.char32At(0)); | |
51 | // fLeadCodePoints.add(beforeSuffixInsert.char32At(0)); | |
52 | // uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status); | |
53 | // // Always apply case mapping closure for currencies | |
54 | // fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS); | |
55 | // fLeadCodePoints.freeze(); | |
56 | } | |
57 | ||
58 | bool | |
59 | CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { | |
60 | if (result.currencyCode[0] != 0) { | |
61 | return false; | |
62 | } | |
63 | ||
64 | // Try to match a currency spacing separator. | |
65 | int32_t initialOffset = segment.getOffset(); | |
66 | bool maybeMore = false; | |
c5116b9f A |
67 | if (result.seenNumber() && !beforeSuffixInsert.isEmpty() |
68 | && segment.length() != 0) { // Apple <rdar://problem/51938595> | |
0f5d89e8 A |
69 | int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert); |
70 | if (overlap == beforeSuffixInsert.length()) { | |
71 | segment.adjustOffset(overlap); | |
72 | // Note: let currency spacing be a weak match. Don't update chars consumed. | |
73 | } | |
74 | maybeMore = maybeMore || overlap == segment.length(); | |
75 | } | |
76 | ||
77 | // Match the currency string, and reset if we didn't find one. | |
78 | maybeMore = maybeMore || matchCurrency(segment, result, status); | |
79 | if (result.currencyCode[0] == 0) { | |
80 | segment.setOffset(initialOffset); | |
81 | return maybeMore; | |
82 | } | |
83 | ||
84 | // Try to match a currency spacing separator. | |
c5116b9f A |
85 | if (!result.seenNumber() && !afterPrefixInsert.isEmpty() |
86 | && segment.length() != 0) { // Apple <rdar://problem/51938595> | |
0f5d89e8 A |
87 | int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert); |
88 | if (overlap == afterPrefixInsert.length()) { | |
89 | segment.adjustOffset(overlap); | |
90 | // Note: let currency spacing be a weak match. Don't update chars consumed. | |
91 | } | |
92 | maybeMore = maybeMore || overlap == segment.length(); | |
93 | } | |
94 | ||
95 | return maybeMore; | |
96 | } | |
97 | ||
98 | bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result, | |
99 | UErrorCode& status) const { | |
100 | bool maybeMore = false; | |
101 | ||
102 | int32_t overlap1; | |
103 | if (!fCurrency1.isEmpty()) { | |
104 | overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1); | |
c5116b9f | 105 | } else if (!fUseFullCurrencyData && (!fCurrencyTrails || result.seenNumber())) { // Apple <rdar://problem/46915356><rdar://problem/51938595> |
3d1f044b | 106 | overlap1 = 0; |
0f5d89e8 A |
107 | } else { |
108 | overlap1 = -1; | |
109 | } | |
110 | maybeMore = maybeMore || overlap1 == segment.length(); | |
111 | if (overlap1 == fCurrency1.length()) { | |
112 | utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); | |
113 | segment.adjustOffset(overlap1); | |
114 | result.setCharsConsumed(segment); | |
115 | return maybeMore; | |
116 | } | |
117 | ||
118 | int32_t overlap2; | |
119 | if (!fCurrency2.isEmpty()) { | |
3d1f044b A |
120 | // ISO codes should be accepted case-insensitive. |
121 | // https://unicode-org.atlassian.net/browse/ICU-13696 | |
122 | overlap2 = segment.getCommonPrefixLength(fCurrency2); | |
0f5d89e8 A |
123 | } else { |
124 | overlap2 = -1; | |
125 | } | |
126 | maybeMore = maybeMore || overlap2 == segment.length(); | |
127 | if (overlap2 == fCurrency2.length()) { | |
128 | utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); | |
129 | segment.adjustOffset(overlap2); | |
130 | result.setCharsConsumed(segment); | |
131 | return maybeMore; | |
132 | } | |
133 | ||
134 | if (fUseFullCurrencyData) { | |
135 | // Use the full currency data. | |
136 | // NOTE: This call site should be improved with #13584. | |
137 | const UnicodeString segmentString = segment.toTempUnicodeString(); | |
138 | ||
139 | // Try to parse the currency | |
140 | ParsePosition ppos(0); | |
141 | int32_t partialMatchLen = 0; | |
142 | uprv_parseCurrency( | |
143 | fLocaleName.data(), | |
144 | segmentString, | |
145 | ppos, | |
146 | UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME | |
147 | &partialMatchLen, | |
148 | result.currencyCode, | |
149 | status); | |
150 | maybeMore = maybeMore || partialMatchLen == segment.length(); | |
151 | ||
152 | if (U_SUCCESS(status) && ppos.getIndex() != 0) { | |
153 | // Complete match. | |
154 | // NOTE: The currency code should already be saved in the ParsedNumber. | |
155 | segment.adjustOffset(ppos.getIndex()); | |
156 | result.setCharsConsumed(segment); | |
157 | return maybeMore; | |
158 | } | |
159 | ||
160 | } else { | |
161 | // Use the locale long names. | |
162 | int32_t longestFullMatch = 0; | |
163 | for (int32_t i=0; i<StandardPlural::COUNT; i++) { | |
164 | const UnicodeString& name = fLocalLongNames[i]; | |
165 | int32_t overlap = segment.getCommonPrefixLength(name); | |
166 | if (overlap == name.length() && name.length() > longestFullMatch) { | |
167 | longestFullMatch = name.length(); | |
168 | } | |
169 | maybeMore = maybeMore || overlap > 0; | |
170 | } | |
171 | if (longestFullMatch > 0) { | |
172 | utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); | |
173 | segment.adjustOffset(longestFullMatch); | |
174 | result.setCharsConsumed(segment); | |
175 | return maybeMore; | |
176 | } | |
177 | } | |
178 | ||
179 | // No match found. | |
180 | return maybeMore; | |
181 | } | |
182 | ||
183 | bool CombinedCurrencyMatcher::smokeTest(const StringSegment&) const { | |
184 | // TODO: See constructor | |
185 | return true; | |
186 | //return segment.startsWith(fLeadCodePoints); | |
187 | } | |
188 | ||
189 | UnicodeString CombinedCurrencyMatcher::toString() const { | |
190 | return u"<CombinedCurrencyMatcher>"; | |
191 | } | |
192 | ||
193 | ||
194 | #endif /* #if !UCONFIG_NO_FORMATTING */ |