1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
12 #include "unicode/numberrangeformatter.h"
13 #include "numrange_impl.h"
14 #include "patternprops.h"
19 using namespace icu::number
;
20 using namespace icu::number::impl
;
24 // Helper function for 2-dimensional switch statement
25 constexpr int8_t identity2d(UNumberRangeIdentityFallback a
, UNumberRangeIdentityResult b
) {
26 return static_cast<int8_t>(a
) | (static_cast<int8_t>(b
) << 4);
30 struct NumberRangeData
{
31 SimpleFormatter rangePattern
;
32 SimpleFormatter approximatelyPattern
;
35 class NumberRangeDataSink
: public ResourceSink
{
37 NumberRangeDataSink(NumberRangeData
& data
) : fData(data
) {}
39 void put(const char* key
, ResourceValue
& value
, UBool
/*noFallback*/, UErrorCode
& status
) U_OVERRIDE
{
40 ResourceTable miscTable
= value
.getTable(status
);
41 if (U_FAILURE(status
)) { return; }
42 for (int i
= 0; miscTable
.getKeyAndValue(i
, key
, value
); i
++) {
43 if (uprv_strcmp(key
, "range") == 0) {
45 continue; // have already seen this pattern
47 fData
.rangePattern
= {value
.getUnicodeString(status
), status
};
48 } else if (uprv_strcmp(key
, "approximately") == 0) {
49 if (hasApproxData()) {
50 continue; // have already seen this pattern
52 fData
.approximatelyPattern
= {value
.getUnicodeString(status
), status
};
58 return fData
.rangePattern
.getArgumentLimit() != 0;
61 bool hasApproxData() {
62 return fData
.approximatelyPattern
.getArgumentLimit() != 0;
66 return hasRangeData() && hasApproxData();
69 void fillInDefaults(UErrorCode
& status
) {
70 if (!hasRangeData()) {
71 fData
.rangePattern
= {u
"{0}–{1}", status
};
73 if (!hasApproxData()) {
74 fData
.approximatelyPattern
= {u
"~{0}", status
};
79 NumberRangeData
& fData
;
82 void getNumberRangeData(const char* localeName
, const char* nsName
, NumberRangeData
& data
, UErrorCode
& status
) {
83 if (U_FAILURE(status
)) { return; }
84 LocalUResourceBundlePointer
rb(ures_open(NULL
, localeName
, &status
));
85 if (U_FAILURE(status
)) { return; }
86 NumberRangeDataSink
sink(data
);
89 dataPath
.append("NumberElements/", -1, status
);
90 dataPath
.append(nsName
, -1, status
);
91 dataPath
.append("/miscPatterns", -1, status
);
92 if (U_FAILURE(status
)) { return; }
94 UErrorCode localStatus
= U_ZERO_ERROR
;
95 ures_getAllItemsWithFallback(rb
.getAlias(), dataPath
.data(), sink
, localStatus
);
96 if (U_FAILURE(localStatus
) && localStatus
!= U_MISSING_RESOURCE_ERROR
) {
101 // Fall back to latn if necessary
102 if (!sink
.isComplete()) {
103 ures_getAllItemsWithFallback(rb
.getAlias(), "NumberElements/latn/miscPatterns", sink
, status
);
106 sink
.fillInDefaults(status
);
109 class PluralRangesDataSink
: public ResourceSink
{
111 PluralRangesDataSink(StandardPluralRanges
& output
) : fOutput(output
) {}
113 void put(const char* /*key*/, ResourceValue
& value
, UBool
/*noFallback*/, UErrorCode
& status
) U_OVERRIDE
{
114 ResourceArray entriesArray
= value
.getArray(status
);
115 if (U_FAILURE(status
)) { return; }
116 fOutput
.setCapacity(entriesArray
.getSize());
117 for (int i
= 0; entriesArray
.getValue(i
, value
); i
++) {
118 ResourceArray pluralFormsArray
= value
.getArray(status
);
119 if (U_FAILURE(status
)) { return; }
120 pluralFormsArray
.getValue(0, value
);
121 StandardPlural::Form first
= StandardPlural::fromString(value
.getUnicodeString(status
), status
);
122 if (U_FAILURE(status
)) { return; }
123 pluralFormsArray
.getValue(1, value
);
124 StandardPlural::Form second
= StandardPlural::fromString(value
.getUnicodeString(status
), status
);
125 if (U_FAILURE(status
)) { return; }
126 pluralFormsArray
.getValue(2, value
);
127 StandardPlural::Form result
= StandardPlural::fromString(value
.getUnicodeString(status
), status
);
128 if (U_FAILURE(status
)) { return; }
129 fOutput
.addPluralRange(first
, second
, result
);
134 StandardPluralRanges
& fOutput
;
137 void getPluralRangesData(const Locale
& locale
, StandardPluralRanges
& output
, UErrorCode
& status
) {
138 if (U_FAILURE(status
)) { return; }
139 LocalUResourceBundlePointer
rb(ures_openDirect(nullptr, "pluralRanges", &status
));
140 if (U_FAILURE(status
)) { return; }
143 dataPath
.append("locales/", -1, status
);
144 dataPath
.append(locale
.getLanguage(), -1, status
);
145 if (U_FAILURE(status
)) { return; }
147 // Not all languages are covered: fail gracefully
148 UErrorCode internalStatus
= U_ZERO_ERROR
;
149 const UChar
* set
= ures_getStringByKeyWithFallback(rb
.getAlias(), dataPath
.data(), &setLen
, &internalStatus
);
150 if (U_FAILURE(internalStatus
)) { return; }
153 dataPath
.append("rules/", -1, status
);
154 dataPath
.appendInvariantChars(set
, setLen
, status
);
155 if (U_FAILURE(status
)) { return; }
156 PluralRangesDataSink
sink(output
);
157 ures_getAllItemsWithFallback(rb
.getAlias(), dataPath
.data(), sink
, status
);
158 if (U_FAILURE(status
)) { return; }
164 void StandardPluralRanges::initialize(const Locale
& locale
, UErrorCode
& status
) {
165 getPluralRangesData(locale
, *this, status
);
168 void StandardPluralRanges::addPluralRange(
169 StandardPlural::Form first
,
170 StandardPlural::Form second
,
171 StandardPlural::Form result
) {
172 U_ASSERT(fTriplesLen
< fTriples
.getCapacity());
173 fTriples
[fTriplesLen
] = {first
, second
, result
};
177 void StandardPluralRanges::setCapacity(int32_t length
) {
178 if (length
> fTriples
.getCapacity()) {
179 fTriples
.resize(length
, 0);
184 StandardPluralRanges::resolve(StandardPlural::Form first
, StandardPlural::Form second
) const {
185 for (int32_t i
=0; i
<fTriplesLen
; i
++) {
186 const auto& triple
= fTriples
[i
];
187 if (triple
.first
== first
&& triple
.second
== second
) {
188 return triple
.result
;
192 return StandardPlural::OTHER
;
196 NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps
& macros
, UErrorCode
& status
)
197 : formatterImpl1(macros
.formatter1
.fMacros
, status
),
198 formatterImpl2(macros
.formatter2
.fMacros
, status
),
199 fSameFormatters(macros
.singleFormatter
),
200 fCollapse(macros
.collapse
),
201 fIdentityFallback(macros
.identityFallback
) {
203 const char* nsName
= formatterImpl1
.getRawMicroProps().nsName
;
204 if (uprv_strcmp(nsName
, formatterImpl2
.getRawMicroProps().nsName
) != 0) {
205 status
= U_ILLEGAL_ARGUMENT_ERROR
;
209 NumberRangeData data
;
210 getNumberRangeData(macros
.locale
.getName(), nsName
, data
, status
);
211 if (U_FAILURE(status
)) { return; }
212 fRangeFormatter
= data
.rangePattern
;
213 fApproximatelyModifier
= {data
.approximatelyPattern
, UNUM_FIELD_COUNT
, false};
215 // TODO: Get locale from PluralRules instead?
216 fPluralRanges
.initialize(macros
.locale
, status
);
217 if (U_FAILURE(status
)) { return; }
220 void NumberRangeFormatterImpl::format(UFormattedNumberRangeData
& data
, bool equalBeforeRounding
, UErrorCode
& status
) const {
221 if (U_FAILURE(status
)) {
227 formatterImpl1
.preProcess(data
.quantity1
, micros1
, status
);
228 if (fSameFormatters
) {
229 formatterImpl1
.preProcess(data
.quantity2
, micros2
, status
);
231 formatterImpl2
.preProcess(data
.quantity2
, micros2
, status
);
233 if (U_FAILURE(status
)) {
237 // If any of the affixes are different, an identity is not possible
238 // and we must use formatRange().
239 // TODO: Write this as MicroProps operator==() ?
240 // TODO: Avoid the redundancy of these equality operations with the
241 // ones in formatRange?
242 if (!micros1
.modInner
->semanticallyEquivalent(*micros2
.modInner
)
243 || !micros1
.modMiddle
->semanticallyEquivalent(*micros2
.modMiddle
)
244 || !micros1
.modOuter
->semanticallyEquivalent(*micros2
.modOuter
)) {
245 formatRange(data
, micros1
, micros2
, status
);
246 data
.identityResult
= UNUM_IDENTITY_RESULT_NOT_EQUAL
;
250 // Check for identity
251 if (equalBeforeRounding
) {
252 data
.identityResult
= UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING
;
253 } else if (data
.quantity1
== data
.quantity2
) {
254 data
.identityResult
= UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING
;
256 data
.identityResult
= UNUM_IDENTITY_RESULT_NOT_EQUAL
;
259 switch (identity2d(fIdentityFallback
, data
.identityResult
)) {
260 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE
,
261 UNUM_IDENTITY_RESULT_NOT_EQUAL
):
262 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE
,
263 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING
):
264 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE
,
265 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING
):
266 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY
,
267 UNUM_IDENTITY_RESULT_NOT_EQUAL
):
268 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE
,
269 UNUM_IDENTITY_RESULT_NOT_EQUAL
):
270 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE
,
271 UNUM_IDENTITY_RESULT_NOT_EQUAL
):
272 formatRange(data
, micros1
, micros2
, status
);
275 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY
,
276 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING
):
277 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY
,
278 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING
):
279 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE
,
280 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING
):
281 formatApproximately(data
, micros1
, micros2
, status
);
284 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE
,
285 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING
):
286 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE
,
287 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING
):
288 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE
,
289 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING
):
290 formatSingleValue(data
, micros1
, micros2
, status
);
299 void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData
& data
,
300 MicroProps
& micros1
, MicroProps
& micros2
,
301 UErrorCode
& status
) const {
302 if (U_FAILURE(status
)) { return; }
303 if (fSameFormatters
) {
304 int32_t length
= NumberFormatterImpl::writeNumber(micros1
, data
.quantity1
, data
.getStringRef(), 0, status
);
305 NumberFormatterImpl::writeAffixes(micros1
, data
.getStringRef(), 0, length
, status
);
307 formatRange(data
, micros1
, micros2
, status
);
312 void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData
& data
,
313 MicroProps
& micros1
, MicroProps
& micros2
,
314 UErrorCode
& status
) const {
315 if (U_FAILURE(status
)) { return; }
316 if (fSameFormatters
) {
317 int32_t length
= NumberFormatterImpl::writeNumber(micros1
, data
.quantity1
, data
.getStringRef(), 0, status
);
318 // HEURISTIC: Desired modifier order: inner, middle, approximately, outer.
319 length
+= micros1
.modInner
->apply(data
.getStringRef(), 0, length
, status
);
320 length
+= micros1
.modMiddle
->apply(data
.getStringRef(), 0, length
, status
);
321 length
+= fApproximatelyModifier
.apply(data
.getStringRef(), 0, length
, status
);
322 micros1
.modOuter
->apply(data
.getStringRef(), 0, length
, status
);
324 formatRange(data
, micros1
, micros2
, status
);
329 void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData
& data
,
330 MicroProps
& micros1
, MicroProps
& micros2
,
331 UErrorCode
& status
) const {
332 if (U_FAILURE(status
)) { return; }
334 // modInner is always notation (scientific); collapsable in ALL.
335 // modOuter is always units; collapsable in ALL, AUTO, and UNIT.
336 // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
337 // Never collapse an outer mod but not an inner mod.
338 bool collapseOuter
, collapseMiddle
, collapseInner
;
340 case UNUM_RANGE_COLLAPSE_ALL
:
341 case UNUM_RANGE_COLLAPSE_AUTO
:
342 case UNUM_RANGE_COLLAPSE_UNIT
:
345 collapseOuter
= micros1
.modOuter
->semanticallyEquivalent(*micros2
.modOuter
);
347 if (!collapseOuter
) {
348 // Never collapse inner mods if outer mods are not collapsable
349 collapseMiddle
= false;
350 collapseInner
= false;
355 collapseMiddle
= micros1
.modMiddle
->semanticallyEquivalent(*micros2
.modMiddle
);
357 if (!collapseMiddle
) {
// Never collapse inner mods if middle mods are not collapsable
359 collapseInner
= false;
363 // MIDDLE MODIFIER HEURISTICS
364 // (could disable collapsing of the middle modifier)
365 // The modifiers are equal by this point, so we can look at just one of them.
366 const Modifier
* mm
= micros1
.modMiddle
;
367 if (fCollapse
== UNUM_RANGE_COLLAPSE_UNIT
) {
368 // Only collapse if the modifier is a unit.
369 // TODO: Make a better way to check for a unit?
370 // TODO: Handle case where the modifier has both notation and unit (compact currency)?
371 if (!mm
->containsField(UNUM_CURRENCY_FIELD
) && !mm
->containsField(UNUM_PERCENT_FIELD
)) {
372 collapseMiddle
= false;
374 } else if (fCollapse
== UNUM_RANGE_COLLAPSE_AUTO
) {
375 // Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
376 if (mm
->getCodePointCount() <= 1) {
377 collapseMiddle
= false;
381 if (!collapseMiddle
|| fCollapse
!= UNUM_RANGE_COLLAPSE_ALL
) {
382 collapseInner
= false;
387 collapseInner
= micros1
.modInner
->semanticallyEquivalent(*micros2
.modInner
);
389 // All done checking for collapsability.
394 collapseOuter
= false;
395 collapseMiddle
= false;
396 collapseInner
= false;
400 FormattedStringBuilder
& string
= data
.getStringRef();
401 int32_t lengthPrefix
= 0;
403 int32_t lengthInfix
= 0;
405 int32_t lengthSuffix
= 0;
407 // Use #define so that these are evaluated at the call site.
408 #define UPRV_INDEX_0 (lengthPrefix)
409 #define UPRV_INDEX_1 (lengthPrefix + length1)
410 #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
411 #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)
413 int32_t lengthRange
= SimpleModifier::formatTwoArgPattern(
421 if (U_FAILURE(status
)) { return; }
422 lengthInfix
= lengthRange
- lengthPrefix
- lengthSuffix
;
423 U_ASSERT(lengthInfix
> 0);
426 // Add spacing unless all modifiers are collapsed.
427 // TODO: add API to control this?
428 // TODO: Use a data-driven heuristic like currency spacing?
429 // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
431 bool repeatInner
= !collapseInner
&& micros1
.modInner
->getCodePointCount() > 0;
432 bool repeatMiddle
= !collapseMiddle
&& micros1
.modMiddle
->getCodePointCount() > 0;
433 bool repeatOuter
= !collapseOuter
&& micros1
.modOuter
->getCodePointCount() > 0;
434 if (repeatInner
|| repeatMiddle
|| repeatOuter
) {
435 // Add spacing if there is not already spacing
436 if (!PatternProps::isWhiteSpace(string
.charAt(UPRV_INDEX_1
))) {
437 lengthInfix
+= string
.insertCodePoint(UPRV_INDEX_1
, u
'\u0020', UNUM_FIELD_COUNT
, status
);
439 if (!PatternProps::isWhiteSpace(string
.charAt(UPRV_INDEX_2
- 1))) {
440 lengthInfix
+= string
.insertCodePoint(UPRV_INDEX_2
, u
'\u0020', UNUM_FIELD_COUNT
, status
);
445 length1
+= NumberFormatterImpl::writeNumber(micros1
, data
.quantity1
, string
, UPRV_INDEX_0
, status
);
446 length2
+= NumberFormatterImpl::writeNumber(micros2
, data
.quantity2
, string
, UPRV_INDEX_2
, status
);
448 // TODO: Support padding?
451 // Note: this is actually a mix of prefix and suffix, but adding to infix length works
452 const Modifier
& mod
= resolveModifierPlurals(*micros1
.modInner
, *micros2
.modInner
);
453 lengthInfix
+= mod
.apply(string
, UPRV_INDEX_0
, UPRV_INDEX_3
, status
);
455 length1
+= micros1
.modInner
->apply(string
, UPRV_INDEX_0
, UPRV_INDEX_1
, status
);
456 length2
+= micros2
.modInner
->apply(string
, UPRV_INDEX_2
, UPRV_INDEX_3
, status
);
459 if (collapseMiddle
) {
460 // Note: this is actually a mix of prefix and suffix, but adding to infix length works
461 const Modifier
& mod
= resolveModifierPlurals(*micros1
.modMiddle
, *micros2
.modMiddle
);
462 lengthInfix
+= mod
.apply(string
, UPRV_INDEX_0
, UPRV_INDEX_3
, status
);
464 length1
+= micros1
.modMiddle
->apply(string
, UPRV_INDEX_0
, UPRV_INDEX_1
, status
);
465 length2
+= micros2
.modMiddle
->apply(string
, UPRV_INDEX_2
, UPRV_INDEX_3
, status
);
469 // Note: this is actually a mix of prefix and suffix, but adding to infix length works
470 const Modifier
& mod
= resolveModifierPlurals(*micros1
.modOuter
, *micros2
.modOuter
);
471 lengthInfix
+= mod
.apply(string
, UPRV_INDEX_0
, UPRV_INDEX_3
, status
);
473 length1
+= micros1
.modOuter
->apply(string
, UPRV_INDEX_0
, UPRV_INDEX_1
, status
);
474 length2
+= micros2
.modOuter
->apply(string
, UPRV_INDEX_2
, UPRV_INDEX_3
, status
);
480 NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier
& first
, const Modifier
& second
) const {
481 Modifier::Parameters parameters
;
482 first
.getParameters(parameters
);
483 if (parameters
.obj
== nullptr) {
484 // No plural form; return a fallback (e.g., the first)
487 StandardPlural::Form firstPlural
= parameters
.plural
;
489 second
.getParameters(parameters
);
490 if (parameters
.obj
== nullptr) {
491 // No plural form; return a fallback (e.g., the first)
494 StandardPlural::Form secondPlural
= parameters
.plural
;
496 // Get the required plural form from data
497 StandardPlural::Form resultPlural
= fPluralRanges
.resolve(firstPlural
, secondPlural
);
499 // Get and return the new Modifier
500 const Modifier
* mod
= parameters
.obj
->getModifier(parameters
.signum
, resultPlural
);
501 U_ASSERT(mod
!= nullptr);
507 #endif /* #if !UCONFIG_NO_FORMATTING */