git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/i18n/numrange

... / ...

Commit	Line	Data
	1	// © 2018 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3
	4	#include "unicode/utypes.h"
	5
	6	#if !UCONFIG_NO_FORMATTING
	7
	8	// Allow implicit conversion from char16_t* to UnicodeString for this file:
	9	// Helpful in toString methods and elsewhere.
	10	#define UNISTR_FROM_STRING_EXPLICIT
	11
	12	#include "unicode/numberrangeformatter.h"
	13	#include "numrange_impl.h"
	14	#include "patternprops.h"
	15	#include "uresimp.h"
	16	#include "util.h"
	17
	18	using namespace icu;
	19	using namespace icu::number;
	20	using namespace icu::number::impl;
	21
	22	namespace {
	23
	24	// Helper function for 2-dimensional switch statement
	25	constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
	26	return static_cast<int8_t>(a) \| (static_cast<int8_t>(b) << 4);
	27	}
	28
	29
	30	struct NumberRangeData {
	31	SimpleFormatter rangePattern;
	32	SimpleFormatter approximatelyPattern;
	33	};
	34
	35	class NumberRangeDataSink : public ResourceSink {
	36	public:
	37	NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
	38
	39	void put(const char* key, ResourceValue& value, UBool /noFallback/, UErrorCode& status) U_OVERRIDE {
	40	ResourceTable miscTable = value.getTable(status);
	41	if (U_FAILURE(status)) { return; }
	42	for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) {
	43	if (uprv_strcmp(key, "range") == 0) {
	44	if (hasRangeData()) {
	45	continue; // have already seen this pattern
	46	}
	47	fData.rangePattern = {value.getUnicodeString(status), status};
	48	} else if (uprv_strcmp(key, "approximately") == 0) {
	49	if (hasApproxData()) {
	50	continue; // have already seen this pattern
	51	}
	52	fData.approximatelyPattern = {value.getUnicodeString(status), status};
	53	}
	54	}
	55	}
	56
	57	bool hasRangeData() {
	58	return fData.rangePattern.getArgumentLimit() != 0;
	59	}
	60
	61	bool hasApproxData() {
	62	return fData.approximatelyPattern.getArgumentLimit() != 0;
	63	}
	64
	65	bool isComplete() {
	66	return hasRangeData() && hasApproxData();
	67	}
	68
	69	void fillInDefaults(UErrorCode& status) {
	70	if (!hasRangeData()) {
	71	fData.rangePattern = {u"{0}–{1}", status};
	72	}
	73	if (!hasApproxData()) {
	74	fData.approximatelyPattern = {u"~{0}", status};
	75	}
	76	}
	77
	78	private:
	79	NumberRangeData& fData;
	80	};
	81
	82	void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) {
	83	if (U_FAILURE(status)) { return; }
	84	LocalUResourceBundlePointer rb(ures_open(NULL, localeName, &status));
	85	if (U_FAILURE(status)) { return; }
	86	NumberRangeDataSink sink(data);
	87
	88	CharString dataPath;
	89	dataPath.append("NumberElements/", -1, status);
	90	dataPath.append(nsName, -1, status);
	91	dataPath.append("/miscPatterns", -1, status);
	92	if (U_FAILURE(status)) { return; }
	93
	94	UErrorCode localStatus = U_ZERO_ERROR;
	95	ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, localStatus);
	96	if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {
	97	status = localStatus;
	98	return;
	99	}
	100
	101	// Fall back to latn if necessary
	102	if (!sink.isComplete()) {
	103	ures_getAllItemsWithFallback(rb.getAlias(), "NumberElements/latn/miscPatterns", sink, status);
	104	}
	105
	106	sink.fillInDefaults(status);
	107	}
	108
	109	class PluralRangesDataSink : public ResourceSink {
	110	public:
	111	PluralRangesDataSink(StandardPluralRanges& output) : fOutput(output) {}
	112
	113	void put(const char* /key/, ResourceValue& value, UBool /noFallback/, UErrorCode& status) U_OVERRIDE {
	114	ResourceArray entriesArray = value.getArray(status);
	115	if (U_FAILURE(status)) { return; }
	116	fOutput.setCapacity(entriesArray.getSize());
	117	for (int i = 0; entriesArray.getValue(i, value); i++) {
	118	ResourceArray pluralFormsArray = value.getArray(status);
	119	if (U_FAILURE(status)) { return; }
	120	pluralFormsArray.getValue(0, value);
	121	StandardPlural::Form first = StandardPlural::fromString(value.getUnicodeString(status), status);
	122	if (U_FAILURE(status)) { return; }
	123	pluralFormsArray.getValue(1, value);
	124	StandardPlural::Form second = StandardPlural::fromString(value.getUnicodeString(status), status);
	125	if (U_FAILURE(status)) { return; }
	126	pluralFormsArray.getValue(2, value);
	127	StandardPlural::Form result = StandardPlural::fromString(value.getUnicodeString(status), status);
	128	if (U_FAILURE(status)) { return; }
	129	fOutput.addPluralRange(first, second, result);
	130	}
	131	}
	132
	133	private:
	134	StandardPluralRanges& fOutput;
	135	};
	136
	137	void getPluralRangesData(const Locale& locale, StandardPluralRanges& output, UErrorCode& status) {
	138	if (U_FAILURE(status)) { return; }
	139	LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "pluralRanges", &status));
	140	if (U_FAILURE(status)) { return; }
	141
	142	CharString dataPath;
	143	dataPath.append("locales/", -1, status);
	144	dataPath.append(locale.getLanguage(), -1, status);
	145	if (U_FAILURE(status)) { return; }
	146	int32_t setLen;
	147	// Not all languages are covered: fail gracefully
	148	UErrorCode internalStatus = U_ZERO_ERROR;
	149	const UChar* set = ures_getStringByKeyWithFallback(rb.getAlias(), dataPath.data(), &setLen, &internalStatus);
	150	if (U_FAILURE(internalStatus)) { return; }
	151
	152	dataPath.clear();
	153	dataPath.append("rules/", -1, status);
	154	dataPath.appendInvariantChars(set, setLen, status);
	155	if (U_FAILURE(status)) { return; }
	156	PluralRangesDataSink sink(output);
	157	ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status);
	158	if (U_FAILURE(status)) { return; }
	159	}
	160
	161	} // namespace
	162
	163
	164	void StandardPluralRanges::initialize(const Locale& locale, UErrorCode& status) {
	165	getPluralRangesData(locale, *this, status);
	166	}
	167
	168	void StandardPluralRanges::addPluralRange(
	169	StandardPlural::Form first,
	170	StandardPlural::Form second,
	171	StandardPlural::Form result) {
	172	U_ASSERT(fTriplesLen < fTriples.getCapacity());
	173	fTriples[fTriplesLen] = {first, second, result};
	174	fTriplesLen++;
	175	}
	176
	177	void StandardPluralRanges::setCapacity(int32_t length) {
	178	if (length > fTriples.getCapacity()) {
	179	fTriples.resize(length, 0);
	180	}
	181	}
	182
	183	StandardPlural::Form
	184	StandardPluralRanges::resolve(StandardPlural::Form first, StandardPlural::Form second) const {
	185	for (int32_t i=0; i<fTriplesLen; i++) {
	186	const auto& triple = fTriples[i];
	187	if (triple.first == first && triple.second == second) {
	188	return triple.result;
	189	}
	190	}
	191	// Default fallback
	192	return StandardPlural::OTHER;
	193	}
	194
	195
	196	NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
	197	: formatterImpl1(macros.formatter1.fMacros, status),
	198	formatterImpl2(macros.formatter2.fMacros, status),
	199	fSameFormatters(macros.singleFormatter),
	200	fCollapse(macros.collapse),
	201	fIdentityFallback(macros.identityFallback) {
	202
	203	const char* nsName = formatterImpl1.getRawMicroProps().nsName;
	204	if (uprv_strcmp(nsName, formatterImpl2.getRawMicroProps().nsName) != 0) {
	205	status = U_ILLEGAL_ARGUMENT_ERROR;
	206	return;
	207	}
	208
	209	NumberRangeData data;
	210	getNumberRangeData(macros.locale.getName(), nsName, data, status);
	211	if (U_FAILURE(status)) { return; }
	212	fRangeFormatter = data.rangePattern;
	213	fApproximatelyModifier = {data.approximatelyPattern, UNUM_FIELD_COUNT, false};
	214
	215	// TODO: Get locale from PluralRules instead?
	216	fPluralRanges.initialize(macros.locale, status);
	217	if (U_FAILURE(status)) { return; }
	218	}
	219
	220	void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
	221	if (U_FAILURE(status)) {
	222	return;
	223	}
	224
	225	MicroProps micros1;
	226	MicroProps micros2;
	227	formatterImpl1.preProcess(data.quantity1, micros1, status);
	228	if (fSameFormatters) {
	229	formatterImpl1.preProcess(data.quantity2, micros2, status);
	230	} else {
	231	formatterImpl2.preProcess(data.quantity2, micros2, status);
	232	}
	233	if (U_FAILURE(status)) {
	234	return;
	235	}
	236
	237	// If any of the affixes are different, an identity is not possible
	238	// and we must use formatRange().
	239	// TODO: Write this as MicroProps operator==() ?
	240	// TODO: Avoid the redundancy of these equality operations with the
	241	// ones in formatRange?
	242	if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner)
	243	\|\| !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle)
	244	\|\| !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) {
	245	formatRange(data, micros1, micros2, status);
	246	data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
	247	return;
	248	}
	249
	250	// Check for identity
	251	if (equalBeforeRounding) {
	252	data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
	253	} else if (data.quantity1 == data.quantity2) {
	254	data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
	255	} else {
	256	data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
	257	}
	258
	259	switch (identity2d(fIdentityFallback, data.identityResult)) {
	260	case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
	261	UNUM_IDENTITY_RESULT_NOT_EQUAL):
	262	case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
	263	UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
	264	case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
	265	UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
	266	case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
	267	UNUM_IDENTITY_RESULT_NOT_EQUAL):
	268	case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
	269	UNUM_IDENTITY_RESULT_NOT_EQUAL):
	270	case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
	271	UNUM_IDENTITY_RESULT_NOT_EQUAL):
	272	formatRange(data, micros1, micros2, status);
	273	break;
	274
	275	case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
	276	UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
	277	case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
	278	UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
	279	case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
	280	UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
	281	formatApproximately(data, micros1, micros2, status);
	282	break;
	283
	284	case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
	285	UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
	286	case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
	287	UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
	288	case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
	289	UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
	290	formatSingleValue(data, micros1, micros2, status);
	291	break;
	292
	293	default:
	294	UPRV_UNREACHABLE;
	295	}
	296	}
	297
	298
	299	void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
	300	MicroProps& micros1, MicroProps& micros2,
	301	UErrorCode& status) const {
	302	if (U_FAILURE(status)) { return; }
	303	if (fSameFormatters) {
	304	int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), 0, status);
	305	NumberFormatterImpl::writeAffixes(micros1, data.getStringRef(), 0, length, status);
	306	} else {
	307	formatRange(data, micros1, micros2, status);
	308	}
	309	}
	310
	311
	312	void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data,
	313	MicroProps& micros1, MicroProps& micros2,
	314	UErrorCode& status) const {
	315	if (U_FAILURE(status)) { return; }
	316	if (fSameFormatters) {
	317	int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), 0, status);
	318	// HEURISTIC: Desired modifier order: inner, middle, approximately, outer.
	319	length += micros1.modInner->apply(data.getStringRef(), 0, length, status);
	320	length += micros1.modMiddle->apply(data.getStringRef(), 0, length, status);
	321	length += fApproximatelyModifier.apply(data.getStringRef(), 0, length, status);
	322	micros1.modOuter->apply(data.getStringRef(), 0, length, status);
	323	} else {
	324	formatRange(data, micros1, micros2, status);
	325	}
	326	}
	327
	328
	329	void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
	330	MicroProps& micros1, MicroProps& micros2,
	331	UErrorCode& status) const {
	332	if (U_FAILURE(status)) { return; }
	333
	334	// modInner is always notation (scientific); collapsable in ALL.
	335	// modOuter is always units; collapsable in ALL, AUTO, and UNIT.
	336	// modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
	337	// Never collapse an outer mod but not an inner mod.
	338	bool collapseOuter, collapseMiddle, collapseInner;
	339	switch (fCollapse) {
	340	case UNUM_RANGE_COLLAPSE_ALL:
	341	case UNUM_RANGE_COLLAPSE_AUTO:
	342	case UNUM_RANGE_COLLAPSE_UNIT:
	343	{
	344	// OUTER MODIFIER
	345	collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter);
	346
	347	if (!collapseOuter) {
	348	// Never collapse inner mods if outer mods are not collapsable
	349	collapseMiddle = false;
	350	collapseInner = false;
	351	break;
	352	}
	353
	354	// MIDDLE MODIFIER
	355	collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle);
	356
	357	if (!collapseMiddle) {
	358	// Never collapse inner mods if outer mods are not collapsable
	359	collapseInner = false;
	360	break;
	361	}
	362
	363	// MIDDLE MODIFIER HEURISTICS
	364	// (could disable collapsing of the middle modifier)
	365	// The modifiers are equal by this point, so we can look at just one of them.
	366	const Modifier* mm = micros1.modMiddle;
	367	if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
	368	// Only collapse if the modifier is a unit.
	369	// TODO: Make a better way to check for a unit?
	370	// TODO: Handle case where the modifier has both notation and unit (compact currency)?
	371	if (!mm->containsField(UNUM_CURRENCY_FIELD) && !mm->containsField(UNUM_PERCENT_FIELD)) {
	372	collapseMiddle = false;
	373	}
	374	} else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
	375	// Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
	376	if (mm->getCodePointCount() <= 1) {
	377	collapseMiddle = false;
	378	}
	379	}
	380
	381	if (!collapseMiddle \|\| fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
	382	collapseInner = false;
	383	break;
	384	}
	385
	386	// INNER MODIFIER
	387	collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner);
	388
	389	// All done checking for collapsability.
	390	break;
	391	}
	392
	393	default:
	394	collapseOuter = false;
	395	collapseMiddle = false;
	396	collapseInner = false;
	397	break;
	398	}
	399
	400	FormattedStringBuilder& string = data.getStringRef();
	401	int32_t lengthPrefix = 0;
	402	int32_t length1 = 0;
	403	int32_t lengthInfix = 0;
	404	int32_t length2 = 0;
	405	int32_t lengthSuffix = 0;
	406
	407	// Use #define so that these are evaluated at the call site.
	408	#define UPRV_INDEX_0 (lengthPrefix)
	409	#define UPRV_INDEX_1 (lengthPrefix + length1)
	410	#define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
	411	#define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)
	412
	413	int32_t lengthRange = SimpleModifier::formatTwoArgPattern(
	414	fRangeFormatter,
	415	string,
	416	0,
	417	&lengthPrefix,
	418	&lengthSuffix,
	419	UNUM_FIELD_COUNT,
	420	status);
	421	if (U_FAILURE(status)) { return; }
	422	lengthInfix = lengthRange - lengthPrefix - lengthSuffix;
	423	U_ASSERT(lengthInfix > 0);
	424
	425	// SPACING HEURISTIC
	426	// Add spacing unless all modifiers are collapsed.
	427	// TODO: add API to control this?
	428	// TODO: Use a data-driven heuristic like currency spacing?
	429	// TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
	430	{
	431	bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0;
	432	bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0;
	433	bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0;
	434	if (repeatInner \|\| repeatMiddle \|\| repeatOuter) {
	435	// Add spacing if there is not already spacing
	436	if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) {
	437	lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', UNUM_FIELD_COUNT, status);
	438	}
	439	if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) {
	440	lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', UNUM_FIELD_COUNT, status);
	441	}
	442	}
	443	}
	444
	445	length1 += NumberFormatterImpl::writeNumber(micros1, data.quantity1, string, UPRV_INDEX_0, status);
	446	length2 += NumberFormatterImpl::writeNumber(micros2, data.quantity2, string, UPRV_INDEX_2, status);
	447
	448	// TODO: Support padding?
	449
	450	if (collapseInner) {
	451	// Note: this is actually a mix of prefix and suffix, but adding to infix length works
	452	const Modifier& mod = resolveModifierPlurals(micros1.modInner, micros2.modInner);
	453	lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
	454	} else {
	455	length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
	456	length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
	457	}
	458
	459	if (collapseMiddle) {
	460	// Note: this is actually a mix of prefix and suffix, but adding to infix length works
	461	const Modifier& mod = resolveModifierPlurals(micros1.modMiddle, micros2.modMiddle);
	462	lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
	463	} else {
	464	length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
	465	length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
	466	}
	467
	468	if (collapseOuter) {
	469	// Note: this is actually a mix of prefix and suffix, but adding to infix length works
	470	const Modifier& mod = resolveModifierPlurals(micros1.modOuter, micros2.modOuter);
	471	lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
	472	} else {
	473	length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
	474	length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
	475	}
	476	}
	477
	478
	479	const Modifier&
	480	NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
	481	Modifier::Parameters parameters;
	482	first.getParameters(parameters);
	483	if (parameters.obj == nullptr) {
	484	// No plural form; return a fallback (e.g., the first)
	485	return first;
	486	}
	487	StandardPlural::Form firstPlural = parameters.plural;
	488
	489	second.getParameters(parameters);
	490	if (parameters.obj == nullptr) {
	491	// No plural form; return a fallback (e.g., the first)
	492	return first;
	493	}
	494	StandardPlural::Form secondPlural = parameters.plural;
	495
	496	// Get the required plural form from data
	497	StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural);
	498
	499	// Get and return the new Modifier
	500	const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural);
	501	U_ASSERT(mod != nullptr);
	502	return *mod;
	503	}
	504
	505
	506
	507	#endif /* #if !UCONFIG_NO_FORMATTING */