git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/choicfmt.cpp

Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
f3c0d7a5 A	2	// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f A	3	/*
b75a7d8f A	4	*******************************************************************************
57a6839d	5	* Copyright (C) 1997-2013, International Business Machines Corporation and *
b75a7d8f A	6	* others. All Rights Reserved. *
	7	*******************************************************************************
	8	*
	9	* File CHOICFMT.CPP
	10	*
	11	* Modification History:
	12	*
	13	* Date Name Description
	14	* 02/19/97 aliu Converted from java.
	15	* 03/20/97 helena Finished first cut of implementation and got rid
	16	* of nextDouble/previousDouble and replaced with
	17	* boolean array.
	18	* 4/10/97 aliu Clean up. Modified to work on AIX.
	19	* 06/04/97 helena Fixed applyPattern(), toPattern() and not to include
	20	* wchar.h.
	21	* 07/09/97 helena Made ParsePosition into a class.
	22	* 08/06/97 nos removed overloaded constructor, fixed 'format(array)'
	23	* 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags)
	24	* 02/22/99 stephen Removed character literals for EBCDIC safety
	25	********************************************************************************
	26	*/
	27
	28	#include "unicode/utypes.h"
	29
	30	#if !UCONFIG_NO_FORMATTING
	31
	32	#include "unicode/choicfmt.h"
	33	#include "unicode/numfmt.h"
	34	#include "unicode/locid.h"
	35	#include "cpputils.h"
374ca955	36	#include "cstring.h"
4388f060	37	#include "messageimpl.h"
374ca955	38	#include "putilimp.h"
4388f060	39	#include "uassert.h"
73c04bcf A	40	#include <stdio.h>
73c04bcf A	41	#include <float.h>
b75a7d8f A	42
	43	// *****************************************************************************
	44	// class ChoiceFormat
	45	// *****************************************************************************
	46
	47	U_NAMESPACE_BEGIN
	48
374ca955	49	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
b75a7d8f A	50
	51	// Special characters used by ChoiceFormat. There are two characters
	52	// used interchangeably to indicate <=. Either is parsed, but only
	53	// LESS_EQUAL is generated by toPattern().
	54	#define SINGLE_QUOTE ((UChar)0x0027) /'/
	55	#define LESS_THAN ((UChar)0x003C) /</
	56	#define LESS_EQUAL ((UChar)0x0023) /#/
	57	#define LESS_EQUAL2 ((UChar)0x2264)
	58	#define VERTICAL_BAR ((UChar)0x007C) /\|/
	59	#define MINUS ((UChar)0x002D) /-/
729e4ab9	60
4388f060 A	61	static const UChar LEFT_CURLY_BRACE = 0x7B; /{/
	62	static const UChar RIGHT_CURLY_BRACE = 0x7D; /}/
	63
729e4ab9 A	64	#ifdef INFINITY
	65	#undef INFINITY
	66	#endif
b75a7d8f A	67	#define INFINITY ((UChar)0x221E)
b75a7d8f A	68
51004dcb A	69	//static const UChar gPositiveInfinity[] = {INFINITY, 0};
51004dcb A	70	//static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
b75a7d8f A	71	#define POSITIVE_INF_STRLEN 1
	72	#define NEGATIVE_INF_STRLEN 2
	73
	74	// -------------------------------------
	75	// Creates a ChoiceFormat instance based on the pattern.
	76
	77	ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
	78	UErrorCode& status)
4388f060 A	79	: constructorErrorCode(status),
4388f060 A	80	msgPattern(status)
b75a7d8f A	81	{
	82	applyPattern(newPattern, status);
	83	}
	84
	85	// -------------------------------------
	86	// Creates a ChoiceFormat instance with the limit array and
	87	// format strings for each limit.
	88
	89	ChoiceFormat::ChoiceFormat(const double* limits,
	90	const UnicodeString* formats,
	91	int32_t cnt )
4388f060 A	92	: constructorErrorCode(U_ZERO_ERROR),
4388f060 A	93	msgPattern(constructorErrorCode)
b75a7d8f	94	{
4388f060	95	setChoices(limits, NULL, formats, cnt, constructorErrorCode);
b75a7d8f A	96	}
	97
	98	// -------------------------------------
	99
	100	ChoiceFormat::ChoiceFormat(const double* limits,
	101	const UBool* closures,
	102	const UnicodeString* formats,
	103	int32_t cnt )
4388f060 A	104	: constructorErrorCode(U_ZERO_ERROR),
4388f060 A	105	msgPattern(constructorErrorCode)
b75a7d8f	106	{
4388f060	107	setChoices(limits, closures, formats, cnt, constructorErrorCode);
b75a7d8f A	108	}
	109
	110	// -------------------------------------
	111	// copy constructor
	112
	113	ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
	114	: NumberFormat(that),
4388f060 A	115	constructorErrorCode(that.constructorErrorCode),
4388f060 A	116	msgPattern(that.msgPattern)
b75a7d8f	117	{
b75a7d8f A	118	}
	119
	120	// -------------------------------------
	121	// Private constructor that creates a
	122	// ChoiceFormat instance based on the
	123	// pattern and populates UParseError
	124
	125	ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
	126	UParseError& parseError,
	127	UErrorCode& status)
4388f060 A	128	: constructorErrorCode(status),
4388f060 A	129	msgPattern(status)
b75a7d8f A	130	{
	131	applyPattern(newPattern,parseError, status);
	132	}
	133	// -------------------------------------
	134
	135	UBool
	136	ChoiceFormat::operator==(const Format& that) const
	137	{
	138	if (this == &that) return TRUE;
b75a7d8f A	139	if (!NumberFormat::operator==(that)) return FALSE;
b75a7d8f A	140	ChoiceFormat& thatAlias = (ChoiceFormat&)that;
4388f060	141	return msgPattern == thatAlias.msgPattern;
b75a7d8f A	142	}
	143
	144	// -------------------------------------
	145	// copy constructor
	146
	147	const ChoiceFormat&
	148	ChoiceFormat::operator=(const ChoiceFormat& that)
	149	{
	150	if (this != &that) {
	151	NumberFormat::operator=(that);
4388f060 A	152	constructorErrorCode = that.constructorErrorCode;
4388f060 A	153	msgPattern = that.msgPattern;
b75a7d8f A	154	}
	155	return *this;
	156	}
	157
	158	// -------------------------------------
	159
	160	ChoiceFormat::~ChoiceFormat()
	161	{
b75a7d8f A	162	}
	163
	164	// -------------------------------------
	165
	166	/**
4388f060	167	* Convert a double value to a string without the overhead of NumberFormat.
b75a7d8f A	168	*/
	169	UnicodeString&
	170	ChoiceFormat::dtos(double value,
	171	UnicodeString& string)
	172	{
73c04bcf A	173	/* Buffer to contain the digits and any extra formatting stuff. */
	174	char temp[DBL_DIG + 16];
	175	char *itrPtr = temp;
46f4442e	176	char *expPtr;
73c04bcf	177
46f4442e	178	sprintf(temp, "%.*g", DBL_DIG, value);
73c04bcf A	179
	180	/* Find and convert the decimal point.
	181	Using setlocale on some machines will cause sprintf to use a comma for certain locales.
	182	*/
	183	while (itrPtr && (itrPtr == '-' \|\| isdigit(*itrPtr))) {
	184	itrPtr++;
	185	}
46f4442e A	186	if (itrPtr != 0 && itrPtr != 'e') {
	187	/* We reached something that looks like a decimal point.
	188	In case someone used setlocale(), which changes the decimal point. */
73c04bcf	189	*itrPtr = '.';
46f4442e	190	itrPtr++;
73c04bcf	191	}
46f4442e A	192	/* Search for the exponent */
	193	while (itrPtr && itrPtr != 'e') {
	194	itrPtr++;
	195	}
	196	if (*itrPtr == 'e') {
	197	itrPtr++;
	198	/* Verify the exponent sign */
	199	if (itrPtr == '+' \|\| itrPtr == '-') {
	200	itrPtr++;
	201	}
	202	/* Remove leading zeros. You will see this on Windows machines. */
	203	expPtr = itrPtr;
	204	while (*itrPtr == '0') {
	205	itrPtr++;
	206	}
	207	if (*itrPtr && expPtr != itrPtr) {
	208	/* Shift the exponent without zeros. */
	209	while (*itrPtr) {
	210	(expPtr++) = (itrPtr++);
	211	}
	212	// NULL terminate
	213	*expPtr = 0;
73c04bcf A	214	}
73c04bcf A	215	}
46f4442e	216
374ca955	217	string = UnicodeString(temp, -1, US_INV); /* invariant codepage */
b75a7d8f A	218	return string;
	219	}
	220
	221	// -------------------------------------
	222	// calls the overloaded applyPattern method.
	223
	224	void
	225	ChoiceFormat::applyPattern(const UnicodeString& pattern,
	226	UErrorCode& status)
	227	{
4388f060 A	228	msgPattern.parseChoiceStyle(pattern, NULL, status);
4388f060 A	229	constructorErrorCode = status;
b75a7d8f A	230	}
	231
	232	// -------------------------------------
	233	// Applies the pattern to this ChoiceFormat instance.
	234
	235	void
	236	ChoiceFormat::applyPattern(const UnicodeString& pattern,
	237	UParseError& parseError,
	238	UErrorCode& status)
	239	{
4388f060 A	240	msgPattern.parseChoiceStyle(pattern, &parseError, status);
4388f060 A	241	constructorErrorCode = status;
b75a7d8f A	242	}
b75a7d8f A	243	// -------------------------------------
4388f060	244	// Returns the input pattern string.
b75a7d8f A	245
	246	UnicodeString&
	247	ChoiceFormat::toPattern(UnicodeString& result) const
	248	{
4388f060	249	return result = msgPattern.getPatternString();
b75a7d8f A	250	}
b75a7d8f A	251
b75a7d8f A	252	// -------------------------------------
	253	// Sets the limit and format arrays.
	254	void
	255	ChoiceFormat::setChoices( const double* limits,
	256	const UnicodeString* formats,
	257	int32_t cnt )
	258	{
4388f060 A	259	UErrorCode errorCode = U_ZERO_ERROR;
4388f060 A	260	setChoices(limits, NULL, formats, cnt, errorCode);
b75a7d8f A	261	}
	262
	263	// -------------------------------------
	264	// Sets the limit and format arrays.
	265	void
	266	ChoiceFormat::setChoices( const double* limits,
	267	const UBool* closures,
	268	const UnicodeString* formats,
	269	int32_t cnt )
	270	{
4388f060 A	271	UErrorCode errorCode = U_ZERO_ERROR;
	272	setChoices(limits, closures, formats, cnt, errorCode);
	273	}
b75a7d8f	274
4388f060 A	275	void
	276	ChoiceFormat::setChoices(const double* limits,
	277	const UBool* closures,
	278	const UnicodeString* formats,
	279	int32_t count,
	280	UErrorCode &errorCode) {
	281	if (U_FAILURE(errorCode)) {
	282	return;
46f4442e	283	}
4388f060 A	284	if (limits == NULL \|\| formats == NULL) {
	285	errorCode = U_ILLEGAL_ARGUMENT_ERROR;
	286	return;
46f4442e	287	}
4388f060 A	288	// Reconstruct the original input pattern.
	289	// Modified version of the pre-ICU 4.8 toPattern() implementation.
	290	UnicodeString result;
	291	for (int32_t i = 0; i < count; ++i) {
	292	if (i != 0) {
	293	result += VERTICAL_BAR;
46f4442e	294	}
4388f060 A	295	UnicodeString buf;
	296	if (uprv_isPositiveInfinity(limits[i])) {
	297	result += INFINITY;
	298	} else if (uprv_isNegativeInfinity(limits[i])) {
	299	result += MINUS;
	300	result += INFINITY;
	301	} else {
	302	result += dtos(limits[i], buf);
46f4442e	303	}
4388f060 A	304	if (closures != NULL && closures[i]) {
	305	result += LESS_THAN;
	306	} else {
	307	result += LESS_EQUAL;
46f4442e	308	}
4388f060 A	309	// Append formats[i], using quotes if there are special
	310	// characters. Single quotes themselves must be escaped in
	311	// either case.
	312	const UnicodeString& text = formats[i];
	313	int32_t textLength = text.length();
	314	int32_t nestingLevel = 0;
	315	for (int32_t j = 0; j < textLength; ++j) {
	316	UChar c = text[j];
	317	if (c == SINGLE_QUOTE && nestingLevel == 0) {
	318	// Double each top-level apostrophe.
	319	result.append(c);
	320	} else if (c == VERTICAL_BAR && nestingLevel == 0) {
	321	// Surround each pipe symbol with apostrophes for quoting.
	322	// If the next character is an apostrophe, then that will be doubled,
	323	// and although the parser will see the apostrophe pairs beginning
	324	// and ending one character earlier than our doubling, the result
	325	// is as desired.
	326	// \| -> '\|'
	327	// \|' -> '\|'''
	328	// \|'' -> '\|''''' etc.
	329	result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
	330	continue; // Skip the append(c) at the end of the loop body.
	331	} else if (c == LEFT_CURLY_BRACE) {
	332	++nestingLevel;
	333	} else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
	334	--nestingLevel;
	335	}
	336	result.append(c);
b75a7d8f A	337	}
b75a7d8f A	338	}
4388f060 A	339	// Apply the reconstructed pattern.
4388f060 A	340	applyPattern(result, errorCode);
b75a7d8f A	341	}
	342
	343	// -------------------------------------
	344	// Gets the limit array.
	345
	346	const double*
	347	ChoiceFormat::getLimits(int32_t& cnt) const
	348	{
4388f060 A	349	cnt = 0;
4388f060 A	350	return NULL;
b75a7d8f A	351	}
	352
	353	// -------------------------------------
	354	// Gets the closures array.
	355
	356	const UBool*
	357	ChoiceFormat::getClosures(int32_t& cnt) const
	358	{
4388f060 A	359	cnt = 0;
4388f060 A	360	return NULL;
b75a7d8f A	361	}
	362
	363	// -------------------------------------
	364	// Gets the format array.
	365
	366	const UnicodeString*
	367	ChoiceFormat::getFormats(int32_t& cnt) const
	368	{
4388f060 A	369	cnt = 0;
4388f060 A	370	return NULL;
b75a7d8f A	371	}
b75a7d8f A	372
374ca955 A	373	// -------------------------------------
	374	// Formats an int64 number, it's actually formatted as
	375	// a double. The returned format string may differ
	376	// from the input number because of this.
	377
	378	UnicodeString&
	379	ChoiceFormat::format(int64_t number,
	380	UnicodeString& appendTo,
	381	FieldPosition& status) const
	382	{
	383	return format((double) number, appendTo, status);
	384	}
	385
b75a7d8f	386	// -------------------------------------
4388f060 A	387	// Formats an int32_t number, it's actually formatted as
4388f060 A	388	// a double.
b75a7d8f A	389
	390	UnicodeString&
	391	ChoiceFormat::format(int32_t number,
	392	UnicodeString& appendTo,
	393	FieldPosition& status) const
	394	{
	395	return format((double) number, appendTo, status);
	396	}
	397
	398	// -------------------------------------
	399	// Formats a double number.
	400
	401	UnicodeString&
	402	ChoiceFormat::format(double number,
	403	UnicodeString& appendTo,
	404	FieldPosition& /pos/) const
	405	{
4388f060 A	406	if (msgPattern.countParts() == 0) {
	407	// No pattern was applied, or it failed.
	408	return appendTo;
	409	}
	410	// Get the appropriate sub-message.
	411	int32_t msgStart = findSubMessage(msgPattern, 0, number);
	412	if (!MessageImpl::jdkAposMode(msgPattern)) {
	413	int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
	414	int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
	415	appendTo.append(msgPattern.getPatternString(),
	416	patternStart,
	417	msgPattern.getPatternIndex(msgLimit) - patternStart);
	418	return appendTo;
	419	}
	420	// JDK compatibility mode: Remove SKIP_SYNTAX.
	421	return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
	422	}
	423
	424	int32_t
	425	ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
	426	int32_t count = pattern.countParts();
	427	int32_t msgStart;
	428	// Iterate over (ARG_INT\|DOUBLE, ARG_SELECTOR, message) tuples
	429	// until ARG_LIMIT or end of choice-only pattern.
	430	// Ignore the first number and selector and start the loop on the first message.
	431	partIndex += 2;
	432	for (;;) {
	433	// Skip but remember the current sub-message.
	434	msgStart = partIndex;
	435	partIndex = pattern.getLimitPartIndex(partIndex);
	436	if (++partIndex >= count) {
	437	// Reached the end of the choice-only pattern.
	438	// Return with the last sub-message.
	439	break;
	440	}
	441	const MessagePattern::Part &part = pattern.getPart(partIndex++);
	442	UMessagePatternPartType type = part.getType();
	443	if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
	444	// Reached the end of the ChoiceFormat style.
	445	// Return with the last sub-message.
	446	break;
	447	}
	448	// part is an ARG_INT or ARG_DOUBLE
	449	U_ASSERT(MessagePattern::Part::hasNumericValue(type));
	450	double boundary = pattern.getNumericValue(part);
	451	// Fetch the ARG_SELECTOR character.
	452	int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
	453	UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
	454	if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
	455	// The number is in the interval between the previous boundary and the current one.
	456	// Return with the sub-message between them.
	457	// The !(a>b) and !(a>=b) comparisons are equivalent to
	458	// (a<=b) and (a<b) except they "catch" NaN.
b75a7d8f A	459	break;
	460	}
	461	}
4388f060	462	return msgStart;
b75a7d8f A	463	}
	464
	465	// -------------------------------------
	466	// Formats an array of objects. Checks if the data type of the objects
	467	// to get the right value for formatting.
	468
	469	UnicodeString&
	470	ChoiceFormat::format(const Formattable* objs,
	471	int32_t cnt,
	472	UnicodeString& appendTo,
	473	FieldPosition& pos,
	474	UErrorCode& status) const
	475	{
	476	if(cnt < 0) {
	477	status = U_ILLEGAL_ARGUMENT_ERROR;
	478	return appendTo;
	479	}
4388f060 A	480	if (msgPattern.countParts() == 0) {
	481	status = U_INVALID_STATE_ERROR;
	482	return appendTo;
	483	}
b75a7d8f	484
b75a7d8f	485	for (int32_t i = 0; i < cnt; i++) {
374ca955 A	486	double objDouble = objs[i].getDouble(status);
374ca955 A	487	if (U_SUCCESS(status)) {
4388f060	488	format(objDouble, appendTo, pos);
374ca955	489	}
b75a7d8f A	490	}
	491
	492	return appendTo;
	493	}
	494
b75a7d8f A	495	// -------------------------------------
	496
	497	void
	498	ChoiceFormat::parse(const UnicodeString& text,
	499	Formattable& result,
4388f060	500	ParsePosition& pos) const
b75a7d8f	501	{
4388f060 A	502	result.setDouble(parseArgument(msgPattern, 0, text, pos));
	503	}
	504
	505	double
	506	ChoiceFormat::parseArgument(
	507	const MessagePattern &pattern, int32_t partIndex,
	508	const UnicodeString &source, ParsePosition &pos) {
b75a7d8f	509	// find the best number (defined as the one with the longest parse)
4388f060	510	int32_t start = pos.getIndex();
b75a7d8f A	511	int32_t furthest = start;
	512	double bestNumber = uprv_getNaN();
	513	double tempNumber = 0.0;
4388f060 A	514	int32_t count = pattern.countParts();
	515	while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
	516	tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
	517	partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR
	518	int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
	519	int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
	520	if (len >= 0) {
	521	int32_t newIndex = start + len;
	522	if (newIndex > furthest) {
	523	furthest = newIndex;
b75a7d8f	524	bestNumber = tempNumber;
4388f060	525	if (furthest == source.length()) {
b75a7d8f	526	break;
4388f060	527	}
b75a7d8f A	528	}
b75a7d8f A	529	}
4388f060	530	partIndex = msgLimit + 1;
b75a7d8f	531	}
4388f060 A	532	if (furthest == start) {
	533	pos.setErrorIndex(start);
	534	} else {
	535	pos.setIndex(furthest);
	536	}
	537	return bestNumber;
	538	}
	539
	540	int32_t
	541	ChoiceFormat::matchStringUntilLimitPart(
	542	const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
	543	const UnicodeString &source, int32_t sourceOffset) {
	544	int32_t matchingSourceLength = 0;
	545	const UnicodeString &msgString = pattern.getPatternString();
	546	int32_t prevIndex = pattern.getPart(partIndex).getLimit();
	547	for (;;) {
	548	const MessagePattern::Part &part = pattern.getPart(++partIndex);
	549	if (partIndex == limitPartIndex \|\| part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
	550	int32_t index = part.getIndex();
	551	int32_t length = index - prevIndex;
	552	if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
	553	return -1; // mismatch
	554	}
	555	matchingSourceLength += length;
	556	if (partIndex == limitPartIndex) {
	557	return matchingSourceLength;
	558	}
	559	prevIndex = part.getLimit(); // SKIP_SYNTAX
	560	}
b75a7d8f	561	}
b75a7d8f A	562	}
b75a7d8f A	563
b75a7d8f A	564	// -------------------------------------
b75a7d8f A	565
340931cb	566	ChoiceFormat*
b75a7d8f A	567	ChoiceFormat::clone() const
	568	{
	569	ChoiceFormat aCopy = new ChoiceFormat(this);
	570	return aCopy;
	571	}
	572
	573	U_NAMESPACE_END
	574
	575	#endif /* #if !UCONFIG_NO_FORMATTING */
	576
	577	//eof