git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/nfrs.cpp

Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
f3c0d7a5 A	2	// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f A	3	/*
b75a7d8f A	4	******************************************************************************
2ca993e8	5	* Copyright (C) 1997-2015, International Business Machines
b75a7d8f A	6	* Corporation and others. All Rights Reserved.
	7	******************************************************************************
	8	* file name: nfrs.cpp
f3c0d7a5	9	* encoding: UTF-8
b75a7d8f A	10	* tab size: 8 (not used)
	11	* indentation:4
	12	*
	13	* Modification history
	14	* Date Name Comments
	15	* 10/11/2001 Doug Ported from ICU4J
	16	*/
	17
	18	#include "nfrs.h"
	19
	20	#if U_HAVE_RBNF
	21
	22	#include "unicode/uchar.h"
	23	#include "nfrule.h"
	24	#include "nfrlist.h"
4388f060	25	#include "patternprops.h"
f3c0d7a5	26	#include "putilimp.h"
b75a7d8f A	27
	28	#ifdef RBNF_DEBUG
	29	#include "cmemory.h"
	30	#endif
	31
2ca993e8 A	32	enum {
	33	/** -x */
	34	NEGATIVE_RULE_INDEX = 0,
	35	/** x.x */
	36	IMPROPER_FRACTION_RULE_INDEX = 1,
	37	/** 0.x */
	38	PROPER_FRACTION_RULE_INDEX = 2,
	39	/** x.0 */
	40	MASTER_RULE_INDEX = 3,
	41	/** Inf */
	42	INFINITY_RULE_INDEX = 4,
	43	/** NaN */
	44	NAN_RULE_INDEX = 5,
	45	NON_NUMERICAL_RULE_LENGTH = 6
	46	};
	47
b75a7d8f A	48	U_NAMESPACE_BEGIN
	49
	50	#if 0
	51	// euclid's algorithm works with doubles
	52	// note, doubles only get us up to one quadrillion or so, which
	53	// isn't as much range as we get with longs. We probably still
	54	// want either 64-bit math, or BigInteger.
	55
	56	static int64_t
	57	util_lcm(int64_t x, int64_t y)
	58	{
	59	x.abs();
	60	y.abs();
	61
	62	if (x == 0 \|\| y == 0) {
	63	return 0;
	64	} else {
	65	do {
	66	if (x < y) {
	67	int64_t t = x; x = y; y = t;
	68	}
	69	x -= y * (x/y);
	70	} while (x != 0);
	71
	72	return y;
	73	}
	74	}
	75
	76	#else
	77	/**
	78	* Calculates the least common multiple of x and y.
	79	*/
	80	static int64_t
	81	util_lcm(int64_t x, int64_t y)
	82	{
	83	// binary gcd algorithm from Knuth, "The Art of Computer Programming,"
	84	// vol. 2, 1st ed., pp. 298-299
	85	int64_t x1 = x;
	86	int64_t y1 = y;
	87
	88	int p2 = 0;
	89	while ((x1 & 1) == 0 && (y1 & 1) == 0) {
	90	++p2;
	91	x1 >>= 1;
	92	y1 >>= 1;
	93	}
	94
	95	int64_t t;
	96	if ((x1 & 1) == 1) {
	97	t = -y1;
	98	} else {
	99	t = x1;
	100	}
	101
	102	while (t != 0) {
	103	while ((t & 1) == 0) {
	104	t = t >> 1;
	105	}
	106	if (t > 0) {
	107	x1 = t;
	108	} else {
	109	y1 = -t;
	110	}
	111	t = x1 - y1;
112	}
113
114	int64_t gcd = x1 << p2;
115
116	// x * y == gcd(x, y) * lcm(x, y)
117	return x / gcd * y;
118	}
119	#endif
120
121	static const UChar gPercent = 0x0025;
122	static const UChar gColon = 0x003a;
123	static const UChar gSemicolon = 0x003b;
124	static const UChar gLineFeed = 0x000a;
125
b75a7d8f A	126	static const UChar gPercentPercent[] =
	127	{
	128	0x25, 0x25, 0
	129	}; /* "%%" */
	130
4388f060 A	131	static const UChar gNoparse[] =
	132	{
	133	0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
	134	}; /* "@noparse" */
	135
2ca993e8	136	NFRuleSet::NFRuleSet(RuleBasedNumberFormat _owner, UnicodeString descriptions, int32_t index, UErrorCode& status)
b75a7d8f A	137	: name()
b75a7d8f A	138	, rules(0)
2ca993e8 A	139	, owner(_owner)
2ca993e8 A	140	, fractionRules()
b75a7d8f A	141	, fIsFractionRuleSet(FALSE)
b75a7d8f A	142	, fIsPublic(FALSE)
4388f060	143	, fIsParseable(TRUE)
b75a7d8f	144	{
2ca993e8 A	145	for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
2ca993e8 A	146	nonNumericalRules[i] = NULL;
b75a7d8f A	147	}
	148
	149	if (U_FAILURE(status)) {
	150	return;
	151	}
	152
	153	UnicodeString& description = descriptions[index]; // !!! make sure index is valid
	154
374ca955 A	155	if (description.length() == 0) {
	156	// throw new IllegalArgumentException("Empty rule set description");
	157	status = U_PARSE_ERROR;
73c04bcf	158	return;
374ca955 A	159	}
374ca955 A	160
b75a7d8f A	161	// if the description begins with a rule set name (the rule set
	162	// name can be omitted in formatter descriptions that consist
	163	// of only one rule set), copy it out into our "name" member
	164	// and delete it from the description
	165	if (description.charAt(0) == gPercent) {
	166	int32_t pos = description.indexOf(gColon);
	167	if (pos == -1) {
	168	// throw new IllegalArgumentException("Rule set name doesn't end in colon");
	169	status = U_PARSE_ERROR;
	170	} else {
	171	name.setTo(description, 0, pos);
4388f060	172	while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
b75a7d8f A	173	}
	174	description.remove(0, pos);
	175	}
	176	} else {
374ca955	177	name.setTo(UNICODE_STRING_SIMPLE("%default"));
b75a7d8f A	178	}
	179
	180	if (description.length() == 0) {
	181	// throw new IllegalArgumentException("Empty rule set description");
	182	status = U_PARSE_ERROR;
	183	}
	184
4388f060 A	185	fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
	186
	187	if ( name.endsWith(gNoparse,8) ) {
	188	fIsParseable = FALSE;
	189	name.truncate(name.length()-8); // remove the @noparse from the name
	190	}
b75a7d8f A	191
	192	// all of the other members of NFRuleSet are initialized
	193	// by parseRules()
	194	}
	195
	196	void
2ca993e8	197	NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
b75a7d8f A	198	{
	199	// start by creating a Vector whose elements are Strings containing
	200	// the descriptions of the rules (one rule per element). The rules
	201	// are separated by semicolons (there's no escape facility: ALL
	202	// semicolons are rule delimiters)
	203
	204	if (U_FAILURE(status)) {
	205	return;
	206	}
	207
4388f060 A	208	// ensure we are starting with an empty rule list
	209	rules.deleteAll();
	210
b75a7d8f A	211	// dlf - the original code kept a separate description array for no reason,
	212	// so I got rid of it. The loop was too complex so I simplified it.
	213
	214	UnicodeString currentDescription;
	215	int32_t oldP = 0;
	216	while (oldP < description.length()) {
	217	int32_t p = description.indexOf(gSemicolon, oldP);
	218	if (p == -1) {
	219	p = description.length();
	220	}
	221	currentDescription.setTo(description, oldP, p - oldP);
	222	NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
	223	oldP = p + 1;
	224	}
	225
	226	// for rules that didn't specify a base value, their base values
	227	// were initialized to 0. Make another pass through the list and
	228	// set all those rules' base values. We also remove any special
	229	// rules from the list and put them into their own member variables
	230	int64_t defaultBaseValue = 0;
	231
	232	// (this isn't a for loop because we might be deleting items from
	233	// the vector-- we want to make sure we only increment i when
	234	// we _didn't_ delete aything from the vector)
2ca993e8 A	235	int32_t rulesSize = rules.size();
2ca993e8 A	236	for (int32_t i = 0; i < rulesSize; i++) {
b75a7d8f	237	NFRule* rule = rules[i];
2ca993e8	238	int64_t baseValue = rule->getBaseValue();
b75a7d8f	239
2ca993e8	240	if (baseValue == 0) {
b75a7d8f A	241	// if the rule's base value is 0, fill in a default
	242	// base value (this will be 1 plus the preceding
	243	// rule's base value for regular rule sets, and the
	244	// same as the preceding rule's base value in fraction
	245	// rule sets)
374ca955	246	rule->setBaseValue(defaultBaseValue, status);
2ca993e8 A	247	}
2ca993e8 A	248	else {
b75a7d8f A	249	// if it's a regular rule that already knows its base value,
	250	// check to make sure the rules are in order, and update
	251	// the default base value for the next rule
2ca993e8	252	if (baseValue < defaultBaseValue) {
b75a7d8f A	253	// throw new IllegalArgumentException("Rules are not in order");
	254	status = U_PARSE_ERROR;
	255	return;
	256	}
2ca993e8 A	257	defaultBaseValue = baseValue;
	258	}
	259	if (!fIsFractionRuleSet) {
	260	++defaultBaseValue;
	261	}
	262	}
	263	}
	264
	265	/**
	266	* Set one of the non-numerical rules.
	267	* @param rule The rule to set.
	268	*/
	269	void NFRuleSet::setNonNumericalRule(NFRule *rule) {
	270	int64_t baseValue = rule->getBaseValue();
	271	if (baseValue == NFRule::kNegativeNumberRule) {
	272	delete nonNumericalRules[NEGATIVE_RULE_INDEX];
	273	nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
	274	}
	275	else if (baseValue == NFRule::kImproperFractionRule) {
	276	setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
	277	}
	278	else if (baseValue == NFRule::kProperFractionRule) {
	279	setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
	280	}
	281	else if (baseValue == NFRule::kMasterRule) {
	282	setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE);
	283	}
	284	else if (baseValue == NFRule::kInfinityRule) {
	285	delete nonNumericalRules[INFINITY_RULE_INDEX];
	286	nonNumericalRules[INFINITY_RULE_INDEX] = rule;
	287	}
	288	else if (baseValue == NFRule::kNaNRule) {
	289	delete nonNumericalRules[NAN_RULE_INDEX];
	290	nonNumericalRules[NAN_RULE_INDEX] = rule;
	291	}
	292	}
	293
	294	/**
	295	* Determine the best fraction rule to use. Rules matching the decimal point from
	296	* DecimalFormatSymbols become the main set of rules to use.
	297	* @param originalIndex The index into nonNumericalRules
	298	* @param newRule The new rule to consider
	299	* @param rememberRule Should the new rule be added to fractionRules.
	300	*/
	301	void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
	302	if (rememberRule) {
	303	fractionRules.add(newRule);
	304	}
	305	NFRule *bestResult = nonNumericalRules[originalIndex];
	306	if (bestResult == NULL) {
	307	nonNumericalRules[originalIndex] = newRule;
	308	}
	309	else {
	310	// We have more than one. Which one is better?
	311	const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
	312	if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
	313	== newRule->getDecimalPoint())
	314	{
	315	nonNumericalRules[originalIndex] = newRule;
b75a7d8f	316	}
2ca993e8	317	// else leave it alone
b75a7d8f A	318	}
	319	}
	320
	321	NFRuleSet::~NFRuleSet()
	322	{
2ca993e8 A	323	for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
	324	if (i != IMPROPER_FRACTION_RULE_INDEX
	325	&& i != PROPER_FRACTION_RULE_INDEX
	326	&& i != MASTER_RULE_INDEX)
	327	{
	328	delete nonNumericalRules[i];
	329	}
	330	// else it will be deleted via NFRuleList fractionRules
	331	}
b75a7d8f A	332	}
b75a7d8f A	333
374ca955	334	static UBool
b75a7d8f A	335	util_equalRules(const NFRule* rule1, const NFRule* rule2)
	336	{
	337	if (rule1) {
	338	if (rule2) {
	339	return rule1 == rule2;
	340	}
	341	} else if (!rule2) {
	342	return TRUE;
	343	}
	344	return FALSE;
	345	}
	346
	347	UBool
	348	NFRuleSet::operator==(const NFRuleSet& rhs) const
	349	{
	350	if (rules.size() == rhs.rules.size() &&
	351	fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
2ca993e8 A	352	name == rhs.name) {
	353
	354	// ...then compare the non-numerical rule lists...
	355	for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
	356	if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
	357	return FALSE;
	358	}
	359	}
b75a7d8f	360
2ca993e8	361	// ...then compare the rule lists...
b75a7d8f A	362	for (uint32_t i = 0; i < rules.size(); ++i) {
	363	if (rules[i] != rhs.rules[i]) {
	364	return FALSE;
	365	}
	366	}
	367	return TRUE;
	368	}
	369	return FALSE;
	370	}
	371
2ca993e8 A	372	void
	373	NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
	374	for (uint32_t i = 0; i < rules.size(); ++i) {
	375	rules[i]->setDecimalFormatSymbols(newSymbols, status);
	376	}
	377	// Switch the fraction rules to mirror the DecimalFormatSymbols.
	378	for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) {
	379	if (nonNumericalRules[nonNumericalIdx]) {
	380	for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
	381	NFRule *fractionRule = fractionRules[fIdx];
	382	if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
	383	setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
	384	}
	385	}
	386	}
	387	}
	388
	389	for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
	390	NFRule *rule = nonNumericalRules[nnrIdx];
	391	if (rule) {
	392	rule->setDecimalFormatSymbols(newSymbols, status);
	393	}
	394	}
	395	}
	396
	397	#define RECURSION_LIMIT 64
374ca955	398
b75a7d8f	399	void
2ca993e8	400	NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
b75a7d8f	401	{
2ca993e8 A	402	if (recursionCount >= RECURSION_LIMIT) {
	403	// stop recursion
	404	status = U_INVALID_STATE_ERROR;
	405	return;
	406	}
	407	const NFRule *rule = findNormalRule(number);
374ca955	408	if (rule) { // else error, but can't report it
2ca993e8	409	rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
374ca955	410	}
b75a7d8f A	411	}
	412
	413	void
2ca993e8	414	NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
b75a7d8f	415	{
2ca993e8 A	416	if (recursionCount >= RECURSION_LIMIT) {
	417	// stop recursion
	418	status = U_INVALID_STATE_ERROR;
	419	return;
	420	}
	421	const NFRule *rule = findDoubleRule(number);
374ca955	422	if (rule) { // else error, but can't report it
2ca993e8	423	rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
374ca955	424	}
b75a7d8f A	425	}
b75a7d8f A	426
2ca993e8	427	const NFRule*
b75a7d8f A	428	NFRuleSet::findDoubleRule(double number) const
	429	{
	430	// if this is a fraction rule set, use findFractionRuleSetRule()
	431	if (isFractionRuleSet()) {
	432	return findFractionRuleSetRule(number);
	433	}
	434
2ca993e8 A	435	if (uprv_isNaN(number)) {
	436	const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
	437	if (!rule) {
	438	rule = owner->getDefaultNaNRule();
	439	}
	440	return rule;
	441	}
	442
b75a7d8f A	443	// if the number is negative, return the negative number rule
	444	// (if there isn't a negative-number rule, we pretend it's a
	445	// positive number)
	446	if (number < 0) {
2ca993e8 A	447	if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
2ca993e8 A	448	return nonNumericalRules[NEGATIVE_RULE_INDEX];
b75a7d8f A	449	} else {
	450	number = -number;
	451	}
	452	}
	453
2ca993e8 A	454	if (uprv_isInfinite(number)) {
	455	const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
	456	if (!rule) {
	457	rule = owner->getDefaultInfinityRule();
	458	}
	459	return rule;
	460	}
	461
b75a7d8f A	462	// if the number isn't an integer, we use one of the fraction rules...
	463	if (number != uprv_floor(number)) {
	464	// if the number is between 0 and 1, return the proper
	465	// fraction rule
2ca993e8 A	466	if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
2ca993e8 A	467	return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
b75a7d8f A	468	}
b75a7d8f A	469	// otherwise, return the improper fraction rule
2ca993e8 A	470	else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
2ca993e8 A	471	return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
b75a7d8f A	472	}
	473	}
	474
	475	// if there's a master rule, use it to format the number
2ca993e8 A	476	if (nonNumericalRules[MASTER_RULE_INDEX]) {
2ca993e8 A	477	return nonNumericalRules[MASTER_RULE_INDEX];
729e4ab9 A	478	}
729e4ab9 A	479
b75a7d8f A	480	// and if we haven't yet returned a rule, use findNormalRule()
	481	// to find the applicable rule
	482	int64_t r = util64_fromDouble(number + 0.5);
	483	return findNormalRule(r);
	484	}
	485
2ca993e8	486	const NFRule *
b75a7d8f A	487	NFRuleSet::findNormalRule(int64_t number) const
	488	{
	489	// if this is a fraction rule set, use findFractionRuleSetRule()
	490	// to find the rule (we should only go into this clause if the
	491	// value is 0)
	492	if (fIsFractionRuleSet) {
	493	return findFractionRuleSetRule((double)number);
	494	}
	495
	496	// if the number is negative, return the negative-number rule
	497	// (if there isn't one, pretend the number is positive)
	498	if (number < 0) {
2ca993e8 A	499	if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
2ca993e8 A	500	return nonNumericalRules[NEGATIVE_RULE_INDEX];
b75a7d8f A	501	} else {
	502	number = -number;
	503	}
	504	}
	505
	506	// we have to repeat the preceding two checks, even though we
	507	// do them in findRule(), because the version of format() that
	508	// takes a long bypasses findRule() and goes straight to this
	509	// function. This function does skip the fraction rules since
	510	// we know the value is an integer (it also skips the master
	511	// rule, since it's considered a fraction rule. Skipping the
	512	// master rule in this function is also how we avoid infinite
	513	// recursion)
	514
	515	// {dlf} unfortunately this fails if there are no rules except
	516	// special rules. If there are no rules, use the master rule.
	517
	518	// binary-search the rule list for the applicable rule
	519	// (a rule is used for all values from its base value to
	520	// the next rule's base value)
	521	int32_t hi = rules.size();
	522	if (hi > 0) {
	523	int32_t lo = 0;
	524
	525	while (lo < hi) {
	526	int32_t mid = (lo + hi) / 2;
	527	if (rules[mid]->getBaseValue() == number) {
	528	return rules[mid];
	529	}
	530	else if (rules[mid]->getBaseValue() > number) {
	531	hi = mid;
	532	}
	533	else {
	534	lo = mid + 1;
	535	}
	536	}
374ca955 A	537	if (hi == 0) { // bad rule set, minimum base > 0
	538	return NULL; // want to throw exception here
	539	}
	540
b75a7d8f A	541	NFRule *result = rules[hi - 1];
	542
	543	// use shouldRollBack() to see whether we need to invoke the
	544	// rollback rule (see shouldRollBack()'s documentation for
	545	// an explanation of the rollback rule). If we do, roll back
	546	// one rule and return that one instead of the one we'd normally
	547	// return
f3c0d7a5	548	if (result->shouldRollBack(number)) {
374ca955 A	549	if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
	550	return NULL;
	551	}
b75a7d8f A	552	result = rules[hi - 2];
	553	}
	554	return result;
	555	}
	556	// else use the master rule
2ca993e8	557	return nonNumericalRules[MASTER_RULE_INDEX];
b75a7d8f A	558	}
	559
	560	/**
	561	* If this rule is a fraction rule set, this function is used by
	562	* findRule() to select the most appropriate rule for formatting
	563	* the number. Basically, the base value of each rule in the rule
	564	* set is treated as the denominator of a fraction. Whichever
	565	* denominator can produce the fraction closest in value to the
	566	* number passed in is the result. If there's a tie, the earlier
	567	* one in the list wins. (If there are two rules in a row with the
	568	* same base value, the first one is used when the numerator of the
	569	* fraction would be 1, and the second rule is used the rest of the
	570	* time.
	571	* @param number The number being formatted (which will always be
	572	* a number between 0 and 1)
	573	* @return The rule to use to format this number
	574	*/
2ca993e8	575	const NFRule*
b75a7d8f A	576	NFRuleSet::findFractionRuleSetRule(double number) const
	577	{
	578	// the obvious way to do this (multiply the value being formatted
	579	// by each rule's base value until you get an integral result)
	580	// doesn't work because of rounding error. This method is more
	581	// accurate
	582
	583	// find the least common multiple of the rules' base values
	584	// and multiply this by the number being formatted. This is
	585	// all the precision we need, and we can do all of the rest
	586	// of the math using integer arithmetic
	587	int64_t leastCommonMultiple = rules[0]->getBaseValue();
	588	int64_t numerator;
	589	{
	590	for (uint32_t i = 1; i < rules.size(); ++i) {
	591	leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
	592	}
	593	numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
	594	}
	595	// for each rule, do the following...
	596	int64_t tempDifference;
	597	int64_t difference = util64_fromDouble(uprv_maxMantissa());
	598	int32_t winner = 0;
	599	for (uint32_t i = 0; i < rules.size(); ++i) {
	600	// "numerator" is the numerator of the fraction if the
	601	// denominator is the LCD. The numerator if the rule's
	602	// base value is the denominator is "numerator" times the
	603	// base value divided bythe LCD. Here we check to see if
	604	// that's an integer, and if not, how close it is to being
	605	// an integer.
	606	tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
	607
	608
	609	// normalize the result of the above calculation: we want
	610	// the numerator's distance from the CLOSEST multiple
	611	// of the LCD
	612	if (leastCommonMultiple - tempDifference < tempDifference) {
	613	tempDifference = leastCommonMultiple - tempDifference;
	614	}
	615
	616	// if this is as close as we've come, keep track of how close
	617	// that is, and the line number of the rule that did it. If
	618	// we've scored a direct hit, we don't have to look at any more
	619	// rules
	620	if (tempDifference < difference) {
	621	difference = tempDifference;
	622	winner = i;
	623	if (difference == 0) {
	624	break;
	625	}
	626	}
	627	}
	628
	629	// if we have two successive rules that both have the winning base
	630	// value, then the first one (the one we found above) is used if
	631	// the numerator of the fraction is 1 and the second one is used if
	632	// the numerator of the fraction is anything else (this lets us
	633	// do things like "one third"/"two thirds" without haveing to define
	634	// a whole bunch of extra rule sets)
	635	if ((unsigned)(winner + 1) < rules.size() &&
	636	rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
	637	double n = ((double)rules[winner]->getBaseValue()) * number;
	638	if (n < 0.5 \|\| n >= 2) {
	639	++winner;
640	}
641	}
642
643	// finally, return the winning rule
644	return rules[winner];
645	}
646
647	/**
648	* Parses a string. Matches the string to be parsed against each
649	* of its rules (with a base value less than upperBound) and returns
650	* the value produced by the rule that matched the most charcters
651	* in the source string.
652	* @param text The string to parse
653	* @param parsePosition The initial position is ignored and assumed
654	* to be 0. On exit, this object has been updated to point to the
655	* first character position this rule set didn't consume.
656	* @param upperBound Limits the rules that can be allowed to match.
657	* Only rules whose base values are strictly less than upperBound
658	* are considered.
659	* @return The numerical result of parsing this string. This will
660	* be the matching rule's base value, composed appropriately with
661	* the results of matching any of its substitutions. The object
662	* will be an instance of Long if it's an integral value; otherwise,
663	* it will be an instance of Double. This function always returns
664	* a valid object: If nothing matched the input string at all,
665	* this function returns new Long(0), and the parse position is
666	* left unchanged.
667	*/
668	#ifdef RBNF_DEBUG
669	#include <stdio.h>
670
671	static void dumpUS(FILE* f, const UnicodeString& us) {
672	int len = us.length();
673	char* buf = (char )uprv_malloc((len+1)sizeof(char)); //new char[len+1];
46f4442e A	674	if (buf != NULL) {
	675	us.extract(0, len, buf);
	676	buf[len] = 0;
	677	fprintf(f, "%s", buf);
	678	uprv_free(buf); //delete[] buf;
	679	}
b75a7d8f A	680	}
	681	#endif
	682
	683	UBool
0f5d89e8	684	NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result, UBool lenient) const
b75a7d8f A	685	{
	686	// try matching each rule in the rule set against the text being
	687	// parsed. Whichever one matches the most characters is the one
	688	// that determines the value we return.
	689
	690	result.setLong(0);
	691
	692	// dump out if there's no text to parse
	693	if (text.length() == 0) {
	694	return 0;
	695	}
	696
	697	ParsePosition highWaterMark;
	698	ParsePosition workingPos = pos;
	699
	700	#ifdef RBNF_DEBUG
	701	fprintf(stderr, "<nfrs> %x '", this);
	702	dumpUS(stderr, name);
	703	fprintf(stderr, "' text '");
	704	dumpUS(stderr, text);
	705	fprintf(stderr, "'\n");
	706	fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0);
	707	#endif
2ca993e8 A	708	// Try each of the negative rules, fraction rules, infinity rules and NaN rules
2ca993e8 A	709	for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
0f5d89e8 A	710	if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
	711	// Mark this rule as being executed so that we don't try to execute it again.
	712	nonNumericalExecutedRuleMask \|= 1 << i;
	713
2ca993e8	714	Formattable tempResult;
0f5d89e8	715	UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult, lenient \|\| isDecimalFormatRuleParseable() );
2ca993e8 A	716	if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
	717	result = tempResult;
	718	highWaterMark = workingPos;
b75a7d8f	719	}
2ca993e8	720	workingPos = pos;
b75a7d8f A	721	}
	722	}
	723	#ifdef RBNF_DEBUG
	724	fprintf(stderr, "<nfrs> continue other with text '");
	725	dumpUS(stderr, text);
	726	fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
	727	#endif
	728
	729	// finally, go through the regular rules one at a time. We start
	730	// at the end of the list because we want to try matching the most
	731	// sigificant rule first (this helps ensure that we parse
	732	// "five thousand three hundred six" as
	733	// "(five thousand) (three hundred) (six)" rather than
	734	// "((five thousand three) hundred) (six)"). Skip rules whose
	735	// base values are higher than the upper bound (again, this helps
	736	// limit ambiguity by making sure the rules that match a rule's
	737	// are less significant than the rule containing the substitutions)/
	738	{
	739	int64_t ub = util64_fromDouble(upperBound);
	740	#ifdef RBNF_DEBUG
	741	{
	742	char ubstr[64];
	743	util64_toa(ub, ubstr, 64);
	744	char ubstrhex[64];
	745	util64_toa(ub, ubstrhex, 64, 16);
	746	fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
	747	}
	748	#endif
	749	for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
	750	if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
	751	continue;
	752	}
	753	Formattable tempResult;
0f5d89e8	754	UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
b75a7d8f A	755	if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
	756	result = tempResult;
	757	highWaterMark = workingPos;
	758	}
	759	workingPos = pos;
	760	}
	761	}
	762	#ifdef RBNF_DEBUG
	763	fprintf(stderr, "<nfrs> exit\n");
	764	#endif
	765	// finally, update the parse postion we were passed to point to the
	766	// first character we didn't use, and return the result that
	767	// corresponds to that string of characters
	768	pos = highWaterMark;
	769
	770	return 1;
	771	}
	772
	773	void
	774	NFRuleSet::appendRules(UnicodeString& result) const
	775	{
2ca993e8 A	776	uint32_t i;
2ca993e8 A	777
b75a7d8f A	778	// the rule set name goes first...
	779	result.append(name);
	780	result.append(gColon);
	781	result.append(gLineFeed);
	782
	783	// followed by the regular rules...
2ca993e8	784	for (i = 0; i < rules.size(); i++) {
73c04bcf	785	rules[i]->_appendRuleText(result);
b75a7d8f A	786	result.append(gLineFeed);
	787	}
	788
	789	// followed by the special rules (if they exist)
2ca993e8 A	790	for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
	791	NFRule *rule = nonNumericalRules[i];
	792	if (nonNumericalRules[i]) {
	793	if (rule->getBaseValue() == NFRule::kImproperFractionRule
	794	\|\| rule->getBaseValue() == NFRule::kProperFractionRule
	795	\|\| rule->getBaseValue() == NFRule::kMasterRule)
	796	{
	797	for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
	798	NFRule *fractionRule = fractionRules[fIdx];
	799	if (fractionRule->getBaseValue() == rule->getBaseValue()) {
	800	fractionRule->_appendRuleText(result);
	801	result.append(gLineFeed);
	802	}
	803	}
	804	}
	805	else {
	806	rule->_appendRuleText(result);
b75a7d8f A	807	result.append(gLineFeed);
	808	}
	809	}
	810	}
	811	}
	812
	813	// utility functions
	814
	815	int64_t util64_fromDouble(double d) {
	816	int64_t result = 0;
	817	if (!uprv_isNaN(d)) {
	818	double mant = uprv_maxMantissa();
	819	if (d < -mant) {
	820	d = -mant;
	821	} else if (d > mant) {
	822	d = mant;
	823	}
	824	UBool neg = d < 0;
	825	if (neg) {
	826	d = -d;
	827	}
	828	result = (int64_t)uprv_floor(d);
	829	if (neg) {
	830	result = -result;
	831	}
	832	}
	833	return result;
	834	}
	835
0f5d89e8	836	uint64_t util64_pow(uint32_t base, uint16_t exponent) {
f3c0d7a5	837	if (base == 0) {
b75a7d8f	838	return 0;
b75a7d8f	839	}
0f5d89e8 A	840	uint64_t result = 1;
	841	uint64_t pow = base;
	842	while (true) {
	843	if ((exponent & 1) == 1) {
	844	result *= pow;
	845	}
	846	exponent >>= 1;
	847	if (exponent == 0) {
	848	break;
	849	}
	850	pow *= pow;
	851	}
f3c0d7a5	852	return result;
b75a7d8f A	853	}
	854
	855	static const uint8_t asciiDigits[] = {
	856	0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
	857	0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
	858	0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
	859	0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
	860	0x77u, 0x78u, 0x79u, 0x7au,
	861	};
	862
	863	static const UChar kUMinus = (UChar)0x002d;
	864
73c04bcf	865	#ifdef RBNF_DEBUG
b75a7d8f A	866	static const char kMinus = '-';
	867
	868	static const uint8_t digitInfo[] = {
	869	0, 0, 0, 0, 0, 0, 0, 0,
	870	0, 0, 0, 0, 0, 0, 0, 0,
	871	0, 0, 0, 0, 0, 0, 0, 0,
	872	0, 0, 0, 0, 0, 0, 0, 0,
	873	0, 0, 0, 0, 0, 0, 0, 0,
	874	0, 0, 0, 0, 0, 0, 0, 0,
	875	0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
	876	0x88u, 0x89u, 0, 0, 0, 0, 0, 0,
	877	0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
	878	0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
	879	0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
	880	0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
	881	0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
	882	0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
	883	0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
	884	0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
	885	};
	886
b75a7d8f A	887	int64_t util64_atoi(const char* str, uint32_t radix)
	888	{
	889	if (radix > 36) {
	890	radix = 36;
	891	} else if (radix < 2) {
	892	radix = 2;
	893	}
	894	int64_t lradix = radix;
	895
	896	int neg = 0;
	897	if (*str == kMinus) {
	898	++str;
	899	neg = 1;
	900	}
	901	int64_t result = 0;
	902	uint8_t b;
	903	while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
	904	result *= lradix;
	905	result += (int32_t)b;
	906	}
	907	if (neg) {
	908	result = -result;
	909	}
	910	return result;
	911	}
b75a7d8f A	912
	913	int64_t util64_utoi(const UChar* str, uint32_t radix)
	914	{
	915	if (radix > 36) {
	916	radix = 36;
	917	} else if (radix < 2) {
	918	radix = 2;
	919	}
	920	int64_t lradix = radix;
	921
	922	int neg = 0;
	923	if (*str == kUMinus) {
	924	++str;
	925	neg = 1;
	926	}
	927	int64_t result = 0;
	928	UChar c;
	929	uint8_t b;
	930	while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
	931	result *= lradix;
	932	result += (int32_t)b;
	933	}
	934	if (neg) {
	935	result = -result;
	936	}
	937	return result;
	938	}
	939
b75a7d8f A	940	uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
	941	{
	942	if (radix > 36) {
	943	radix = 36;
	944	} else if (radix < 2) {
	945	radix = 2;
	946	}
	947	int64_t base = radix;
	948
	949	char* p = buf;
	950	if (len && (w < 0) && (radix == 10) && !raw) {
	951	w = -w;
	952	*p++ = kMinus;
	953	--len;
	954	} else if (len && (w == 0)) {
	955	*p++ = (char)raw ? 0 : asciiDigits[0];
	956	--len;
	957	}
	958
	959	while (len && w != 0) {
	960	int64_t n = w / base;
	961	int64_t m = n * base;
	962	int32_t d = (int32_t)(w-m);
	963	*p++ = raw ? (char)d : asciiDigits[d];
	964	w = n;
	965	--len;
	966	}
	967	if (len) {
	968	*p = 0; // null terminate if room for caller convenience
	969	}
	970
	971	len = p - buf;
	972	if (*buf == kMinus) {
	973	++buf;
	974	}
	975	while (--p > buf) {
	976	char c = *p;
	977	p = buf;
	978	*buf = c;
	979	++buf;
	980	}
	981
	982	return len;
	983	}
	984	#endif
	985
	986	uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
	987	{
	988	if (radix > 36) {
	989	radix = 36;
	990	} else if (radix < 2) {
	991	radix = 2;
	992	}
	993	int64_t base = radix;
	994
	995	UChar* p = buf;
	996	if (len && (w < 0) && (radix == 10) && !raw) {
	997	w = -w;
	998	*p++ = kUMinus;
	999	--len;
	1000	} else if (len && (w == 0)) {
	1001	*p++ = (UChar)raw ? 0 : asciiDigits[0];
	1002	--len;
	1003	}
1004
1005	while (len && (w != 0)) {
1006	int64_t n = w / base;
1007	int64_t m = n * base;
1008	int32_t d = (int32_t)(w-m);
1009	*p++ = (UChar)(raw ? d : asciiDigits[d]);
1010	w = n;
1011	--len;
1012	}
1013	if (len) {
1014	*p = 0; // null terminate if room for caller convenience
1015	}
1016
1017	len = (uint32_t)(p - buf);
1018	if (*buf == kUMinus) {
1019	++buf;
1020	}
1021	while (--p > buf) {
1022	UChar c = *p;
1023	p = buf;
1024	*buf = c;
1025	++buf;
1026	}
1027
1028	return len;
1029	}
1030
1031
1032	U_NAMESPACE_END
1033
1034	/* U_HAVE_RBNF */
1035	#endif