2 *******************************************************************************
3 * Copyright (C) 1997-2012, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
8 #include "utypeinfo.h" // for 'typeid' to work
10 #include "unicode/rbnf.h"
14 #include "unicode/normlzr.h"
15 #include "unicode/tblcoll.h"
16 #include "unicode/uchar.h"
17 #include "unicode/ucol.h"
18 #include "unicode/uloc.h"
19 #include "unicode/unum.h"
20 #include "unicode/ures.h"
21 #include "unicode/ustring.h"
22 #include "unicode/utf16.h"
23 #include "unicode/udata.h"
28 #include "patternprops.h"
38 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
40 static const UChar gPercentPercent
[] =
45 // All urbnf objects are created through openRules, so we init all of the
46 // Unicode string constants required by rbnf, nfrs, or nfr here.
47 static const UChar gLenientParse
[] =
49 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
50 }; /* "%%lenient-parse:" */
51 static const UChar gSemiColon
= 0x003B;
52 static const UChar gSemiPercent
[] =
// Numeric bound built from a small shift: kHalfMaxDouble is 2^22 converted to
// double, and kMaxDouble is its square (2^44). Presumably the value is split in
// two halves so the integer shift stays well inside int range -- confirm.
// kMaxDouble is passed as an argument to the per-rule-set parse call in
// RuleBasedNumberFormat::parse().
57 #define kSomeNumberOfBitsDiv2 22
58 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
59 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
63 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat
)
66 This is a utility class. It does not use ICU's RTTI.
67 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
68 Please make sure that intltest passes on Windows in Release mode,
69 since the string pooling per compilation unit will mess up how RTTI works.
70 The RTTI code was also removed due to lack of code coverage.
72 class LocalizationInfo
: public UMemory
{
74 virtual ~LocalizationInfo();
78 LocalizationInfo() : refcount(0) {}
80 LocalizationInfo
* ref(void) {
85 LocalizationInfo
* unref(void) {
86 if (refcount
&& --refcount
== 0) {
92 virtual UBool
operator==(const LocalizationInfo
* rhs
) const;
// Inequality test: simply the logical negation of operator==(rhs).
// Note that rhs is taken by pointer, mirroring the signature of operator==.
93 inline UBool
operator!=(const LocalizationInfo
* rhs
) const { return !operator==(rhs
); }
95 virtual int32_t getNumberOfRuleSets(void) const = 0;
96 virtual const UChar
* getRuleSetName(int32_t index
) const = 0;
97 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
98 virtual const UChar
* getLocaleName(int32_t index
) const = 0;
99 virtual const UChar
* getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const = 0;
101 virtual int32_t indexForLocale(const UChar
* locale
) const;
102 virtual int32_t indexForRuleSet(const UChar
* ruleset
) const;
104 // virtual UClassID getDynamicClassID() const = 0;
105 // static UClassID getStaticClassID(void);
108 LocalizationInfo::~LocalizationInfo() {}
110 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
112 // if both strings are NULL, this returns TRUE
114 streq(const UChar
* lhs
, const UChar
* rhs
) {
119 return u_strcmp(lhs
, rhs
) == 0;
125 LocalizationInfo::operator==(const LocalizationInfo
* rhs
) const {
131 int32_t rsc
= getNumberOfRuleSets();
132 if (rsc
== rhs
->getNumberOfRuleSets()) {
133 for (int i
= 0; i
< rsc
; ++i
) {
134 if (!streq(getRuleSetName(i
), rhs
->getRuleSetName(i
))) {
138 int32_t dlc
= getNumberOfDisplayLocales();
139 if (dlc
== rhs
->getNumberOfDisplayLocales()) {
140 for (int i
= 0; i
< dlc
; ++i
) {
141 const UChar
* locale
= getLocaleName(i
);
142 int32_t ix
= rhs
->indexForLocale(locale
);
143 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
144 if (!streq(locale
, rhs
->getLocaleName(ix
))) {
147 for (int j
= 0; j
< rsc
; ++j
) {
148 if (!streq(getDisplayName(i
, j
), rhs
->getDisplayName(ix
, j
))) {
161 LocalizationInfo::indexForLocale(const UChar
* locale
) const {
162 for (int i
= 0; i
< getNumberOfDisplayLocales(); ++i
) {
163 if (streq(locale
, getLocaleName(i
))) {
171 LocalizationInfo::indexForRuleSet(const UChar
* ruleset
) const {
173 for (int i
= 0; i
< getNumberOfRuleSets(); ++i
) {
174 if (streq(ruleset
, getRuleSetName(i
))) {
183 typedef void (*Fn_Deleter
)(void*);
191 VArray() : buf(NULL
), cap(0), size(0), deleter(NULL
) {}
193 VArray(Fn_Deleter del
) : buf(NULL
), cap(0), size(0), deleter(del
) {}
197 for (int i
= 0; i
< size
; ++i
) {
208 void add(void* elem
, UErrorCode
& status
) {
209 if (U_SUCCESS(status
)) {
213 } else if (cap
< 256) {
219 buf
= (void**)uprv_malloc(cap
* sizeof(void*));
221 buf
= (void**)uprv_realloc(buf
, cap
* sizeof(void*));
224 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
225 status
= U_MEMORY_ALLOCATION_ERROR
;
228 void* start
= &buf
[size
];
229 size_t count
= (cap
- size
) * sizeof(void*);
230 uprv_memset(start
, 0, count
); // fill with nulls, just because
236 void** release(void) {
247 class StringLocalizationInfo
: public LocalizationInfo
{
253 friend class LocDataParser
;
255 StringLocalizationInfo(UChar
* i
, UChar
*** d
, int32_t numRS
, int32_t numLocs
)
256 : info(i
), data(d
), numRuleSets(numRS
), numLocales(numLocs
)
261 static StringLocalizationInfo
* create(const UnicodeString
& info
, UParseError
& perror
, UErrorCode
& status
);
263 virtual ~StringLocalizationInfo();
264 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets
; }
265 virtual const UChar
* getRuleSetName(int32_t index
) const;
266 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales
; }
267 virtual const UChar
* getLocaleName(int32_t index
) const;
268 virtual const UChar
* getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const;
270 // virtual UClassID getDynamicClassID() const;
271 // static UClassID getStaticClassID(void);
274 void init(UErrorCode
& status
) const;
279 OPEN_ANGLE
= 0x003c, /* '<' */
280 CLOSE_ANGLE
= 0x003e, /* '>' */
288 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
290 class LocDataParser
{
299 LocDataParser(UParseError
& parseError
, UErrorCode
& status
)
300 : data(NULL
), e(NULL
), p(NULL
), ch(0xffff), pe(parseError
), ec(status
) {}
304 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
305 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
307 StringLocalizationInfo
* parse(UChar
* data
, int32_t len
);
// Advance the parse cursor p by one UChar and reset ch to 0xffff, the value
// the other helpers treat as "no cached character" (they fall back to *p).
// Performs no bounds check itself; callers test p < e first.
311 void inc(void) { ++p
; ch
= 0xffff; }
// If the cursor is before the end (p < e) and the current character -- either
// the cached ch or *p -- equals c, consume it via inc() and return TRUE.
// Otherwise return FALSE and leave the cursor where it is.
312 UBool
checkInc(UChar c
) { if (p
< e
&& (ch
== c
|| *p
== c
)) { inc(); return TRUE
; } return FALSE
; }
// Non-consuming counterpart of checkInc(): returns whether the cursor is before
// the end (p < e) and the current character (cached ch or *p) equals c.
// The cursor is never moved.
313 UBool
check(UChar c
) { return p
< e
&& (ch
== c
|| *p
== c
); }
// Advance the cursor past consecutive characters for which
// PatternProps::isWhiteSpace() is true, stopping at the first other character
// or at e. The character tested is ch when it holds something other than
// 0xffff, else *p.
314 void skipWhitespace(void) { while (p
< e
&& PatternProps::isWhiteSpace(ch
!= 0xffff ? ch
: *p
)) inc();}
315 UBool
inList(UChar c
, const UChar
* list
) const {
316 if (*list
== SPACE
&& PatternProps::isWhiteSpace(c
)) return TRUE
;
317 while (*list
&& *list
!= c
) ++list
; return *list
== c
;
319 void parseError(const char* msg
);
321 StringLocalizationInfo
* doParse(void);
323 UChar
** nextArray(int32_t& requiredLength
);
324 UChar
* nextString(void);
328 #define ERROR(msg) parseError(msg); return NULL;
330 #define ERROR(msg) parseError(NULL); return NULL;
334 static const UChar DQUOTE_STOPLIST
[] = {
338 static const UChar SQUOTE_STOPLIST
[] = {
342 static const UChar NOQUOTE_STOPLIST
[] = {
343 SPACE
, COMMA
, CLOSE_ANGLE
, OPEN_ANGLE
, TICK
, QUOTE
, 0
351 StringLocalizationInfo
*
352 LocDataParser::parse(UChar
* _data
, int32_t len
) {
354 if (_data
) uprv_free(_data
);
360 pe
.postContext
[0] = 0;
361 pe
.preContext
[0] = 0;
364 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
369 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
383 StringLocalizationInfo
*
384 LocDataParser::doParse(void) {
386 if (!checkInc(OPEN_ANGLE
)) {
387 ERROR("Missing open angle");
389 VArray
array(DeleteFn
);
390 UBool mightHaveNext
= TRUE
;
391 int32_t requiredLength
= -1;
392 while (mightHaveNext
) {
393 mightHaveNext
= FALSE
;
394 UChar
** elem
= nextArray(requiredLength
);
396 UBool haveComma
= check(COMMA
);
401 mightHaveNext
= TRUE
;
403 } else if (haveComma
) {
404 ERROR("Unexpected character");
409 if (!checkInc(CLOSE_ANGLE
)) {
410 if (check(OPEN_ANGLE
)) {
411 ERROR("Missing comma in outer array");
413 ERROR("Missing close angle bracket in outer array");
419 ERROR("Extra text after close of localization data");
424 int32_t numLocs
= array
.length() - 2; // subtract first, NULL
425 UChar
*** result
= (UChar
***)array
.release();
427 return new StringLocalizationInfo(data
, result
, requiredLength
-2, numLocs
); // subtract first, NULL
431 ERROR("Unknown error");
435 LocDataParser::nextArray(int32_t& requiredLength
) {
441 if (!checkInc(OPEN_ANGLE
)) {
442 ERROR("Missing open angle");
446 UBool mightHaveNext
= TRUE
;
447 while (mightHaveNext
) {
448 mightHaveNext
= FALSE
;
449 UChar
* elem
= nextString();
451 UBool haveComma
= check(COMMA
);
456 mightHaveNext
= TRUE
;
458 } else if (haveComma
) {
459 ERROR("Unexpected comma");
463 if (!checkInc(CLOSE_ANGLE
)) {
464 if (check(OPEN_ANGLE
)) {
465 ERROR("Missing close angle bracket in inner array");
467 ERROR("Missing comma in inner array");
473 if (requiredLength
== -1) {
474 requiredLength
= array
.length() + 1;
475 } else if (array
.length() != requiredLength
) {
476 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
477 ERROR("Array not of required length");
480 return (UChar
**)array
.release();
482 ERROR("Unknown Error");
486 LocDataParser::nextString() {
487 UChar
* result
= NULL
;
491 const UChar
* terminators
;
493 UBool haveQuote
= c
== QUOTE
|| c
== TICK
;
496 terminators
= c
== QUOTE
? DQUOTE_STOPLIST
: SQUOTE_STOPLIST
;
498 terminators
= NOQUOTE_STOPLIST
;
501 while (p
< e
&& !inList(*p
, terminators
)) ++p
;
503 ERROR("Unexpected end of data");
509 *p
= 0x0; // terminate by writing to data
510 result
= start
; // just point into data
514 ERROR("Missing matching quote");
515 } else if (p
== start
) {
516 ERROR("Empty string");
519 } else if (x
== OPEN_ANGLE
|| x
== TICK
|| x
== QUOTE
) {
520 ERROR("Unexpected character in string");
524 // ok for there to be no next string
529 LocDataParser::parseError(const char* /*str*/) {
534 const UChar
* start
= p
- U_PARSE_CONTEXT_LEN
- 1;
538 for (UChar
* x
= p
; --x
>= start
;) {
544 const UChar
* limit
= p
+ U_PARSE_CONTEXT_LEN
- 1;
548 u_strncpy(pe
.preContext
, start
, (int32_t)(p
-start
));
549 pe
.preContext
[p
-start
] = 0;
550 u_strncpy(pe
.postContext
, p
, (int32_t)(limit
-p
));
551 pe
.postContext
[limit
-p
] = 0;
552 pe
.offset
= (int32_t)(p
- data
);
555 fprintf(stderr
, "%s at or near character %d: ", str
, p
-data
);
558 msg
.append(start
, p
- start
);
559 msg
.append((UChar
)0x002f); /* SOLIDUS/SLASH */
560 msg
.append(p
, limit
-p
);
564 int32_t len
= msg
.extract(0, msg
.length(), buf
, 128);
570 fprintf(stderr
, "%s\n", buf
);
584 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
586 StringLocalizationInfo
*
587 StringLocalizationInfo::create(const UnicodeString
& info
, UParseError
& perror
, UErrorCode
& status
) {
588 if (U_FAILURE(status
)) {
592 int32_t len
= info
.length();
594 return NULL
; // no error;
597 UChar
* p
= (UChar
*)uprv_malloc(len
* sizeof(UChar
));
599 status
= U_MEMORY_ALLOCATION_ERROR
;
602 info
.extract(p
, len
, status
);
603 if (!U_FAILURE(status
)) {
604 status
= U_ZERO_ERROR
; // clear warning about non-termination
607 LocDataParser
parser(perror
, status
);
608 return parser
.parse(p
, len
);
611 StringLocalizationInfo::~StringLocalizationInfo() {
612 for (UChar
*** p
= (UChar
***)data
; *p
; ++p
) {
613 // remaining data is simply pointer into our unicode string data.
614 if (*p
) uprv_free(*p
);
616 if (data
) uprv_free(data
);
617 if (info
) uprv_free(info
);
622 StringLocalizationInfo::getRuleSetName(int32_t index
) const {
623 if (index
>= 0 && index
< getNumberOfRuleSets()) {
624 return data
[0][index
];
630 StringLocalizationInfo::getLocaleName(int32_t index
) const {
631 if (index
>= 0 && index
< getNumberOfDisplayLocales()) {
632 return data
[index
+1][0];
638 StringLocalizationInfo::getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const {
639 if (localeIndex
>= 0 && localeIndex
< getNumberOfDisplayLocales() &&
640 ruleIndex
>= 0 && ruleIndex
< getNumberOfRuleSets()) {
641 return data
[localeIndex
+1][ruleIndex
+1];
648 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
649 const UnicodeString
& locs
,
650 const Locale
& alocale
, UParseError
& perror
, UErrorCode
& status
)
652 , ruleSetDescriptions(NULL
)
654 , defaultRuleSet(NULL
)
657 , decimalFormatSymbols(NULL
)
659 , lenientParseRules(NULL
)
660 , localizations(NULL
)
662 LocalizationInfo
* locinfo
= StringLocalizationInfo::create(locs
, perror
, status
);
663 init(description
, locinfo
, perror
, status
);
666 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
667 const UnicodeString
& locs
,
668 UParseError
& perror
, UErrorCode
& status
)
670 , ruleSetDescriptions(NULL
)
672 , defaultRuleSet(NULL
)
673 , locale(Locale::getDefault())
675 , decimalFormatSymbols(NULL
)
677 , lenientParseRules(NULL
)
678 , localizations(NULL
)
680 LocalizationInfo
* locinfo
= StringLocalizationInfo::create(locs
, perror
, status
);
681 init(description
, locinfo
, perror
, status
);
684 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
685 LocalizationInfo
* info
,
686 const Locale
& alocale
, UParseError
& perror
, UErrorCode
& status
)
688 , ruleSetDescriptions(NULL
)
690 , defaultRuleSet(NULL
)
693 , decimalFormatSymbols(NULL
)
695 , lenientParseRules(NULL
)
696 , localizations(NULL
)
698 init(description
, info
, perror
, status
);
701 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
705 , ruleSetDescriptions(NULL
)
707 , defaultRuleSet(NULL
)
708 , locale(Locale::getDefault())
710 , decimalFormatSymbols(NULL
)
712 , lenientParseRules(NULL
)
713 , localizations(NULL
)
715 init(description
, NULL
, perror
, status
);
718 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
719 const Locale
& aLocale
,
723 , ruleSetDescriptions(NULL
)
725 , defaultRuleSet(NULL
)
728 , decimalFormatSymbols(NULL
)
730 , lenientParseRules(NULL
)
731 , localizations(NULL
)
733 init(description
, NULL
, perror
, status
);
736 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag
, const Locale
& alocale
, UErrorCode
& status
)
738 , ruleSetDescriptions(NULL
)
740 , defaultRuleSet(NULL
)
743 , decimalFormatSymbols(NULL
)
745 , lenientParseRules(NULL
)
746 , localizations(NULL
)
748 if (U_FAILURE(status
)) {
752 const char* rules_tag
= "RBNFRules";
753 const char* fmt_tag
= "";
755 case URBNF_SPELLOUT
: fmt_tag
= "SpelloutRules"; break;
756 case URBNF_ORDINAL
: fmt_tag
= "OrdinalRules"; break;
757 case URBNF_DURATION
: fmt_tag
= "DurationRules"; break;
758 case URBNF_NUMBERING_SYSTEM
: fmt_tag
= "NumberingSystemRules"; break;
759 default: status
= U_ILLEGAL_ARGUMENT_ERROR
; return;
762 // TODO: read localization info from resource
763 LocalizationInfo
* locinfo
= NULL
;
765 UResourceBundle
* nfrb
= ures_open(U_ICUDATA_RBNF
, locale
.getName(), &status
);
766 if (U_SUCCESS(status
)) {
767 setLocaleIDs(ures_getLocaleByType(nfrb
, ULOC_VALID_LOCALE
, &status
),
768 ures_getLocaleByType(nfrb
, ULOC_ACTUAL_LOCALE
, &status
));
770 UResourceBundle
* rbnfRules
= ures_getByKeyWithFallback(nfrb
, rules_tag
, NULL
, &status
);
771 if (U_FAILURE(status
)) {
774 UResourceBundle
* ruleSets
= ures_getByKeyWithFallback(rbnfRules
, fmt_tag
, NULL
, &status
);
775 if (U_FAILURE(status
)) {
776 ures_close(rbnfRules
);
782 while (ures_hasNext(ruleSets
)) {
783 desc
.append(ures_getNextUnicodeString(ruleSets
,NULL
,&status
));
787 init (desc
, locinfo
, perror
, status
);
789 ures_close(ruleSets
);
790 ures_close(rbnfRules
);
795 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat
& rhs
)
798 , ruleSetDescriptions(NULL
)
800 , defaultRuleSet(NULL
)
803 , decimalFormatSymbols(NULL
)
805 , lenientParseRules(NULL
)
806 , localizations(NULL
)
808 this->operator=(rhs
);
813 RuleBasedNumberFormat
&
814 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat
& rhs
)
816 UErrorCode status
= U_ZERO_ERROR
;
819 lenient
= rhs
.lenient
;
821 UnicodeString rules
= rhs
.getRules();
823 init(rules
, rhs
.localizations
? rhs
.localizations
->ref() : NULL
, perror
, status
);
828 RuleBasedNumberFormat::~RuleBasedNumberFormat()
834 RuleBasedNumberFormat::clone(void) const
836 RuleBasedNumberFormat
* result
= NULL
;
837 UnicodeString rules
= getRules();
838 UErrorCode status
= U_ZERO_ERROR
;
840 result
= new RuleBasedNumberFormat(rules
, localizations
, locale
, perror
, status
);
843 status
= U_MEMORY_ALLOCATION_ERROR
;
846 if (U_FAILURE(status
)) {
850 result
->lenient
= lenient
;
856 RuleBasedNumberFormat::operator==(const Format
& other
) const
858 if (this == &other
) {
862 if (typeid(*this) == typeid(other
)) {
863 const RuleBasedNumberFormat
& rhs
= (const RuleBasedNumberFormat
&)other
;
864 if (locale
== rhs
.locale
&&
865 lenient
== rhs
.lenient
&&
866 (localizations
== NULL
867 ? rhs
.localizations
== NULL
868 : (rhs
.localizations
== NULL
870 : *localizations
== rhs
.localizations
))) {
872 NFRuleSet
** p
= ruleSets
;
873 NFRuleSet
** q
= rhs
.ruleSets
;
876 } else if (q
== NULL
) {
879 while (*p
&& *q
&& (**p
== **q
)) {
883 return *q
== NULL
&& *p
== NULL
;
891 RuleBasedNumberFormat::getRules() const
893 UnicodeString result
;
894 if (ruleSets
!= NULL
) {
895 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
896 (*p
)->appendRules(result
);
903 RuleBasedNumberFormat::getRuleSetName(int32_t index
) const
906 UnicodeString
string(TRUE
, localizations
->getRuleSetName(index
), (int32_t)-1);
908 } else if (ruleSets
) {
909 UnicodeString result
;
910 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
912 if (rs
->isPublic()) {
925 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
929 result
= localizations
->getNumberOfRuleSets();
930 } else if (ruleSets
) {
931 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
932 if ((**p
).isPublic()) {
941 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
943 return localizations
->getNumberOfDisplayLocales();
949 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index
, UErrorCode
& status
) const {
950 if (U_FAILURE(status
)) {
953 if (localizations
&& index
>= 0 && index
< localizations
->getNumberOfDisplayLocales()) {
954 UnicodeString
name(TRUE
, localizations
->getLocaleName(index
), -1);
956 int32_t cap
= name
.length() + 1;
959 bp
= (char *)uprv_malloc(cap
);
961 status
= U_MEMORY_ALLOCATION_ERROR
;
965 name
.extract(0, name
.length(), bp
, cap
, UnicodeString::kInvariant
);
966 Locale
retLocale(bp
);
972 status
= U_ILLEGAL_ARGUMENT_ERROR
;
978 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index
, const Locale
& localeParam
) {
979 if (localizations
&& index
>= 0 && index
< localizations
->getNumberOfRuleSets()) {
980 UnicodeString
localeName(localeParam
.getBaseName(), -1, UnicodeString::kInvariant
);
981 int32_t len
= localeName
.length();
982 UChar
* localeStr
= localeName
.getBuffer(len
+ 1);
985 int32_t ix
= localizations
->indexForLocale(localeStr
);
987 UnicodeString
name(TRUE
, localizations
->getDisplayName(ix
, index
), -1);
991 // trim trailing portion, skipping over omitted sections
992 do { --len
;} while (len
> 0 && localeStr
[len
] != 0x005f); // underscore
993 while (len
> 0 && localeStr
[len
-1] == 0x005F) --len
;
995 UnicodeString
name(TRUE
, localizations
->getRuleSetName(index
), -1);
1004 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString
& ruleSetName
, const Locale
& localeParam
) {
1005 if (localizations
) {
1006 UnicodeString
rsn(ruleSetName
);
1007 int32_t ix
= localizations
->indexForRuleSet(rsn
.getTerminatedBuffer());
1008 return getRuleSetDisplayName(ix
, localeParam
);
1010 UnicodeString bogus
;
1016 RuleBasedNumberFormat::findRuleSet(const UnicodeString
& name
, UErrorCode
& status
) const
1018 if (U_SUCCESS(status
) && ruleSets
) {
1019 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
1021 if (rs
->isNamed(name
)) {
1025 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1031 RuleBasedNumberFormat::format(int32_t number
,
1032 UnicodeString
& toAppendTo
,
1033 FieldPosition
& /* pos */) const
1035 if (defaultRuleSet
) defaultRuleSet
->format((int64_t)number
, toAppendTo
, toAppendTo
.length());
1041 RuleBasedNumberFormat::format(int64_t number
,
1042 UnicodeString
& toAppendTo
,
1043 FieldPosition
& /* pos */) const
1045 if (defaultRuleSet
) defaultRuleSet
->format(number
, toAppendTo
, toAppendTo
.length());
1051 RuleBasedNumberFormat::format(double number
,
1052 UnicodeString
& toAppendTo
,
1053 FieldPosition
& /* pos */) const
1055 // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1056 if (uprv_isNaN(number
)) {
1057 DecimalFormatSymbols
* decFmtSyms
= getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1059 toAppendTo
+= decFmtSyms
->getConstSymbol(DecimalFormatSymbols::kNaNSymbol
);
1061 } else if (defaultRuleSet
) {
1062 defaultRuleSet
->format(number
, toAppendTo
, toAppendTo
.length());
1069 RuleBasedNumberFormat::format(int32_t number
,
1070 const UnicodeString
& ruleSetName
,
1071 UnicodeString
& toAppendTo
,
1072 FieldPosition
& /* pos */,
1073 UErrorCode
& status
) const
1075 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1076 if (U_SUCCESS(status
)) {
1077 if (ruleSetName
.indexOf(gPercentPercent
, 2, 0) == 0) {
1078 // throw new IllegalArgumentException("Can't use internal rule set");
1079 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1081 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1083 rs
->format((int64_t)number
, toAppendTo
, toAppendTo
.length());
1092 RuleBasedNumberFormat::format(int64_t number
,
1093 const UnicodeString
& ruleSetName
,
1094 UnicodeString
& toAppendTo
,
1095 FieldPosition
& /* pos */,
1096 UErrorCode
& status
) const
1098 if (U_SUCCESS(status
)) {
1099 if (ruleSetName
.indexOf(gPercentPercent
, 2, 0) == 0) {
1100 // throw new IllegalArgumentException("Can't use internal rule set");
1101 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1103 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1105 rs
->format(number
, toAppendTo
, toAppendTo
.length());
1113 // make linker happy
1115 RuleBasedNumberFormat::format(const Formattable
& obj
,
1116 UnicodeString
& toAppendTo
,
1118 UErrorCode
& status
) const
1120 return NumberFormat::format(obj
, toAppendTo
, pos
, status
);
1124 RuleBasedNumberFormat::format(double number
,
1125 const UnicodeString
& ruleSetName
,
1126 UnicodeString
& toAppendTo
,
1127 FieldPosition
& /* pos */,
1128 UErrorCode
& status
) const
1130 if (U_SUCCESS(status
)) {
1131 if (ruleSetName
.indexOf(gPercentPercent
, 2, 0) == 0) {
1132 // throw new IllegalArgumentException("Can't use internal rule set");
1133 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1135 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1137 rs
->format(number
, toAppendTo
, toAppendTo
.length());
1145 RuleBasedNumberFormat::parse(const UnicodeString
& text
,
1146 Formattable
& result
,
1147 ParsePosition
& parsePosition
) const
1150 parsePosition
.setErrorIndex(0);
1154 UnicodeString
workingText(text
, parsePosition
.getIndex());
1155 ParsePosition
workingPos(0);
1157 ParsePosition
high_pp(0);
1158 Formattable high_result
;
1160 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
1162 if (rp
->isPublic() && rp
->isParseable()) {
1163 ParsePosition
working_pp(0);
1164 Formattable working_result
;
1166 rp
->parse(workingText
, working_pp
, kMaxDouble
, working_result
, lenient
);
1167 if (working_pp
.getIndex() > high_pp
.getIndex()) {
1168 high_pp
= working_pp
;
1169 high_result
= working_result
;
1171 if (high_pp
.getIndex() == workingText
.length()) {
1178 int32_t startIndex
= parsePosition
.getIndex();
1179 parsePosition
.setIndex(startIndex
+ high_pp
.getIndex());
1180 if (high_pp
.getIndex() > 0) {
1181 parsePosition
.setErrorIndex(-1);
1183 int32_t errorIndex
= (high_pp
.getErrorIndex()>0)? high_pp
.getErrorIndex(): 0;
1184 parsePosition
.setErrorIndex(startIndex
+ errorIndex
);
1186 result
= high_result
;
1187 if (result
.getType() == Formattable::kDouble
) {
1188 int32_t r
= (int32_t)result
.getDouble();
1189 if ((double)r
== result
.getDouble()) {
1195 #if !UCONFIG_NO_COLLATION
1198 RuleBasedNumberFormat::setLenient(UBool enabled
)
1201 if (!enabled
&& collator
) {
1210 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString
& ruleSetName
, UErrorCode
& status
) {
1211 if (U_SUCCESS(status
)) {
1212 if (ruleSetName
.isEmpty()) {
1213 if (localizations
) {
1214 UnicodeString
name(TRUE
, localizations
->getRuleSetName(0), -1);
1215 defaultRuleSet
= findRuleSet(name
, status
);
1217 initDefaultRuleSet();
1219 } else if (ruleSetName
.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1220 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1222 NFRuleSet
* result
= findRuleSet(ruleSetName
, status
);
1223 if (result
!= NULL
) {
1224 defaultRuleSet
= result
;
1231 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1232 UnicodeString result
;
1233 if (defaultRuleSet
&& defaultRuleSet
->isPublic()) {
1234 defaultRuleSet
->getName(result
);
1236 result
.setToBogus();
1242 RuleBasedNumberFormat::initDefaultRuleSet()
1244 defaultRuleSet
= NULL
;
1249 const UnicodeString spellout
= UNICODE_STRING_SIMPLE("%spellout-numbering");
1250 const UnicodeString ordinal
= UNICODE_STRING_SIMPLE("%digits-ordinal");
1251 const UnicodeString duration
= UNICODE_STRING_SIMPLE("%duration");
1253 NFRuleSet
**p
= &ruleSets
[0];
1255 if ((*p
)->isNamed(spellout
) || (*p
)->isNamed(ordinal
) || (*p
)->isNamed(duration
)) {
1256 defaultRuleSet
= *p
;
1263 defaultRuleSet
= *--p
;
1264 if (!defaultRuleSet
->isPublic()) {
1265 while (p
!= ruleSets
) {
1266 if ((*--p
)->isPublic()) {
1267 defaultRuleSet
= *p
;
1276 RuleBasedNumberFormat::init(const UnicodeString
& rules
, LocalizationInfo
* localizationInfos
,
1277 UParseError
& pErr
, UErrorCode
& status
)
1279 // TODO: implement UParseError
1280 uprv_memset(&pErr
, 0, sizeof(UParseError
));
1281 // Note: this can leave ruleSets == NULL, so remaining code should check
1282 if (U_FAILURE(status
)) {
1286 this->localizations
= localizationInfos
== NULL
? NULL
: localizationInfos
->ref();
1288 UnicodeString
description(rules
);
1289 if (!description
.length()) {
1290 status
= U_MEMORY_ALLOCATION_ERROR
;
1294 // start by stripping the trailing whitespace from all the rules
1295 // (this is all the whitespace following each semicolon in the
1296 // description). This allows us to look for rule-set boundaries
1297 // by searching for ";%" without having to worry about whitespace
1298 // between the ; and the %
1299 stripWhitespace(description
);
1301 // check to see if there's a set of lenient-parse rules. If there
1302 // is, pull them out into our temporary holding place for them,
1303 // and delete them from the description before the real description-
1304 // parsing code sees them
1305 int32_t lp
= description
.indexOf(gLenientParse
, -1, 0);
1307 // we've got to make sure we're not in the middle of a rule
1308 // (where "%%lenient-parse" would actually get treated as
1310 if (lp
== 0 || description
.charAt(lp
- 1) == gSemiColon
) {
1311 // locate the beginning and end of the actual collation
1312 // rules (there may be whitespace between the name and
1313 // the first token in the description)
1314 int lpEnd
= description
.indexOf(gSemiPercent
, 2, lp
);
1317 lpEnd
= description
.length() - 1;
1319 int lpStart
= lp
+ u_strlen(gLenientParse
);
1320 while (PatternProps::isWhiteSpace(description
.charAt(lpStart
))) {
1324 // copy out the lenient-parse rules and delete them
1325 // from the description
1326 lenientParseRules
= new UnicodeString();
1328 if (lenientParseRules
== 0) {
1329 status
= U_MEMORY_ALLOCATION_ERROR
;
1332 lenientParseRules
->setTo(description
, lpStart
, lpEnd
- lpStart
);
1334 description
.remove(lp
, lpEnd
+ 1 - lp
);
1338 // pre-flight parsing the description and count the number of
1339 // rule sets (";%" marks the end of one rule set and the beginning
1342 for (int32_t p
= description
.indexOf(gSemiPercent
, 2, 0); p
!= -1; p
= description
.indexOf(gSemiPercent
, 2, p
)) {
1348 // our rule list is an array of the appropriate size
1349 ruleSets
= (NFRuleSet
**)uprv_malloc((numRuleSets
+ 1) * sizeof(NFRuleSet
*));
1351 if (ruleSets
== 0) {
1352 status
= U_MEMORY_ALLOCATION_ERROR
;
1356 for (int i
= 0; i
<= numRuleSets
; ++i
) {
1360 // divide up the descriptions into individual rule-set descriptions
1361 // and store them in a temporary array. At each step, we also
1362 // new up a rule set, but all this does is initialize its name
1363 // and remove it from its description. We can't actually parse
1364 // the rest of the descriptions and finish initializing everything
1365 // because we have to know the names and locations of all the rule
1366 // sets before we can actually set everything up
1368 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1372 ruleSetDescriptions
= new UnicodeString
[numRuleSets
];
1373 if (ruleSetDescriptions
== 0) {
1374 status
= U_MEMORY_ALLOCATION_ERROR
;
1381 for (int32_t p
= description
.indexOf(gSemiPercent
, 2, 0); p
!= -1; p
= description
.indexOf(gSemiPercent
, 2, start
)) {
1382 ruleSetDescriptions
[curRuleSet
].setTo(description
, start
, p
+ 1 - start
);
1383 ruleSets
[curRuleSet
] = new NFRuleSet(ruleSetDescriptions
, curRuleSet
, status
);
1384 if (ruleSets
[curRuleSet
] == 0) {
1385 status
= U_MEMORY_ALLOCATION_ERROR
;
1391 ruleSetDescriptions
[curRuleSet
].setTo(description
, start
, description
.length() - start
);
1392 ruleSets
[curRuleSet
] = new NFRuleSet(ruleSetDescriptions
, curRuleSet
, status
);
1393 if (ruleSets
[curRuleSet
] == 0) {
1394 status
= U_MEMORY_ALLOCATION_ERROR
;
1399 // now we can take note of the formatter's default rule set, which
1400 // is the last public rule set in the description (it's the last
1401 // rather than the first so that a user can create a new formatter
1402 // from an existing formatter and change its default behavior just
1403 // by appending more rule sets to the end)
1405 // {dlf} Initialization of a fraction rule set requires the default rule
1406 // set to be known. For purposes of initialization, this is always the
1407 // last public rule set, no matter what the localization data says.
1408 initDefaultRuleSet();
1410 // finally, we can go back through the temporary descriptions
1411 // list and finish setting up the substructure (and we throw
1412 // away the temporary descriptions as we go)
1414 for (int i
= 0; i
< numRuleSets
; i
++) {
1415 ruleSets
[i
]->parseRules(ruleSetDescriptions
[i
], this, status
);
1419 // Now that the rules are initialized, the 'real' default rule
1420 // set can be adjusted by the localization data.
1422 // The C code keeps the localization array as is, rather than building
1423 // a separate array of the public rule set names, so we have less work
1424 // to do here-- but we still need to check the names.
1426 if (localizationInfos
) {
1427 // confirm the names, if any aren't in the rules, that's an error
1428 // it is ok if the rules contain public rule sets that are not in this list
1429 for (int32_t i
= 0; i
< localizationInfos
->getNumberOfRuleSets(); ++i
) {
1430 UnicodeString
name(TRUE
, localizationInfos
->getRuleSetName(i
), -1);
1431 NFRuleSet
* rs
= findRuleSet(name
, status
);
1436 defaultRuleSet
= rs
;
1440 defaultRuleSet
= getDefaultRuleSet();
// Rewrites `description` in place. Working one segment at a time, the loop
// passes over characters for which PatternProps::isWhiteSpace() is true, then
// appends the text up to and including the next gSemiColon (or the remainder
// of the string when no semicolon is found) to a scratch string, which
// finally replaces the contents of `description`.
// NOTE(review): the embedded line numbers in this listing have gaps; the
// declaration and the updates of `start` are not visible here -- confirm
// against the complete file.
1445 RuleBasedNumberFormat::stripWhitespace(UnicodeString
& description
)
1447 // iterate through the characters...
1448 UnicodeString result
;
// `start` doubles as the loop sentinel: -1 means "finished"
1451 while (start
!= -1 && start
< description
.length()) {
1452 // seek to the first non-whitespace character...
1453 while (start
< description
.length()
1454 && PatternProps::isWhiteSpace(description
.charAt(start
))) {
1458 // locate the next semicolon in the text and copy the text from
1459 // our current position up to that semicolon into the result
1460 int32_t p
= description
.indexOf(gSemiColon
, start
);
1462 // or if we don't find a semicolon, just copy the rest of
1463 // the string into the result
1464 result
.append(description
, start
, description
.length() - start
);
1467 else if (p
< description
.length()) {
// the copied length is p + 1 - start, so the semicolon itself is included
1468 result
.append(description
, start
, p
+ 1 - start
);
1472 // when we get here, we've run off the end of the string, and
1473 // we terminate the loop (we continue until *start* is -1 rather
1474 // than until *p* is -1, because otherwise we'd miss the last
1475 // rule in the description)
// hand the stripped text back through the caller's string
1481 description
.setTo(result
);
// Releases what this formatter holds: walks the ruleSets array, frees the
// array itself with uprv_free(), delete[]s ruleSetDescriptions, deletes
// decimalFormatSymbols and lenientParseRules (resetting both pointers to
// NULL), and calls unref() on localizations.
// NOTE(review): the embedded line numbers in this listing have gaps; the loop
// body and the statements guarded by UCONFIG_NO_COLLATION are not visible
// here -- confirm against the complete file.
1486 RuleBasedNumberFormat::dispose()
// the walk stops at the first NULL slot, so the array is expected to end
// with a NULL entry
1489 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
// the array is released with uprv_free(), not delete[]
1492 uprv_free(ruleSets
);
1496 if (ruleSetDescriptions
) {
1497 delete [] ruleSetDescriptions
;
// cleanup that only applies when collation support is compiled in
1500 #if !UCONFIG_NO_COLLATION
// each pointer is reset to NULL right after its object is deleted
1505 delete decimalFormatSymbols
;
1506 decimalFormatSymbols
= NULL
;
1508 delete lenientParseRules
;
1509 lenientParseRules
= NULL
;
// give up this formatter's reference and keep whatever unref() returns
1511 if (localizations
) localizations
= localizations
->unref();
1515 //-----------------------------------------------------------------------
1516 // package-internal API
1517 //-----------------------------------------------------------------------
1520 * Returns the collator to use for lenient parsing. The collator is lazily created:
1521 * this function creates it the first time it's called.
1522 * @return The collator to use for lenient parsing, or null if lenient parsing
// Builds the collator on first use, and only while `lenient` is set. A
// collator is created for `locale`; when it is a RuleBasedCollator and
// lenientParseRules is present, its rules followed by *lenientParseRules are
// used to construct a replacement RuleBasedCollator. On success the result
// has UCOL_DECOMPOSITION_MODE switched on and is cached in `collator`.
// NOTE(review): the embedded line numbers in this listing have gaps; the
// early-exit, failure and cleanup branches and the final return are not
// visible here -- confirm against the complete file.
1526 RuleBasedNumberFormat::getCollator() const
1528 #if !UCONFIG_NO_COLLATION
1533 // lazy-evaluate the collator
1534 if (collator
== NULL
&& lenient
) {
1535 // create a default collator based on the formatter's locale,
1536 // then pull out that collator's rules, append any additional
1537 // rules specified in the description, and create a _new_
1538 // collator based on the combination of those rules
// one local status is shared by every ICU call below
1540 UErrorCode status
= U_ZERO_ERROR
;
1542 Collator
* temp
= Collator::createInstance(locale
, status
);
1543 RuleBasedCollator
* newCollator
;
// dynamic_cast yields NULL when temp is not a RuleBasedCollator
1544 if (U_SUCCESS(status
) && (newCollator
= dynamic_cast<RuleBasedCollator
*>(temp
)) != NULL
) {
1545 if (lenientParseRules
) {
// start from the locale collator's own rules, then add the lenient-parse rules
1546 UnicodeString
rules(newCollator
->getRules());
1547 rules
.append(*lenientParseRules
);
// from here on newCollator refers to the freshly built collator, not to temp
1549 newCollator
= new RuleBasedCollator(rules
, status
);
1550 // Exit if newCollator could not be created.
1551 if (newCollator
== NULL
) {
1557 if (U_SUCCESS(status
)) {
1558 newCollator
->setAttribute(UCOL_DECOMPOSITION_MODE
, UCOL_ON
, status
);
// this method is const, so the cached member is written through a cast of `this`
1560 ((RuleBasedNumberFormat
*)this)->collator
= newCollator
;
1569 // if lenient-parse mode is off, this will be null
1570 // (see setLenientParseMode())
1576 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1577 * instances owned by this formatter. This object is lazily created: this function
1578 * creates it the first time it's called.
1579 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1580 * instances owned by this formatter.
1582 DecimalFormatSymbols
*
1583 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1585 // lazy-evaluate the DecimalFormatSymbols object. This object
1586 // is shared by all DecimalFormat instances belonging to this formatter.
1588 if (decimalFormatSymbols
== NULL
) {
// construction reports through a local status; the member is only assigned
// when that status indicates success
1589 UErrorCode status
= U_ZERO_ERROR
;
1590 DecimalFormatSymbols
* temp
= new DecimalFormatSymbols(locale
, status
);
1591 if (U_SUCCESS(status
)) {
// this method is const, so the cached member is written through a cast of `this`
1592 ((RuleBasedNumberFormat
*)this)->decimalFormatSymbols
= temp
;
// NOTE(review): the failure branch is not visible in this listing -- confirm
// that `temp` is released there. The member may still be NULL at this point
// if construction did not succeed.
1597 return decimalFormatSymbols
;
1600 // Releases the currently owned symbols and takes ownership of the new symbols.
// A NULL argument is ignored, leaving the current symbols in place. After the
// swap every rule set is re-parsed from its stored description so that it
// picks up the new symbols.
1602 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols
* symbolsToAdopt
)
1604 if (symbolsToAdopt
== NULL
) {
1605 return; // do not allow caller to set decimalFormatSymbols to NULL
// the previously held object is deleted before the pointer is replaced
1608 if (decimalFormatSymbols
!= NULL
) {
1609 delete decimalFormatSymbols
;
1612 decimalFormatSymbols
= symbolsToAdopt
;
1615 // Apply the new decimalFormatSymbols by reparsing the rulesets
// the reparse reports into a local status
// NOTE(review): no check of `status` is visible in this listing -- confirm
// whether parse failures are meant to be ignored here
1616 UErrorCode status
= U_ZERO_ERROR
;
1618 for (int32_t i
= 0; i
< numRuleSets
; i
++) {
1619 ruleSets
[i
]->parseRules(ruleSetDescriptions
[i
], this, status
);
1624 // Setting the symbols is equivalent to adopting a newly created copy of those symbols.
1626 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols
& symbols
)
1628 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols
));