2 *******************************************************************************
3 * Copyright (C) 1997-2010, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
8 #include <typeinfo> // for 'typeid' to work
10 #include "unicode/rbnf.h"
14 #include "unicode/normlzr.h"
15 #include "unicode/tblcoll.h"
16 #include "unicode/uchar.h"
17 #include "unicode/ucol.h"
18 #include "unicode/uloc.h"
19 #include "unicode/unum.h"
20 #include "unicode/ures.h"
21 #include "unicode/ustring.h"
22 #include "unicode/utf16.h"
23 #include "unicode/udata.h"
38 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
40 static const UChar gPercentPercent
[] =
45 // All urbnf objects are created through openRules, so we init all of the
46 // Unicode string constants required by rbnf, nfrs, or nfr here.
47 static const UChar gLenientParse
[] =
49 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
50 }; /* "%%lenient-parse:" */
51 static const UChar gSemiColon
= 0x003B;
52 static const UChar gSemiPercent
[] =
57 #define kSomeNumberOfBitsDiv2 22
58 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
59 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
61 // Temporary workaround - when noParse is true, do nothing in parse.
62 // TODO: We need a real fix - see #6895/#6896
63 static const char *NO_SPELLOUT_PARSE_LANGUAGES
[] = { "ga", NULL
};
67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat
)
70 This is a utility class. It does not use ICU's RTTI.
71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
72 Please make sure that intltest passes on Windows in Release mode,
73 since the string pooling per compilation unit will mess up how RTTI works.
74 The RTTI code was also removed due to lack of code coverage.
76 class LocalizationInfo
: public UMemory
{
78 virtual ~LocalizationInfo() {};
82 LocalizationInfo() : refcount(0) {}
84 LocalizationInfo
* ref(void) {
89 LocalizationInfo
* unref(void) {
90 if (refcount
&& --refcount
== 0) {
96 virtual UBool
operator==(const LocalizationInfo
* rhs
) const;
97 inline UBool
operator!=(const LocalizationInfo
* rhs
) const { return !operator==(rhs
); }
99 virtual int32_t getNumberOfRuleSets(void) const = 0;
100 virtual const UChar
* getRuleSetName(int32_t index
) const = 0;
101 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
102 virtual const UChar
* getLocaleName(int32_t index
) const = 0;
103 virtual const UChar
* getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const = 0;
105 virtual int32_t indexForLocale(const UChar
* locale
) const;
106 virtual int32_t indexForRuleSet(const UChar
* ruleset
) const;
108 // virtual UClassID getDynamicClassID() const = 0;
109 // static UClassID getStaticClassID(void);
112 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
114 // if both strings are NULL, this returns TRUE
116 streq(const UChar
* lhs
, const UChar
* rhs
) {
121 return u_strcmp(lhs
, rhs
) == 0;
127 LocalizationInfo::operator==(const LocalizationInfo
* rhs
) const {
133 int32_t rsc
= getNumberOfRuleSets();
134 if (rsc
== rhs
->getNumberOfRuleSets()) {
135 for (int i
= 0; i
< rsc
; ++i
) {
136 if (!streq(getRuleSetName(i
), rhs
->getRuleSetName(i
))) {
140 int32_t dlc
= getNumberOfDisplayLocales();
141 if (dlc
== rhs
->getNumberOfDisplayLocales()) {
142 for (int i
= 0; i
< dlc
; ++i
) {
143 const UChar
* locale
= getLocaleName(i
);
144 int32_t ix
= rhs
->indexForLocale(locale
);
145 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
146 if (!streq(locale
, rhs
->getLocaleName(ix
))) {
149 for (int j
= 0; j
< rsc
; ++j
) {
150 if (!streq(getDisplayName(i
, j
), rhs
->getDisplayName(ix
, j
))) {
163 LocalizationInfo::indexForLocale(const UChar
* locale
) const {
164 for (int i
= 0; i
< getNumberOfDisplayLocales(); ++i
) {
165 if (streq(locale
, getLocaleName(i
))) {
173 LocalizationInfo::indexForRuleSet(const UChar
* ruleset
) const {
175 for (int i
= 0; i
< getNumberOfRuleSets(); ++i
) {
176 if (streq(ruleset
, getRuleSetName(i
))) {
185 typedef void (*Fn_Deleter
)(void*);
193 VArray() : buf(NULL
), cap(0), size(0), deleter(NULL
) {}
195 VArray(Fn_Deleter del
) : buf(NULL
), cap(0), size(0), deleter(del
) {}
199 for (int i
= 0; i
< size
; ++i
) {
210 void add(void* elem
, UErrorCode
& status
) {
211 if (U_SUCCESS(status
)) {
215 } else if (cap
< 256) {
221 buf
= (void**)uprv_malloc(cap
* sizeof(void*));
223 buf
= (void**)uprv_realloc(buf
, cap
* sizeof(void*));
226 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
227 status
= U_MEMORY_ALLOCATION_ERROR
;
230 void* start
= &buf
[size
];
231 size_t count
= (cap
- size
) * sizeof(void*);
232 uprv_memset(start
, 0, count
); // fill with nulls, just because
238 void** release(void) {
249 class StringLocalizationInfo
: public LocalizationInfo
{
255 friend class LocDataParser
;
257 StringLocalizationInfo(UChar
* i
, UChar
*** d
, int32_t numRS
, int32_t numLocs
)
258 : info(i
), data(d
), numRuleSets(numRS
), numLocales(numLocs
)
263 static StringLocalizationInfo
* create(const UnicodeString
& info
, UParseError
& perror
, UErrorCode
& status
);
265 virtual ~StringLocalizationInfo();
266 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets
; }
267 virtual const UChar
* getRuleSetName(int32_t index
) const;
268 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales
; }
269 virtual const UChar
* getLocaleName(int32_t index
) const;
270 virtual const UChar
* getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const;
272 // virtual UClassID getDynamicClassID() const;
273 // static UClassID getStaticClassID(void);
276 void init(UErrorCode
& status
) const;
281 OPEN_ANGLE
= 0x003c, /* '<' */
282 CLOSE_ANGLE
= 0x003e, /* '>' */
290 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
292 class LocDataParser
{
301 LocDataParser(UParseError
& parseError
, UErrorCode
& status
)
302 : data(NULL
), e(NULL
), p(NULL
), ch(0xffff), pe(parseError
), ec(status
) {}
306 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
307 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
309 StringLocalizationInfo
* parse(UChar
* data
, int32_t len
);
313 void inc(void) { ++p
; ch
= 0xffff; }
314 UBool
checkInc(UChar c
) { if (p
< e
&& (ch
== c
|| *p
== c
)) { inc(); return TRUE
; } return FALSE
; }
315 UBool
check(UChar c
) { return p
< e
&& (ch
== c
|| *p
== c
); }
316 void skipWhitespace(void) { while (p
< e
&& uprv_isRuleWhiteSpace(ch
!= 0xffff ? ch
: *p
)) inc();}
317 UBool
inList(UChar c
, const UChar
* list
) const {
318 if (*list
== SPACE
&& uprv_isRuleWhiteSpace(c
)) return TRUE
;
319 while (*list
&& *list
!= c
) ++list
; return *list
== c
;
321 void parseError(const char* msg
);
323 StringLocalizationInfo
* doParse(void);
325 UChar
** nextArray(int32_t& requiredLength
);
326 UChar
* nextString(void);
330 #define ERROR(msg) parseError(msg); return NULL;
332 #define ERROR(msg) parseError(NULL); return NULL;
336 static const UChar DQUOTE_STOPLIST
[] = {
340 static const UChar SQUOTE_STOPLIST
[] = {
344 static const UChar NOQUOTE_STOPLIST
[] = {
345 SPACE
, COMMA
, CLOSE_ANGLE
, OPEN_ANGLE
, TICK
, QUOTE
, 0
353 StringLocalizationInfo
*
354 LocDataParser::parse(UChar
* _data
, int32_t len
) {
356 if (_data
) uprv_free(_data
);
362 pe
.postContext
[0] = 0;
363 pe
.preContext
[0] = 0;
366 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
371 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
385 StringLocalizationInfo
*
386 LocDataParser::doParse(void) {
388 if (!checkInc(OPEN_ANGLE
)) {
389 ERROR("Missing open angle");
391 VArray
array(DeleteFn
);
392 UBool mightHaveNext
= TRUE
;
393 int32_t requiredLength
= -1;
394 while (mightHaveNext
) {
395 mightHaveNext
= FALSE
;
396 UChar
** elem
= nextArray(requiredLength
);
398 UBool haveComma
= check(COMMA
);
403 mightHaveNext
= TRUE
;
405 } else if (haveComma
) {
406 ERROR("Unexpected character");
411 if (!checkInc(CLOSE_ANGLE
)) {
412 if (check(OPEN_ANGLE
)) {
413 ERROR("Missing comma in outer array");
415 ERROR("Missing close angle bracket in outer array");
421 ERROR("Extra text after close of localization data");
426 int32_t numLocs
= array
.length() - 2; // subtract first, NULL
427 UChar
*** result
= (UChar
***)array
.release();
429 return new StringLocalizationInfo(data
, result
, requiredLength
-2, numLocs
); // subtract first, NULL
433 ERROR("Unknown error");
437 LocDataParser::nextArray(int32_t& requiredLength
) {
443 if (!checkInc(OPEN_ANGLE
)) {
444 ERROR("Missing open angle");
448 UBool mightHaveNext
= TRUE
;
449 while (mightHaveNext
) {
450 mightHaveNext
= FALSE
;
451 UChar
* elem
= nextString();
453 UBool haveComma
= check(COMMA
);
458 mightHaveNext
= TRUE
;
460 } else if (haveComma
) {
461 ERROR("Unexpected comma");
465 if (!checkInc(CLOSE_ANGLE
)) {
466 if (check(OPEN_ANGLE
)) {
467 ERROR("Missing close angle bracket in inner array");
469 ERROR("Missing comma in inner array");
475 if (requiredLength
== -1) {
476 requiredLength
= array
.length() + 1;
477 } else if (array
.length() != requiredLength
) {
478 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
479 ERROR("Array not of required length");
482 return (UChar
**)array
.release();
484 ERROR("Unknown Error");
488 LocDataParser::nextString() {
489 UChar
* result
= NULL
;
493 const UChar
* terminators
;
495 UBool haveQuote
= c
== QUOTE
|| c
== TICK
;
498 terminators
= c
== QUOTE
? DQUOTE_STOPLIST
: SQUOTE_STOPLIST
;
500 terminators
= NOQUOTE_STOPLIST
;
503 while (p
< e
&& !inList(*p
, terminators
)) ++p
;
505 ERROR("Unexpected end of data");
511 *p
= 0x0; // terminate by writing to data
512 result
= start
; // just point into data
516 ERROR("Missing matching quote");
517 } else if (p
== start
) {
518 ERROR("Empty string");
521 } else if (x
== OPEN_ANGLE
|| x
== TICK
|| x
== QUOTE
) {
522 ERROR("Unexpected character in string");
526 // ok for there to be no next string
531 LocDataParser::parseError(const char* /*str*/) {
536 const UChar
* start
= p
- U_PARSE_CONTEXT_LEN
- 1;
540 for (UChar
* x
= p
; --x
>= start
;) {
546 const UChar
* limit
= p
+ U_PARSE_CONTEXT_LEN
- 1;
550 u_strncpy(pe
.preContext
, start
, (int32_t)(p
-start
));
551 pe
.preContext
[p
-start
] = 0;
552 u_strncpy(pe
.postContext
, p
, (int32_t)(limit
-p
));
553 pe
.postContext
[limit
-p
] = 0;
554 pe
.offset
= (int32_t)(p
- data
);
557 fprintf(stderr
, "%s at or near character %d: ", str
, p
-data
);
560 msg
.append(start
, p
- start
);
561 msg
.append((UChar
)0x002f); /* SOLIDUS/SLASH */
562 msg
.append(p
, limit
-p
);
566 int32_t len
= msg
.extract(0, msg
.length(), buf
, 128);
572 fprintf(stderr
, "%s\n", buf
);
586 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
588 StringLocalizationInfo
*
589 StringLocalizationInfo::create(const UnicodeString
& info
, UParseError
& perror
, UErrorCode
& status
) {
590 if (U_FAILURE(status
)) {
594 int32_t len
= info
.length();
596 return NULL
; // no error;
599 UChar
* p
= (UChar
*)uprv_malloc(len
* sizeof(UChar
));
601 status
= U_MEMORY_ALLOCATION_ERROR
;
604 info
.extract(p
, len
, status
);
605 if (!U_FAILURE(status
)) {
606 status
= U_ZERO_ERROR
; // clear warning about non-termination
609 LocDataParser
parser(perror
, status
);
610 return parser
.parse(p
, len
);
613 StringLocalizationInfo::~StringLocalizationInfo() {
614 for (UChar
*** p
= (UChar
***)data
; *p
; ++p
) {
615 // remaining data is simply pointer into our unicode string data.
616 if (*p
) uprv_free(*p
);
618 if (data
) uprv_free(data
);
619 if (info
) uprv_free(info
);
624 StringLocalizationInfo::getRuleSetName(int32_t index
) const {
625 if (index
>= 0 && index
< getNumberOfRuleSets()) {
626 return data
[0][index
];
632 StringLocalizationInfo::getLocaleName(int32_t index
) const {
633 if (index
>= 0 && index
< getNumberOfDisplayLocales()) {
634 return data
[index
+1][0];
640 StringLocalizationInfo::getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const {
641 if (localeIndex
>= 0 && localeIndex
< getNumberOfDisplayLocales() &&
642 ruleIndex
>= 0 && ruleIndex
< getNumberOfRuleSets()) {
643 return data
[localeIndex
+1][ruleIndex
+1];
650 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
651 const UnicodeString
& locs
,
652 const Locale
& alocale
, UParseError
& perror
, UErrorCode
& status
)
654 , defaultRuleSet(NULL
)
657 , decimalFormatSymbols(NULL
)
659 , lenientParseRules(NULL
)
660 , localizations(NULL
)
661 , noParse(FALSE
) //TODO: to be removed after #6895
663 LocalizationInfo
* locinfo
= StringLocalizationInfo::create(locs
, perror
, status
);
664 init(description
, locinfo
, perror
, status
);
667 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
668 const UnicodeString
& locs
,
669 UParseError
& perror
, UErrorCode
& status
)
671 , defaultRuleSet(NULL
)
672 , locale(Locale::getDefault())
674 , decimalFormatSymbols(NULL
)
676 , lenientParseRules(NULL
)
677 , localizations(NULL
)
678 , noParse(FALSE
) //TODO: to be removed after #6895
680 LocalizationInfo
* locinfo
= StringLocalizationInfo::create(locs
, perror
, status
);
681 init(description
, locinfo
, perror
, status
);
684 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
685 LocalizationInfo
* info
,
686 const Locale
& alocale
, UParseError
& perror
, UErrorCode
& status
)
688 , defaultRuleSet(NULL
)
691 , decimalFormatSymbols(NULL
)
693 , lenientParseRules(NULL
)
694 , localizations(NULL
)
695 , noParse(FALSE
) //TODO: to be removed after #6895
697 init(description
, info
, perror
, status
);
700 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
704 , defaultRuleSet(NULL
)
705 , locale(Locale::getDefault())
707 , decimalFormatSymbols(NULL
)
709 , lenientParseRules(NULL
)
710 , localizations(NULL
)
711 , noParse(FALSE
) //TODO: to be removed after #6895
713 init(description
, NULL
, perror
, status
);
716 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
717 const Locale
& aLocale
,
721 , defaultRuleSet(NULL
)
724 , decimalFormatSymbols(NULL
)
726 , lenientParseRules(NULL
)
727 , localizations(NULL
)
728 , noParse(FALSE
) //TODO: to be removed after #6895
730 init(description
, NULL
, perror
, status
);
733 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag
, const Locale
& alocale
, UErrorCode
& status
)
735 , defaultRuleSet(NULL
)
738 , decimalFormatSymbols(NULL
)
740 , lenientParseRules(NULL
)
741 , localizations(NULL
)
743 if (U_FAILURE(status
)) {
747 const char* rules_tag
= "RBNFRules";
748 const char* fmt_tag
= "";
750 case URBNF_SPELLOUT
: fmt_tag
= "SpelloutRules"; break;
751 case URBNF_ORDINAL
: fmt_tag
= "OrdinalRules"; break;
752 case URBNF_DURATION
: fmt_tag
= "DurationRules"; break;
753 case URBNF_NUMBERING_SYSTEM
: fmt_tag
= "NumberingSystemRules"; break;
754 default: status
= U_ILLEGAL_ARGUMENT_ERROR
; return;
757 // TODO: read localization info from resource
758 LocalizationInfo
* locinfo
= NULL
;
761 UResourceBundle
* nfrb
= ures_open(U_ICUDATA_RBNF
, locale
.getName(), &status
);
762 if (U_SUCCESS(status
)) {
763 setLocaleIDs(ures_getLocaleByType(nfrb
, ULOC_VALID_LOCALE
, &status
),
764 ures_getLocaleByType(nfrb
, ULOC_ACTUAL_LOCALE
, &status
));
766 UResourceBundle
* rbnfRules
= ures_getByKeyWithFallback(nfrb
, rules_tag
, NULL
, &status
);
767 if (U_FAILURE(status
)) {
770 UResourceBundle
* ruleSets
= ures_getByKeyWithFallback(rbnfRules
, fmt_tag
, NULL
, &status
);
771 if (U_FAILURE(status
)) {
772 ures_close(rbnfRules
);
778 while (ures_hasNext(ruleSets
)) {
779 const UChar
* currentString
= ures_getNextString(ruleSets
,&len
,NULL
,&status
);
780 desc
.append(currentString
);
785 init (desc
, locinfo
, perror
, status
);
787 //TODO: we need a real fix - see #6895 / #6896
789 if (tag
== URBNF_SPELLOUT
) {
790 const char *lang
= alocale
.getLanguage();
791 for (int32_t i
= 0; NO_SPELLOUT_PARSE_LANGUAGES
[i
] != NULL
; i
++) {
792 if (uprv_strcmp(lang
, NO_SPELLOUT_PARSE_LANGUAGES
[i
]) == 0) {
800 ures_close(ruleSets
);
801 ures_close(rbnfRules
);
806 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat
& rhs
)
809 , defaultRuleSet(NULL
)
812 , decimalFormatSymbols(NULL
)
814 , lenientParseRules(NULL
)
815 , localizations(NULL
)
817 this->operator=(rhs
);
822 RuleBasedNumberFormat
&
823 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat
& rhs
)
825 UErrorCode status
= U_ZERO_ERROR
;
828 lenient
= rhs
.lenient
;
830 UnicodeString rules
= rhs
.getRules();
832 init(rules
, rhs
.localizations
? rhs
.localizations
->ref() : NULL
, perror
, status
);
834 //TODO: remove below when we fix the parse bug - See #6895 / #6896
835 noParse
= rhs
.noParse
;
840 RuleBasedNumberFormat::~RuleBasedNumberFormat()
846 RuleBasedNumberFormat::clone(void) const
848 RuleBasedNumberFormat
* result
= NULL
;
849 UnicodeString rules
= getRules();
850 UErrorCode status
= U_ZERO_ERROR
;
852 result
= new RuleBasedNumberFormat(rules
, localizations
, locale
, perror
, status
);
855 status
= U_MEMORY_ALLOCATION_ERROR
;
858 if (U_FAILURE(status
)) {
862 result
->lenient
= lenient
;
864 //TODO: remove below when we fix the parse bug - See #6895 / #6896
865 result
->noParse
= noParse
;
871 RuleBasedNumberFormat::operator==(const Format
& other
) const
873 if (this == &other
) {
877 if (typeid(*this) == typeid(other
)) {
878 const RuleBasedNumberFormat
& rhs
= (const RuleBasedNumberFormat
&)other
;
879 if (locale
== rhs
.locale
&&
880 lenient
== rhs
.lenient
&&
881 (localizations
== NULL
882 ? rhs
.localizations
== NULL
883 : (rhs
.localizations
== NULL
885 : *localizations
== rhs
.localizations
))) {
887 NFRuleSet
** p
= ruleSets
;
888 NFRuleSet
** q
= rhs
.ruleSets
;
891 } else if (q
== NULL
) {
894 while (*p
&& *q
&& (**p
== **q
)) {
898 return *q
== NULL
&& *p
== NULL
;
906 RuleBasedNumberFormat::getRules() const
908 UnicodeString result
;
909 if (ruleSets
!= NULL
) {
910 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
911 (*p
)->appendRules(result
);
918 RuleBasedNumberFormat::getRuleSetName(int32_t index
) const
921 UnicodeString
string(TRUE
, localizations
->getRuleSetName(index
), (int32_t)-1);
923 } else if (ruleSets
) {
924 UnicodeString result
;
925 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
927 if (rs
->isPublic()) {
940 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
944 result
= localizations
->getNumberOfRuleSets();
945 } else if (ruleSets
) {
946 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
947 if ((**p
).isPublic()) {
956 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
958 return localizations
->getNumberOfDisplayLocales();
964 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index
, UErrorCode
& status
) const {
965 if (U_FAILURE(status
)) {
968 if (localizations
&& index
>= 0 && index
< localizations
->getNumberOfDisplayLocales()) {
969 UnicodeString
name(TRUE
, localizations
->getLocaleName(index
), -1);
971 int32_t cap
= name
.length() + 1;
974 bp
= (char *)uprv_malloc(cap
);
976 status
= U_MEMORY_ALLOCATION_ERROR
;
980 name
.extract(0, name
.length(), bp
, cap
, UnicodeString::kInvariant
);
981 Locale
retLocale(bp
);
987 status
= U_ILLEGAL_ARGUMENT_ERROR
;
993 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index
, const Locale
& localeParam
) {
994 if (localizations
&& index
>= 0 && index
< localizations
->getNumberOfRuleSets()) {
995 UnicodeString
localeName(localeParam
.getBaseName(), -1, UnicodeString::kInvariant
);
996 int32_t len
= localeName
.length();
997 UChar
* localeStr
= localeName
.getBuffer(len
+ 1);
1000 int32_t ix
= localizations
->indexForLocale(localeStr
);
1002 UnicodeString
name(TRUE
, localizations
->getDisplayName(ix
, index
), -1);
1006 // trim trailing portion, skipping over omitted sections
1007 do { --len
;} while (len
> 0 && localeStr
[len
] != 0x005f); // underscore
1008 while (len
> 0 && localeStr
[len
-1] == 0x005F) --len
;
1010 UnicodeString
name(TRUE
, localizations
->getRuleSetName(index
), -1);
1013 UnicodeString bogus
;
1019 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString
& ruleSetName
, const Locale
& localeParam
) {
1020 if (localizations
) {
1021 UnicodeString
rsn(ruleSetName
);
1022 int32_t ix
= localizations
->indexForRuleSet(rsn
.getTerminatedBuffer());
1023 return getRuleSetDisplayName(ix
, localeParam
);
1025 UnicodeString bogus
;
1031 RuleBasedNumberFormat::findRuleSet(const UnicodeString
& name
, UErrorCode
& status
) const
1033 if (U_SUCCESS(status
) && ruleSets
) {
1034 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
1036 if (rs
->isNamed(name
)) {
1040 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1046 RuleBasedNumberFormat::format(int32_t number
,
1047 UnicodeString
& toAppendTo
,
1048 FieldPosition
& /* pos */) const
1050 if (defaultRuleSet
) defaultRuleSet
->format((int64_t)number
, toAppendTo
, toAppendTo
.length());
1056 RuleBasedNumberFormat::format(int64_t number
,
1057 UnicodeString
& toAppendTo
,
1058 FieldPosition
& /* pos */) const
1060 if (defaultRuleSet
) defaultRuleSet
->format(number
, toAppendTo
, toAppendTo
.length());
1066 RuleBasedNumberFormat::format(double number
,
1067 UnicodeString
& toAppendTo
,
1068 FieldPosition
& /* pos */) const
1070 // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1071 if (uprv_isNaN(number
)) {
1072 DecimalFormatSymbols
* decFmtSyms
= getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1074 toAppendTo
+= decFmtSyms
->getConstSymbol(DecimalFormatSymbols::kNaNSymbol
);
1076 } else if (defaultRuleSet
) {
1077 defaultRuleSet
->format(number
, toAppendTo
, toAppendTo
.length());
1084 RuleBasedNumberFormat::format(int32_t number
,
1085 const UnicodeString
& ruleSetName
,
1086 UnicodeString
& toAppendTo
,
1087 FieldPosition
& /* pos */,
1088 UErrorCode
& status
) const
1090 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1091 if (U_SUCCESS(status
)) {
1092 if (ruleSetName
.indexOf(gPercentPercent
) == 0) {
1093 // throw new IllegalArgumentException("Can't use internal rule set");
1094 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1096 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1098 rs
->format((int64_t)number
, toAppendTo
, toAppendTo
.length());
1107 RuleBasedNumberFormat::format(int64_t number
,
1108 const UnicodeString
& ruleSetName
,
1109 UnicodeString
& toAppendTo
,
1110 FieldPosition
& /* pos */,
1111 UErrorCode
& status
) const
1113 if (U_SUCCESS(status
)) {
1114 if (ruleSetName
.indexOf(gPercentPercent
) == 0) {
1115 // throw new IllegalArgumentException("Can't use internal rule set");
1116 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1118 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1120 rs
->format(number
, toAppendTo
, toAppendTo
.length());
1128 // make linker happy
1130 RuleBasedNumberFormat::format(const Formattable
& obj
,
1131 UnicodeString
& toAppendTo
,
1133 UErrorCode
& status
) const
1135 return NumberFormat::format(obj
, toAppendTo
, pos
, status
);
1139 RuleBasedNumberFormat::format(double number
,
1140 const UnicodeString
& ruleSetName
,
1141 UnicodeString
& toAppendTo
,
1142 FieldPosition
& /* pos */,
1143 UErrorCode
& status
) const
1145 if (U_SUCCESS(status
)) {
1146 if (ruleSetName
.indexOf(gPercentPercent
) == 0) {
1147 // throw new IllegalArgumentException("Can't use internal rule set");
1148 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1150 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1152 rs
->format(number
, toAppendTo
, toAppendTo
.length());
1160 RuleBasedNumberFormat::parse(const UnicodeString
& text
,
1161 Formattable
& result
,
1162 ParsePosition
& parsePosition
) const
1164 //TODO: We need a real fix. See #6895 / #6896
1167 parsePosition
.setErrorIndex(0);
1172 parsePosition
.setErrorIndex(0);
1176 UnicodeString
workingText(text
, parsePosition
.getIndex());
1177 ParsePosition
workingPos(0);
1179 ParsePosition
high_pp(0);
1180 Formattable high_result
;
1182 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
1184 if (rp
->isPublic() && rp
->isParseable()) {
1185 ParsePosition
working_pp(0);
1186 Formattable working_result
;
1188 rp
->parse(workingText
, working_pp
, kMaxDouble
, working_result
, lenient
);
1189 if (working_pp
.getIndex() > high_pp
.getIndex()) {
1190 high_pp
= working_pp
;
1191 high_result
= working_result
;
1193 if (high_pp
.getIndex() == workingText
.length()) {
1200 int32_t startIndex
= parsePosition
.getIndex();
1201 parsePosition
.setIndex(startIndex
+ high_pp
.getIndex());
1202 if (high_pp
.getIndex() > 0) {
1203 parsePosition
.setErrorIndex(-1);
1205 int32_t errorIndex
= (high_pp
.getErrorIndex()>0)? high_pp
.getErrorIndex(): 0;
1206 parsePosition
.setErrorIndex(startIndex
+ errorIndex
);
1208 result
= high_result
;
1209 if (result
.getType() == Formattable::kDouble
) {
1210 int32_t r
= (int32_t)result
.getDouble();
1211 if ((double)r
== result
.getDouble()) {
1217 #if !UCONFIG_NO_COLLATION
1220 RuleBasedNumberFormat::setLenient(UBool enabled
)
1223 if (!enabled
&& collator
) {
1232 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString
& ruleSetName
, UErrorCode
& status
) {
1233 if (U_SUCCESS(status
)) {
1234 if (ruleSetName
.isEmpty()) {
1235 if (localizations
) {
1236 UnicodeString
name(TRUE
, localizations
->getRuleSetName(0), -1);
1237 defaultRuleSet
= findRuleSet(name
, status
);
1239 initDefaultRuleSet();
1241 } else if (ruleSetName
.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1242 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1244 NFRuleSet
* result
= findRuleSet(ruleSetName
, status
);
1245 if (result
!= NULL
) {
1246 defaultRuleSet
= result
;
1253 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1254 UnicodeString result
;
1255 if (defaultRuleSet
&& defaultRuleSet
->isPublic()) {
1256 defaultRuleSet
->getName(result
);
1258 result
.setToBogus();
1264 RuleBasedNumberFormat::initDefaultRuleSet()
1266 defaultRuleSet
= NULL
;
1271 const UnicodeString spellout
= UNICODE_STRING_SIMPLE("%spellout-numbering");
1272 const UnicodeString ordinal
= UNICODE_STRING_SIMPLE("%digits-ordinal");
1273 const UnicodeString duration
= UNICODE_STRING_SIMPLE("%duration");
1275 NFRuleSet
**p
= &ruleSets
[0];
1277 if ((*p
)->isNamed(spellout
) || (*p
)->isNamed(ordinal
) || (*p
)->isNamed(duration
)) {
1278 defaultRuleSet
= *p
;
1285 defaultRuleSet
= *--p
;
1286 if (!defaultRuleSet
->isPublic()) {
1287 while (p
!= ruleSets
) {
1288 if ((*--p
)->isPublic()) {
1289 defaultRuleSet
= *p
;
1298 RuleBasedNumberFormat::init(const UnicodeString
& rules
, LocalizationInfo
* localizationInfos
,
1299 UParseError
& pErr
, UErrorCode
& status
)
1301 // TODO: implement UParseError
1302 uprv_memset(&pErr
, 0, sizeof(UParseError
));
1303 // Note: this can leave ruleSets == NULL, so remaining code should check
1304 if (U_FAILURE(status
)) {
1308 this->localizations
= localizationInfos
== NULL
? NULL
: localizationInfos
->ref();
1310 UnicodeString
description(rules
);
1311 if (!description
.length()) {
1312 status
= U_MEMORY_ALLOCATION_ERROR
;
1316 // start by stripping the trailing whitespace from all the rules
1317 // (this is all the whitespace following each semicolon in the
1318 // description). This allows us to look for rule-set boundaries
1319 // by searching for ";%" without having to worry about whitespace
1320 // between the ; and the %
1321 stripWhitespace(description
);
1323 // check to see if there's a set of lenient-parse rules. If there
1324 // is, pull them out into our temporary holding place for them,
1325 // and delete them from the description before the real description-
1326 // parsing code sees them
1327 int32_t lp
= description
.indexOf(gLenientParse
);
1329 // we've got to make sure we're not in the middle of a rule
1330 // (where "%%lenient-parse" would actually get treated as
1332 if (lp
== 0 || description
.charAt(lp
- 1) == gSemiColon
) {
1333 // locate the beginning and end of the actual collation
1334 // rules (there may be whitespace between the name and
1335 // the first token in the description)
1336 int lpEnd
= description
.indexOf(gSemiPercent
, lp
);
1339 lpEnd
= description
.length() - 1;
1341 int lpStart
= lp
+ u_strlen(gLenientParse
);
1342 while (uprv_isRuleWhiteSpace(description
.charAt(lpStart
))) {
1346 // copy out the lenient-parse rules and delete them
1347 // from the description
1348 lenientParseRules
= new UnicodeString();
1350 if (lenientParseRules
== 0) {
1351 status
= U_MEMORY_ALLOCATION_ERROR
;
1354 lenientParseRules
->setTo(description
, lpStart
, lpEnd
- lpStart
);
1356 description
.remove(lp
, lpEnd
+ 1 - lp
);
1360 // pre-flight parsing the description and count the number of
1361 // rule sets (";%" marks the end of one rule set and the beginning
1363 int numRuleSets
= 0;
1364 for (int32_t p
= description
.indexOf(gSemiPercent
); p
!= -1; p
= description
.indexOf(gSemiPercent
, p
)) {
1370 // our rule list is an array of the appropriate size
1371 ruleSets
= (NFRuleSet
**)uprv_malloc((numRuleSets
+ 1) * sizeof(NFRuleSet
*));
1373 if (ruleSets
== 0) {
1374 status
= U_MEMORY_ALLOCATION_ERROR
;
1378 for (int i
= 0; i
<= numRuleSets
; ++i
) {
1382 // divide up the descriptions into individual rule-set descriptions
1383 // and store them in a temporary array. At each step, we also
1384 // new up a rule set, but all this does is initialize its name
1385 // and remove it from its description. We can't actually parse
1386 // the rest of the descriptions and finish initializing everything
1387 // because we have to know the names and locations of all the rule
1388 // sets before we can actually set everything up
1390 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1393 UnicodeString
* ruleSetDescriptions
= new UnicodeString
[numRuleSets
];
1394 if (ruleSetDescriptions
== 0) {
1395 status
= U_MEMORY_ALLOCATION_ERROR
;
1402 for (int32_t p
= description
.indexOf(gSemiPercent
); p
!= -1; p
= description
.indexOf(gSemiPercent
, start
)) {
1403 ruleSetDescriptions
[curRuleSet
].setTo(description
, start
, p
+ 1 - start
);
1404 ruleSets
[curRuleSet
] = new NFRuleSet(ruleSetDescriptions
, curRuleSet
, status
);
1405 if (ruleSets
[curRuleSet
] == 0) {
1406 status
= U_MEMORY_ALLOCATION_ERROR
;
1412 ruleSetDescriptions
[curRuleSet
].setTo(description
, start
, description
.length() - start
);
1413 ruleSets
[curRuleSet
] = new NFRuleSet(ruleSetDescriptions
, curRuleSet
, status
);
1414 if (ruleSets
[curRuleSet
] == 0) {
1415 status
= U_MEMORY_ALLOCATION_ERROR
;
1420 // now we can take note of the formatter's default rule set, which
1421 // is the last public rule set in the description (it's the last
1422 // rather than the first so that a user can create a new formatter
1423 // from an existing formatter and change its default behavior just
1424 // by appending more rule sets to the end)
1426 // {dlf} Initialization of a fraction rule set requires the default rule
1427 // set to be known. For purposes of initialization, this is always the
1428 // last public rule set, no matter what the localization data says.
1429 initDefaultRuleSet();
1431 // finally, we can go back through the temporary descriptions
1432 // list and finish setting up the substructure (and we throw
1433 // away the temporary descriptions as we go)
1435 for (int i
= 0; i
< numRuleSets
; i
++) {
1436 ruleSets
[i
]->parseRules(ruleSetDescriptions
[i
], this, status
);
1440 // Now that the rules are initialized, the 'real' default rule
1441 // set can be adjusted by the localization data.
1443 // The C code keeps the localization array as is, rather than building
1444 // a separate array of the public rule set names, so we have less work
1445 // to do here-- but we still need to check the names.
1447 if (localizationInfos
) {
1448 // confirm the names, if any aren't in the rules, that's an error
1449 // it is ok if the rules contain public rule sets that are not in this list
1450 for (int32_t i
= 0; i
< localizationInfos
->getNumberOfRuleSets(); ++i
) {
1451 UnicodeString
name(TRUE
, localizationInfos
->getRuleSetName(i
), -1);
1452 NFRuleSet
* rs
= findRuleSet(name
, status
);
1457 defaultRuleSet
= rs
;
1461 defaultRuleSet
= getDefaultRuleSet();
1465 delete[] ruleSetDescriptions
;
// Rebuilds `description` segment by segment: for each semicolon-delimited
// segment, the leading rule whitespace is skipped and the remainder (through
// the semicolon, when there is one) is appended to a scratch string, which
// finally replaces the contents of `description`.
1469 RuleBasedNumberFormat::stripWhitespace(UnicodeString
& description
)
1471 // iterate through the characters...
// `result` accumulates the stripped text
1472 UnicodeString result
;
// a `start` of -1 is the loop's "done" marker
1475 while (start
!= -1 && start
< description
.length()) {
1476 // seek to the first non-whitespace character...
1477 while (start
< description
.length()
1478 && uprv_isRuleWhiteSpace(description
.charAt(start
))) {
1482 // locate the next semicolon in the text and copy the text from
1483 // our current position up to that semicolon into the result
1484 int32_t p
= description
.indexOf(gSemiColon
, start
);
1486 // or if we don't find a semicolon, just copy the rest of
1487 // the string into the result
1488 result
.append(description
, start
, description
.length() - start
);
1491 else if (p
< description
.length()) {
// length p + 1 - start: the copied span includes the semicolon itself
1492 result
.append(description
, start
, p
+ 1 - start
);
1496 // when we get here, we've sought past the end of the string, and
1497 // we terminate the loop (we continue until *start* is -1 rather
1498 // than until *p* is -1, because otherwise we'd miss the last
1499 // rule in the description)
// overwrite the caller's string with the stripped text
1505 description
.setTo(result
);
// Tears down the state owned by this formatter: walks the ruleSets array,
// frees the array itself with uprv_free, deletes decimalFormatSymbols and
// lenientParseRules (resetting both pointers to NULL), and calls unref() on
// localizations when it is set.
1510 RuleBasedNumberFormat::dispose()
// the array is walked until a null entry is reached
// NOTE(review): the per-entry cleanup inside this loop is not visible in this
// view -- confirm each rule set is released before the array is freed.
1513 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
1516 uprv_free(ruleSets
);
// NOTE(review): the statements guarded by this conditional are not visible in
// this view.
1520 #if !UCONFIG_NO_COLLATION
1525 delete decimalFormatSymbols
;
1526 decimalFormatSymbols
= NULL
;
1528 delete lenientParseRules
;
1529 lenientParseRules
= NULL
;
// whatever unref() returns becomes the new value of localizations
1531 if (localizations
) localizations
= localizations
->unref();
1535 //-----------------------------------------------------------------------
1536 // package-internal API
1537 //-----------------------------------------------------------------------
1540 * Returns the collator to use for lenient parsing. The collator is lazily created:
1541 * this function creates it the first time it's called.
1542 * @return The collator to use for lenient parsing, or null if lenient parsing
// Builds the collator on first use (only when `lenient` is set and no
// collator has been stored yet) and caches it in the `collator` member.
1546 RuleBasedNumberFormat::getCollator() const
1548 #if !UCONFIG_NO_COLLATION
1553 // lazy-evaluate the collator
1554 if (collator
== NULL
&& lenient
) {
1555 // create a default collator based on the formatter's locale,
1556 // then pull out that collator's rules, append any additional
1557 // rules specified in the description, and create a _new_
1558 // collator based on the combination of those rules
1560 UErrorCode status
= U_ZERO_ERROR
;
1562 Collator
* temp
= Collator::createInstance(locale
, status
);
1563 RuleBasedCollator
* newCollator
;
// newCollator is assigned inside the condition; the branch is taken only if
// createInstance succeeded and the result really is a RuleBasedCollator
1564 if (U_SUCCESS(status
) && (newCollator
= dynamic_cast<RuleBasedCollator
*>(temp
)) != NULL
) {
1565 if (lenientParseRules
) {
// start from the locale collator's rules and append the lenient-parse rules
1566 UnicodeString
rules(newCollator
->getRules());
1567 rules
.append(*lenientParseRules
);
1569 newCollator
= new RuleBasedCollator(rules
, status
);
1570 // Exit if newCollator could not be created.
// NOTE(review): the exit path itself is not visible in this view -- confirm
// that `temp` (created above) is released on it.
1571 if (newCollator
== NULL
) {
1577 if (U_SUCCESS(status
)) {
1578 newCollator
->setAttribute(UCOL_DECOMPOSITION_MODE
, UCOL_ON
, status
);
// this method is const, so constness is cast away on `this` in order to
// store the new collator in the member
1580 ((RuleBasedNumberFormat
*)this)->collator
= newCollator
;
1589 // if lenient-parse mode is off, this will be null
1590 // (see setLenientParseMode())
1596 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1597 * instances owned by this formatter. This object is lazily created: this function
1598 * creates it the first time it's called.
1599 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1600 * instances owned by this formatter.
1602 DecimalFormatSymbols
*
1603 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1605 // lazy-evaluate the DecimalFormatSymbols object. This object
1606 // is shared by all DecimalFormat instances belonging to this
1608 if (decimalFormatSymbols
== NULL
) {
1609 UErrorCode status
= U_ZERO_ERROR
;
1610 DecimalFormatSymbols
* temp
= new DecimalFormatSymbols(locale
, status
);
1611 if (U_SUCCESS(status
)) {
1612 ((RuleBasedNumberFormat
*)this)->decimalFormatSymbols
= temp
;
1617 return decimalFormatSymbols
;