2 *******************************************************************************
3 * Copyright (C) 1997-2009, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
8 #include "unicode/rbnf.h"
12 #include "unicode/normlzr.h"
13 #include "unicode/tblcoll.h"
14 #include "unicode/uchar.h"
15 #include "unicode/ucol.h"
16 #include "unicode/uloc.h"
17 #include "unicode/unum.h"
18 #include "unicode/ures.h"
19 #include "unicode/ustring.h"
20 #include "unicode/utf16.h"
21 #include "unicode/udata.h"
// Name handed to ures_open() as the bundle location when the tag-based constructor
// loads its rule descriptions: the ICU data name, the tree separator, then "rbnf".
35 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
37 static const UChar gPercentPercent
[] =
42 // All urbnf objects are created through openRules, so we init all of the
43 // Unicode string constants required by rbnf, nfrs, or nfr here.
44 static const UChar gLenientParse
[] =
46 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
47 }; /* "%%lenient-parse:" */
48 static const UChar gSemiColon
= 0x003B;
49 static const UChar gSemiPercent
[] =
// kMaxDouble is the limit value RuleBasedNumberFormat::parse() passes to each
// rule set's parse() call. It is built as the square of 2^kSomeNumberOfBitsDiv2
// (2^22 * 2^22 == 2^44) so that the integer shift itself stays well inside int.
// Every replacement list is fully parenthesized so the macros expand to a single
// primary expression regardless of the operators that surround them.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
60 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat
)
63 This is a utility class. It does not use ICU's RTTI.
64 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
65 Please make sure that intltest passes on Windows in Release mode,
66 since the string pooling per compilation unit will mess up how RTTI works.
67 The RTTI code was also removed due to lack of code coverage.
69 class LocalizationInfo
: public UMemory
{
71 virtual ~LocalizationInfo() {};
// A new info object starts with a reference count of zero; holders bump it through ref().
75 LocalizationInfo() : refcount(0) {}
77 LocalizationInfo
* ref(void) {
82 LocalizationInfo
* unref(void) {
83 if (refcount
&& --refcount
== 0) {
89 virtual UBool
operator==(const LocalizationInfo
* rhs
) const;
90 inline UBool
operator!=(const LocalizationInfo
* rhs
) const { return !operator==(rhs
); }
92 virtual int32_t getNumberOfRuleSets(void) const = 0;
93 virtual const UChar
* getRuleSetName(int32_t index
) const = 0;
94 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
95 virtual const UChar
* getLocaleName(int32_t index
) const = 0;
96 virtual const UChar
* getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const = 0;
98 virtual int32_t indexForLocale(const UChar
* locale
) const;
99 virtual int32_t indexForRuleSet(const UChar
* ruleset
) const;
101 // virtual UClassID getDynamicClassID() const = 0;
102 // static UClassID getStaticClassID(void);
105 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
107 // if both strings are NULL, this returns TRUE
109 streq(const UChar
* lhs
, const UChar
* rhs
) {
114 return u_strcmp(lhs
, rhs
) == 0;
120 LocalizationInfo::operator==(const LocalizationInfo
* rhs
) const {
126 int32_t rsc
= getNumberOfRuleSets();
127 if (rsc
== rhs
->getNumberOfRuleSets()) {
128 for (int i
= 0; i
< rsc
; ++i
) {
129 if (!streq(getRuleSetName(i
), rhs
->getRuleSetName(i
))) {
133 int32_t dlc
= getNumberOfDisplayLocales();
134 if (dlc
== rhs
->getNumberOfDisplayLocales()) {
135 for (int i
= 0; i
< dlc
; ++i
) {
136 const UChar
* locale
= getLocaleName(i
);
137 int32_t ix
= rhs
->indexForLocale(locale
);
138 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
139 if (!streq(locale
, rhs
->getLocaleName(ix
))) {
142 for (int j
= 0; j
< rsc
; ++j
) {
143 if (!streq(getDisplayName(i
, j
), rhs
->getDisplayName(ix
, j
))) {
156 LocalizationInfo::indexForLocale(const UChar
* locale
) const {
157 for (int i
= 0; i
< getNumberOfDisplayLocales(); ++i
) {
158 if (streq(locale
, getLocaleName(i
))) {
166 LocalizationInfo::indexForRuleSet(const UChar
* ruleset
) const {
168 for (int i
= 0; i
< getNumberOfRuleSets(); ++i
) {
169 if (streq(ruleset
, getRuleSetName(i
))) {
// Callback type a VArray can be constructed with: takes one untyped pointer and
// returns nothing.
178 typedef void (*Fn_Deleter
)(void*);
// Creates an empty array: no buffer, zero capacity, zero size, and no deleter callback.
186 VArray() : buf(NULL
), cap(0), size(0), deleter(NULL
) {}
// Creates an empty array (no buffer, zero capacity, zero size) that remembers `del`
// as its deleter callback.
188 VArray(Fn_Deleter del
) : buf(NULL
), cap(0), size(0), deleter(del
) {}
192 for (int i
= 0; i
< size
; ++i
) {
203 void add(void* elem
, UErrorCode
& status
) {
204 if (U_SUCCESS(status
)) {
208 } else if (cap
< 256) {
214 buf
= (void**)uprv_malloc(cap
* sizeof(void*));
216 buf
= (void**)uprv_realloc(buf
, cap
* sizeof(void*));
219 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
220 status
= U_MEMORY_ALLOCATION_ERROR
;
223 void* start
= &buf
[size
];
224 size_t count
= (cap
- size
) * sizeof(void*);
225 uprv_memset(start
, 0, count
); // fill with nulls, just because
231 void** release(void) {
242 class StringLocalizationInfo
: public LocalizationInfo
{
248 friend class LocDataParser
;
250 StringLocalizationInfo(UChar
* i
, UChar
*** d
, int32_t numRS
, int32_t numLocs
)
251 : info(i
), data(d
), numRuleSets(numRS
), numLocales(numLocs
)
256 static StringLocalizationInfo
* create(const UnicodeString
& info
, UParseError
& perror
, UErrorCode
& status
);
258 virtual ~StringLocalizationInfo();
// Returns numRuleSets, the rule-set count supplied to the constructor.
259 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets
; }
260 virtual const UChar
* getRuleSetName(int32_t index
) const;
// Returns numLocales, the display-locale count supplied to the constructor.
261 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales
; }
262 virtual const UChar
* getLocaleName(int32_t index
) const;
263 virtual const UChar
* getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const;
265 // virtual UClassID getDynamicClassID() const;
266 // static UClassID getStaticClassID(void);
269 void init(UErrorCode
& status
) const;
274 OPEN_ANGLE
= 0x003c, /* '<' */
275 CLOSE_ANGLE
= 0x003e, /* '>' */
283 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
285 class LocDataParser
{
// Initializes pe and ec from the caller's parseError and status, and starts with
// nothing to parse: data, e and p are NULL and ch holds the 0xffff sentinel.
294 LocDataParser(UParseError
& parseError
, UErrorCode
& status
)
295 : data(NULL
), e(NULL
), p(NULL
), ch(0xffff), pe(parseError
), ec(status
) {}
299 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
300 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
302 StringLocalizationInfo
* parse(UChar
* data
, int32_t len
);
306 void inc(void) { ++p
; ch
= 0xffff; }
307 UBool
checkInc(UChar c
) { if (p
< e
&& (ch
== c
|| *p
== c
)) { inc(); return TRUE
; } return FALSE
; }
308 UBool
check(UChar c
) { return p
< e
&& (ch
== c
|| *p
== c
); }
309 void skipWhitespace(void) { while (p
< e
&& uprv_isRuleWhiteSpace(ch
!= 0xffff ? ch
: *p
)) inc();}
310 UBool
inList(UChar c
, const UChar
* list
) const {
311 if (*list
== SPACE
&& uprv_isRuleWhiteSpace(c
)) return TRUE
;
312 while (*list
&& *list
!= c
) ++list
; return *list
== c
;
314 void parseError(const char* msg
);
316 StringLocalizationInfo
* doParse(void);
318 UChar
** nextArray(int32_t& requiredLength
);
319 UChar
* nextString(void);
// Variant that forwards the message text: reports the failure through
// parseError(msg) and leaves the calling function with NULL.
// Wrapped in do { } while (0) so both statements stay together when the macro is
// the whole body of an unbraced if/else; call sites keep their trailing ';'.
#define ERROR(msg) do { parseError(msg); return NULL; } while (0)
// Variant that drops the message text: calls parseError(NULL) and leaves the
// calling function with NULL.
// Wrapped in do { } while (0) so both statements stay together when the macro is
// the whole body of an unbraced if/else; call sites keep their trailing ';'.
#define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
329 static const UChar DQUOTE_STOPLIST
[] = {
333 static const UChar SQUOTE_STOPLIST
[] = {
337 static const UChar NOQUOTE_STOPLIST
[] = {
338 SPACE
, COMMA
, CLOSE_ANGLE
, OPEN_ANGLE
, TICK
, QUOTE
, 0
346 StringLocalizationInfo
*
347 LocDataParser::parse(UChar
* _data
, int32_t len
) {
349 if (_data
) uprv_free(_data
);
355 pe
.postContext
[0] = 0;
356 pe
.preContext
[0] = 0;
359 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
364 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
378 StringLocalizationInfo
*
379 LocDataParser::doParse(void) {
381 if (!checkInc(OPEN_ANGLE
)) {
382 ERROR("Missing open angle");
384 VArray
array(DeleteFn
);
385 UBool mightHaveNext
= TRUE
;
386 int32_t requiredLength
= -1;
387 while (mightHaveNext
) {
388 mightHaveNext
= FALSE
;
389 UChar
** elem
= nextArray(requiredLength
);
391 UBool haveComma
= check(COMMA
);
396 mightHaveNext
= TRUE
;
398 } else if (haveComma
) {
399 ERROR("Unexpected character");
404 if (!checkInc(CLOSE_ANGLE
)) {
405 if (check(OPEN_ANGLE
)) {
406 ERROR("Missing comma in outer array");
408 ERROR("Missing close angle bracket in outer array");
414 ERROR("Extra text after close of localization data");
419 int32_t numLocs
= array
.length() - 2; // subtract first, NULL
420 UChar
*** result
= (UChar
***)array
.release();
422 return new StringLocalizationInfo(data
, result
, requiredLength
-2, numLocs
); // subtract first, NULL
426 ERROR("Unknown error");
430 LocDataParser::nextArray(int32_t& requiredLength
) {
436 if (!checkInc(OPEN_ANGLE
)) {
437 ERROR("Missing open angle");
441 UBool mightHaveNext
= TRUE
;
442 while (mightHaveNext
) {
443 mightHaveNext
= FALSE
;
444 UChar
* elem
= nextString();
446 UBool haveComma
= check(COMMA
);
451 mightHaveNext
= TRUE
;
453 } else if (haveComma
) {
454 ERROR("Unexpected comma");
458 if (!checkInc(CLOSE_ANGLE
)) {
459 if (check(OPEN_ANGLE
)) {
460 ERROR("Missing close angle bracket in inner array");
462 ERROR("Missing comma in inner array");
468 if (requiredLength
== -1) {
469 requiredLength
= array
.length() + 1;
470 } else if (array
.length() != requiredLength
) {
471 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
472 ERROR("Array not of required length");
475 return (UChar
**)array
.release();
477 ERROR("Unknown Error");
481 LocDataParser::nextString() {
482 UChar
* result
= NULL
;
486 const UChar
* terminators
;
488 UBool haveQuote
= c
== QUOTE
|| c
== TICK
;
491 terminators
= c
== QUOTE
? DQUOTE_STOPLIST
: SQUOTE_STOPLIST
;
493 terminators
= NOQUOTE_STOPLIST
;
496 while (p
< e
&& !inList(*p
, terminators
)) ++p
;
498 ERROR("Unexpected end of data");
504 *p
= 0x0; // terminate by writing to data
505 result
= start
; // just point into data
509 ERROR("Missing matching quote");
510 } else if (p
== start
) {
511 ERROR("Empty string");
514 } else if (x
== OPEN_ANGLE
|| x
== TICK
|| x
== QUOTE
) {
515 ERROR("Unexpected character in string");
519 // ok for there to be no next string
524 LocDataParser::parseError(const char* /*str*/) {
529 const UChar
* start
= p
- U_PARSE_CONTEXT_LEN
- 1;
533 for (UChar
* x
= p
; --x
>= start
;) {
539 const UChar
* limit
= p
+ U_PARSE_CONTEXT_LEN
- 1;
543 u_strncpy(pe
.preContext
, start
, (int32_t)(p
-start
));
544 pe
.preContext
[p
-start
] = 0;
545 u_strncpy(pe
.postContext
, p
, (int32_t)(limit
-p
));
546 pe
.postContext
[limit
-p
] = 0;
547 pe
.offset
= (int32_t)(p
- data
);
550 fprintf(stderr
, "%s at or near character %d: ", str
, p
-data
);
553 msg
.append(start
, p
- start
);
554 msg
.append((UChar
)0x002f); /* SOLIDUS/SLASH */
555 msg
.append(p
, limit
-p
);
559 int32_t len
= msg
.extract(0, msg
.length(), buf
, 128);
565 fprintf(stderr
, "%s\n", buf
);
579 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
581 StringLocalizationInfo
*
582 StringLocalizationInfo::create(const UnicodeString
& info
, UParseError
& perror
, UErrorCode
& status
) {
583 if (U_FAILURE(status
)) {
587 int32_t len
= info
.length();
589 return NULL
; // no error;
592 UChar
* p
= (UChar
*)uprv_malloc(len
* sizeof(UChar
));
594 status
= U_MEMORY_ALLOCATION_ERROR
;
597 info
.extract(p
, len
, status
);
598 if (!U_FAILURE(status
)) {
599 status
= U_ZERO_ERROR
; // clear warning about non-termination
602 LocDataParser
parser(perror
, status
);
603 return parser
.parse(p
, len
);
606 StringLocalizationInfo::~StringLocalizationInfo() {
607 for (UChar
*** p
= (UChar
***)data
; *p
; ++p
) {
608 // remaining data is simply pointer into our unicode string data.
609 if (*p
) uprv_free(*p
);
611 if (data
) uprv_free(data
);
612 if (info
) uprv_free(info
);
617 StringLocalizationInfo::getRuleSetName(int32_t index
) const {
618 if (index
>= 0 && index
< getNumberOfRuleSets()) {
619 return data
[0][index
];
625 StringLocalizationInfo::getLocaleName(int32_t index
) const {
626 if (index
>= 0 && index
< getNumberOfDisplayLocales()) {
627 return data
[index
+1][0];
633 StringLocalizationInfo::getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const {
634 if (localeIndex
>= 0 && localeIndex
< getNumberOfDisplayLocales() &&
635 ruleIndex
>= 0 && ruleIndex
< getNumberOfRuleSets()) {
636 return data
[localeIndex
+1][ruleIndex
+1];
643 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
644 const UnicodeString
& locs
,
645 const Locale
& alocale
, UParseError
& perror
, UErrorCode
& status
)
647 , defaultRuleSet(NULL
)
650 , decimalFormatSymbols(NULL
)
652 , lenientParseRules(NULL
)
653 , localizations(NULL
)
655 LocalizationInfo
* locinfo
= StringLocalizationInfo::create(locs
, perror
, status
);
656 init(description
, locinfo
, perror
, status
);
659 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
660 const UnicodeString
& locs
,
661 UParseError
& perror
, UErrorCode
& status
)
663 , defaultRuleSet(NULL
)
664 , locale(Locale::getDefault())
666 , decimalFormatSymbols(NULL
)
668 , lenientParseRules(NULL
)
669 , localizations(NULL
)
671 LocalizationInfo
* locinfo
= StringLocalizationInfo::create(locs
, perror
, status
);
672 init(description
, locinfo
, perror
, status
);
675 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
676 LocalizationInfo
* info
,
677 const Locale
& alocale
, UParseError
& perror
, UErrorCode
& status
)
679 , defaultRuleSet(NULL
)
682 , decimalFormatSymbols(NULL
)
684 , lenientParseRules(NULL
)
685 , localizations(NULL
)
687 init(description
, info
, perror
, status
);
690 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
694 , defaultRuleSet(NULL
)
695 , locale(Locale::getDefault())
697 , decimalFormatSymbols(NULL
)
699 , lenientParseRules(NULL
)
700 , localizations(NULL
)
702 init(description
, NULL
, perror
, status
);
705 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
706 const Locale
& aLocale
,
710 , defaultRuleSet(NULL
)
713 , decimalFormatSymbols(NULL
)
715 , lenientParseRules(NULL
)
716 , localizations(NULL
)
718 init(description
, NULL
, perror
, status
);
721 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag
, const Locale
& alocale
, UErrorCode
& status
)
723 , defaultRuleSet(NULL
)
726 , decimalFormatSymbols(NULL
)
728 , lenientParseRules(NULL
)
729 , localizations(NULL
)
731 if (U_FAILURE(status
)) {
735 const char* fmt_tag
= "";
737 case URBNF_SPELLOUT
: fmt_tag
= "SpelloutRules"; break;
738 case URBNF_ORDINAL
: fmt_tag
= "OrdinalRules"; break;
739 case URBNF_DURATION
: fmt_tag
= "DurationRules"; break;
740 default: status
= U_ILLEGAL_ARGUMENT_ERROR
; return;
743 // TODO: read localization info from resource
744 LocalizationInfo
* locinfo
= NULL
;
747 UResourceBundle
* nfrb
= ures_open(U_ICUDATA_RBNF
, locale
.getName(), &status
);
748 if (U_SUCCESS(status
)) {
749 setLocaleIDs(ures_getLocaleByType(nfrb
, ULOC_VALID_LOCALE
, &status
),
750 ures_getLocaleByType(nfrb
, ULOC_ACTUAL_LOCALE
, &status
));
751 const UChar
* description
= ures_getStringByKey(nfrb
, fmt_tag
, &len
, &status
);
752 UnicodeString
desc(description
, len
);
754 init (desc
, locinfo
, perror
, status
);
759 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat
& rhs
)
762 , defaultRuleSet(NULL
)
765 , decimalFormatSymbols(NULL
)
767 , lenientParseRules(NULL
)
768 , localizations(NULL
)
770 this->operator=(rhs
);
775 RuleBasedNumberFormat
&
776 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat
& rhs
)
778 UErrorCode status
= U_ZERO_ERROR
;
781 lenient
= rhs
.lenient
;
783 UnicodeString rules
= rhs
.getRules();
785 init(rules
, rhs
.localizations
? rhs
.localizations
->ref() : NULL
, perror
, status
);
790 RuleBasedNumberFormat::~RuleBasedNumberFormat()
796 RuleBasedNumberFormat::clone(void) const
798 RuleBasedNumberFormat
* result
= NULL
;
799 UnicodeString rules
= getRules();
800 UErrorCode status
= U_ZERO_ERROR
;
802 result
= new RuleBasedNumberFormat(rules
, localizations
, locale
, perror
, status
);
805 status
= U_MEMORY_ALLOCATION_ERROR
;
808 if (U_FAILURE(status
)) {
812 result
->lenient
= lenient
;
818 RuleBasedNumberFormat::operator==(const Format
& other
) const
820 if (this == &other
) {
824 if (other
.getDynamicClassID() == getStaticClassID()) {
825 const RuleBasedNumberFormat
& rhs
= (const RuleBasedNumberFormat
&)other
;
826 if (locale
== rhs
.locale
&&
827 lenient
== rhs
.lenient
&&
828 (localizations
== NULL
829 ? rhs
.localizations
== NULL
830 : (rhs
.localizations
== NULL
832 : *localizations
== rhs
.localizations
))) {
834 NFRuleSet
** p
= ruleSets
;
835 NFRuleSet
** q
= rhs
.ruleSets
;
838 } else if (q
== NULL
) {
841 while (*p
&& *q
&& (**p
== **q
)) {
845 return *q
== NULL
&& *p
== NULL
;
853 RuleBasedNumberFormat::getRules() const
855 UnicodeString result
;
856 if (ruleSets
!= NULL
) {
857 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
858 (*p
)->appendRules(result
);
865 RuleBasedNumberFormat::getRuleSetName(int32_t index
) const
868 UnicodeString
string(TRUE
, localizations
->getRuleSetName(index
), (int32_t)-1);
870 } else if (ruleSets
) {
871 UnicodeString result
;
872 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
874 if (rs
->isPublic()) {
887 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
891 result
= localizations
->getNumberOfRuleSets();
892 } else if (ruleSets
) {
893 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
894 if ((**p
).isPublic()) {
903 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
905 return localizations
->getNumberOfDisplayLocales();
911 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index
, UErrorCode
& status
) const {
912 if (U_FAILURE(status
)) {
915 if (localizations
&& index
>= 0 && index
< localizations
->getNumberOfDisplayLocales()) {
916 UnicodeString
name(TRUE
, localizations
->getLocaleName(index
), -1);
918 int32_t cap
= name
.length() + 1;
921 bp
= (char *)uprv_malloc(cap
);
923 status
= U_MEMORY_ALLOCATION_ERROR
;
927 name
.extract(0, name
.length(), bp
, cap
, UnicodeString::kInvariant
);
928 Locale
retLocale(bp
);
934 status
= U_ILLEGAL_ARGUMENT_ERROR
;
940 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index
, const Locale
& localeParam
) {
941 if (localizations
&& index
>= 0 && index
< localizations
->getNumberOfRuleSets()) {
942 UnicodeString
localeName(localeParam
.getBaseName(), -1, UnicodeString::kInvariant
);
943 int32_t len
= localeName
.length();
944 UChar
* localeStr
= localeName
.getBuffer(len
+ 1);
947 int32_t ix
= localizations
->indexForLocale(localeStr
);
949 UnicodeString
name(TRUE
, localizations
->getDisplayName(ix
, index
), -1);
953 // trim trailing portion, skipping over omitted sections
954 do { --len
;} while (len
> 0 && localeStr
[len
] != 0x005f); // underscore
955 while (len
> 0 && localeStr
[len
-1] == 0x005F) --len
;
957 UnicodeString
name(TRUE
, localizations
->getRuleSetName(index
), -1);
966 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString
& ruleSetName
, const Locale
& localeParam
) {
968 UnicodeString
rsn(ruleSetName
);
969 int32_t ix
= localizations
->indexForRuleSet(rsn
.getTerminatedBuffer());
970 return getRuleSetDisplayName(ix
, localeParam
);
978 RuleBasedNumberFormat::findRuleSet(const UnicodeString
& name
, UErrorCode
& status
) const
980 if (U_SUCCESS(status
) && ruleSets
) {
981 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
983 if (rs
->isNamed(name
)) {
987 status
= U_ILLEGAL_ARGUMENT_ERROR
;
993 RuleBasedNumberFormat::format(int32_t number
,
994 UnicodeString
& toAppendTo
,
995 FieldPosition
& /* pos */) const
997 if (defaultRuleSet
) defaultRuleSet
->format((int64_t)number
, toAppendTo
, toAppendTo
.length());
1003 RuleBasedNumberFormat::format(int64_t number
,
1004 UnicodeString
& toAppendTo
,
1005 FieldPosition
& /* pos */) const
1007 if (defaultRuleSet
) defaultRuleSet
->format(number
, toAppendTo
, toAppendTo
.length());
1013 RuleBasedNumberFormat::format(double number
,
1014 UnicodeString
& toAppendTo
,
1015 FieldPosition
& /* pos */) const
1017 if (defaultRuleSet
) defaultRuleSet
->format(number
, toAppendTo
, toAppendTo
.length());
1023 RuleBasedNumberFormat::format(int32_t number
,
1024 const UnicodeString
& ruleSetName
,
1025 UnicodeString
& toAppendTo
,
1026 FieldPosition
& /* pos */,
1027 UErrorCode
& status
) const
1029 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1030 if (U_SUCCESS(status
)) {
1031 if (ruleSetName
.indexOf(gPercentPercent
) == 0) {
1032 // throw new IllegalArgumentException("Can't use internal rule set");
1033 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1035 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1037 rs
->format((int64_t)number
, toAppendTo
, toAppendTo
.length());
1046 RuleBasedNumberFormat::format(int64_t number
,
1047 const UnicodeString
& ruleSetName
,
1048 UnicodeString
& toAppendTo
,
1049 FieldPosition
& /* pos */,
1050 UErrorCode
& status
) const
1052 if (U_SUCCESS(status
)) {
1053 if (ruleSetName
.indexOf(gPercentPercent
) == 0) {
1054 // throw new IllegalArgumentException("Can't use internal rule set");
1055 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1057 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1059 rs
->format(number
, toAppendTo
, toAppendTo
.length());
1067 // make linker happy
1069 RuleBasedNumberFormat::format(const Formattable
& obj
,
1070 UnicodeString
& toAppendTo
,
1072 UErrorCode
& status
) const
1074 return NumberFormat::format(obj
, toAppendTo
, pos
, status
);
1078 RuleBasedNumberFormat::format(double number
,
1079 const UnicodeString
& ruleSetName
,
1080 UnicodeString
& toAppendTo
,
1081 FieldPosition
& /* pos */,
1082 UErrorCode
& status
) const
1084 if (U_SUCCESS(status
)) {
1085 if (ruleSetName
.indexOf(gPercentPercent
) == 0) {
1086 // throw new IllegalArgumentException("Can't use internal rule set");
1087 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1089 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1091 rs
->format(number
, toAppendTo
, toAppendTo
.length());
1099 RuleBasedNumberFormat::parse(const UnicodeString
& text
,
1100 Formattable
& result
,
1101 ParsePosition
& parsePosition
) const
1104 parsePosition
.setErrorIndex(0);
1108 UnicodeString
workingText(text
, parsePosition
.getIndex());
1109 ParsePosition
workingPos(0);
1111 ParsePosition
high_pp(0);
1112 Formattable high_result
;
1114 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
1116 if (rp
->isPublic()) {
1117 ParsePosition
working_pp(0);
1118 Formattable working_result
;
1120 rp
->parse(workingText
, working_pp
, kMaxDouble
, working_result
);
1121 if (working_pp
.getIndex() > high_pp
.getIndex()) {
1122 high_pp
= working_pp
;
1123 high_result
= working_result
;
1125 if (high_pp
.getIndex() == workingText
.length()) {
1132 int32_t startIndex
= parsePosition
.getIndex();
1133 parsePosition
.setIndex(startIndex
+ high_pp
.getIndex());
1134 if (high_pp
.getIndex() > 0) {
1135 parsePosition
.setErrorIndex(-1);
1137 int32_t errorIndex
= (high_pp
.getErrorIndex()>0)? high_pp
.getErrorIndex(): 0;
1138 parsePosition
.setErrorIndex(startIndex
+ errorIndex
);
1140 result
= high_result
;
1141 if (result
.getType() == Formattable::kDouble
) {
1142 int32_t r
= (int32_t)result
.getDouble();
1143 if ((double)r
== result
.getDouble()) {
1149 #if !UCONFIG_NO_COLLATION
1152 RuleBasedNumberFormat::setLenient(UBool enabled
)
1155 if (!enabled
&& collator
) {
1164 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString
& ruleSetName
, UErrorCode
& status
) {
1165 if (U_SUCCESS(status
)) {
1166 if (ruleSetName
.isEmpty()) {
1167 if (localizations
) {
1168 UnicodeString
name(TRUE
, localizations
->getRuleSetName(0), -1);
1169 defaultRuleSet
= findRuleSet(name
, status
);
1171 initDefaultRuleSet();
1173 } else if (ruleSetName
.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1174 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1176 NFRuleSet
* result
= findRuleSet(ruleSetName
, status
);
1177 if (result
!= NULL
) {
1178 defaultRuleSet
= result
;
1185 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1186 UnicodeString result
;
1187 if (defaultRuleSet
&& defaultRuleSet
->isPublic()) {
1188 defaultRuleSet
->getName(result
);
1190 result
.setToBogus();
1196 RuleBasedNumberFormat::initDefaultRuleSet()
1198 defaultRuleSet
= NULL
;
1203 NFRuleSet
**p
= &ruleSets
[0];
1208 defaultRuleSet
= *--p
;
1209 if (!defaultRuleSet
->isPublic()) {
1210 while (p
!= ruleSets
) {
1211 if ((*--p
)->isPublic()) {
1212 defaultRuleSet
= *p
;
1221 RuleBasedNumberFormat::init(const UnicodeString
& rules
, LocalizationInfo
* localizationInfos
,
1222 UParseError
& pErr
, UErrorCode
& status
)
1224 // TODO: implement UParseError
1225 uprv_memset(&pErr
, 0, sizeof(UParseError
));
1226 // Note: this can leave ruleSets == NULL, so remaining code should check
1227 if (U_FAILURE(status
)) {
1231 this->localizations
= localizationInfos
== NULL
? NULL
: localizationInfos
->ref();
1233 UnicodeString
description(rules
);
1234 if (!description
.length()) {
1235 status
= U_MEMORY_ALLOCATION_ERROR
;
1239 // start by stripping the trailing whitespace from all the rules
1240 // (this is all the whitespace following each semicolon in the
1241 // description). This allows us to look for rule-set boundaries
1242 // by searching for ";%" without having to worry about whitespace
1243 // between the ; and the %
1244 stripWhitespace(description
);
1246 // check to see if there's a set of lenient-parse rules. If there
1247 // is, pull them out into our temporary holding place for them,
1248 // and delete them from the description before the real description-
1249 // parsing code sees them
1250 int32_t lp
= description
.indexOf(gLenientParse
);
1252 // we've got to make sure we're not in the middle of a rule
1253 // (where "%%lenient-parse" would actually get treated as
1255 if (lp
== 0 || description
.charAt(lp
- 1) == gSemiColon
) {
1256 // locate the beginning and end of the actual collation
1257 // rules (there may be whitespace between the name and
1258 // the first token in the description)
1259 int lpEnd
= description
.indexOf(gSemiPercent
, lp
);
1262 lpEnd
= description
.length() - 1;
1264 int lpStart
= lp
+ u_strlen(gLenientParse
);
1265 while (uprv_isRuleWhiteSpace(description
.charAt(lpStart
))) {
1269 // copy out the lenient-parse rules and delete them
1270 // from the description
1271 lenientParseRules
= new UnicodeString();
1273 if (lenientParseRules
== 0) {
1274 status
= U_MEMORY_ALLOCATION_ERROR
;
1277 lenientParseRules
->setTo(description
, lpStart
, lpEnd
- lpStart
);
1279 description
.remove(lp
, lpEnd
+ 1 - lp
);
1283 // pre-flight parsing the description and count the number of
1284 // rule sets (";%" marks the end of one rule set and the beginning
1286 int numRuleSets
= 0;
1287 for (int32_t p
= description
.indexOf(gSemiPercent
); p
!= -1; p
= description
.indexOf(gSemiPercent
, p
)) {
1293 // our rule list is an array of the appropriate size
1294 ruleSets
= (NFRuleSet
**)uprv_malloc((numRuleSets
+ 1) * sizeof(NFRuleSet
*));
1296 if (ruleSets
== 0) {
1297 status
= U_MEMORY_ALLOCATION_ERROR
;
1301 for (int i
= 0; i
<= numRuleSets
; ++i
) {
1305 // divide up the descriptions into individual rule-set descriptions
1306 // and store them in a temporary array. At each step, we also
1307 // new up a rule set, but all this does is initialize its name
1308 // and remove it from its description. We can't actually parse
1309 // the rest of the descriptions and finish initializing everything
1310 // because we have to know the names and locations of all the rule
1311 // sets before we can actually set everything up
1313 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1316 UnicodeString
* ruleSetDescriptions
= new UnicodeString
[numRuleSets
];
1317 if (ruleSetDescriptions
== 0) {
1318 status
= U_MEMORY_ALLOCATION_ERROR
;
1325 for (int32_t p
= description
.indexOf(gSemiPercent
); p
!= -1; p
= description
.indexOf(gSemiPercent
, start
)) {
1326 ruleSetDescriptions
[curRuleSet
].setTo(description
, start
, p
+ 1 - start
);
1327 ruleSets
[curRuleSet
] = new NFRuleSet(ruleSetDescriptions
, curRuleSet
, status
);
1328 if (ruleSets
[curRuleSet
] == 0) {
1329 status
= U_MEMORY_ALLOCATION_ERROR
;
1335 ruleSetDescriptions
[curRuleSet
].setTo(description
, start
, description
.length() - start
);
1336 ruleSets
[curRuleSet
] = new NFRuleSet(ruleSetDescriptions
, curRuleSet
, status
);
1337 if (ruleSets
[curRuleSet
] == 0) {
1338 status
= U_MEMORY_ALLOCATION_ERROR
;
1343 // now we can take note of the formatter's default rule set, which
1344 // is the last public rule set in the description (it's the last
1345 // rather than the first so that a user can create a new formatter
1346 // from an existing formatter and change its default behavior just
1347 // by appending more rule sets to the end)
1349 // {dlf} Initialization of a fraction rule set requires the default rule
1350 // set to be known. For purposes of initialization, this is always the
1351 // last public rule set, no matter what the localization data says.
1352 initDefaultRuleSet();
1354 // finally, we can go back through the temporary descriptions
1355 // list and finish setting up the substructure (and we throw
1356 // away the temporary descriptions as we go)
1358 for (int i
= 0; i
< numRuleSets
; i
++) {
1359 ruleSets
[i
]->parseRules(ruleSetDescriptions
[i
], this, status
);
1363 // Now that the rules are initialized, the 'real' default rule
1364 // set can be adjusted by the localization data.
1366 // The C code keeps the localization array as is, rather than building
1367 // a separate array of the public rule set names, so we have less work
1368 // to do here-- but we still need to check the names.
1370 if (localizationInfos
) {
1371 // confirm the names, if any aren't in the rules, that's an error
1372 // it is ok if the rules contain public rule sets that are not in this list
1373 for (int32_t i
= 0; i
< localizationInfos
->getNumberOfRuleSets(); ++i
) {
1374 UnicodeString
name(TRUE
, localizationInfos
->getRuleSetName(i
), -1);
1375 NFRuleSet
* rs
= findRuleSet(name
, status
);
1380 defaultRuleSet
= rs
;
1384 defaultRuleSet
= getDefaultRuleSet();
1388 delete[] ruleSetDescriptions
;
1392 RuleBasedNumberFormat::stripWhitespace(UnicodeString
& description
)
1394 // iterate through the characters...
1395 UnicodeString result
;
1398 while (start
!= -1 && start
< description
.length()) {
1399 // seek to the first non-whitespace character...
1400 while (start
< description
.length()
1401 && uprv_isRuleWhiteSpace(description
.charAt(start
))) {
1405 // locate the next semicolon in the text and copy the text from
1406 // our current position up to that semicolon into the result
1407 int32_t p
= description
.indexOf(gSemiColon
, start
);
1409 // or if we don't find a semicolon, just copy the rest of
1410 // the string into the result
1411 result
.append(description
, start
, description
.length() - start
);
1414 else if (p
< description
.length()) {
1415 result
.append(description
, start
, p
+ 1 - start
);
1419 // when we get here, we've seeked off the end of the string, and
1420 // we terminate the loop (we continue until *start* is -1 rather
1421 // than until *p* is -1, because otherwise we'd miss the last
1422 // rule in the description)
1428 description
.setTo(result
);
1433 RuleBasedNumberFormat::dispose()
1436 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
1439 uprv_free(ruleSets
);
1443 #if !UCONFIG_NO_COLLATION
1448 delete decimalFormatSymbols
;
1449 decimalFormatSymbols
= NULL
;
1451 delete lenientParseRules
;
1452 lenientParseRules
= NULL
;
1454 if (localizations
) localizations
= localizations
->unref();
1458 //-----------------------------------------------------------------------
1459 // package-internal API
1460 //-----------------------------------------------------------------------
1463 * Returns the collator to use for lenient parsing. The collator is lazily created:
1464 * this function creates it the first time it's called.
1465 * @return The collator to use for lenient parsing, or null if lenient parsing
1469 RuleBasedNumberFormat::getCollator() const
1471 #if !UCONFIG_NO_COLLATION
1476 // lazy-evaluate the collator
1477 if (collator
== NULL
&& lenient
) {
1478 // create a default collator based on the formatter's locale,
1479 // then pull out that collator's rules, append any additional
1480 // rules specified in the description, and create a _new_
1481 // collator based on the combination of those rules
1483 UErrorCode status
= U_ZERO_ERROR
;
1485 Collator
* temp
= Collator::createInstance(locale
, status
);
1486 if (U_SUCCESS(status
) &&
1487 temp
->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
1489 RuleBasedCollator
* newCollator
= (RuleBasedCollator
*)temp
;
1490 if (lenientParseRules
) {
1491 UnicodeString
rules(newCollator
->getRules());
1492 rules
.append(*lenientParseRules
);
1494 newCollator
= new RuleBasedCollator(rules
, status
);
1495 // Exit if newCollator could not be created.
1496 if (newCollator
== NULL
) {
1502 if (U_SUCCESS(status
)) {
1503 newCollator
->setAttribute(UCOL_DECOMPOSITION_MODE
, UCOL_ON
, status
);
1505 ((RuleBasedNumberFormat
*)this)->collator
= newCollator
;
1514 // if lenient-parse mode is off, this will be null
1515 // (see setLenient())
1521 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1522 * instances owned by this formatter. This object is lazily created: this function
1523 * creates it the first time it's called.
1524 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1525 * instances owned by this formatter.
1527 DecimalFormatSymbols
*
1528 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1530 // lazy-evaluate the DecimalFormatSymbols object. This object
1531 // is shared by all DecimalFormat instances belonging to this
1533 if (decimalFormatSymbols
== NULL
) {
1534 UErrorCode status
= U_ZERO_ERROR
;
1535 DecimalFormatSymbols
* temp
= new DecimalFormatSymbols(locale
, status
);
1536 if (U_SUCCESS(status
)) {
1537 ((RuleBasedNumberFormat
*)this)->decimalFormatSymbols
= temp
;
1542 return decimalFormatSymbols
;