2 *******************************************************************************
3 * Copyright (C) 1997-2006, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
8 #include "unicode/rbnf.h"
12 #include "unicode/normlzr.h"
13 #include "unicode/tblcoll.h"
14 #include "unicode/uchar.h"
15 #include "unicode/ucol.h"
16 #include "unicode/uloc.h"
17 #include "unicode/unum.h"
18 #include "unicode/ures.h"
19 #include "unicode/ustring.h"
20 #include "unicode/utf16.h"
21 #include "unicode/udata.h"
35 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
37 static const UChar gPercentPercent
[] =
42 // All urbnf objects are created through openRules, so we init all of the
43 // Unicode string constants required by rbnf, nfrs, or nfr here.
44 static const UChar gLenientParse
[] =
46 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
47 }; /* "%%lenient-parse:" */
48 static const UChar gSemiColon
= 0x003B;
49 static const UChar gSemiPercent
[] =
// Shift count used to build the two double constants below.
54 #define kSomeNumberOfBitsDiv2 22
// (1 << 22) converted to double, i.e. 2^22.
55 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
// 2^22 * 2^22 = 2^44 as a double. RuleBasedNumberFormat::parse() passes this value
// to each rule set's parse() call.
56 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
60 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat
)
63 This is a utility class. It does not use ICU's RTTI.
64 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
65 Please make sure that intltest passes on Windows in Release mode,
66 since the string pooling per compilation unit will mess up how RTTI works.
67 The RTTI code was also removed due to lack of code coverage.
69 class LocalizationInfo
: public UMemory
{
71 virtual ~LocalizationInfo() {};
75 LocalizationInfo() : refcount(0) {}
77 LocalizationInfo
* ref(void) {
82 LocalizationInfo
* unref(void) {
83 if (refcount
&& --refcount
== 0) {
89 virtual UBool
operator==(const LocalizationInfo
* rhs
) const;
90 inline UBool
operator!=(const LocalizationInfo
* rhs
) const { return !operator==(rhs
); }
92 virtual int32_t getNumberOfRuleSets(void) const = 0;
93 virtual const UChar
* getRuleSetName(int32_t index
) const = 0;
94 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
95 virtual const UChar
* getLocaleName(int32_t index
) const = 0;
96 virtual const UChar
* getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const = 0;
98 virtual int32_t indexForLocale(const UChar
* locale
) const;
99 virtual int32_t indexForRuleSet(const UChar
* ruleset
) const;
101 // virtual UClassID getDynamicClassID() const = 0;
102 // static UClassID getStaticClassID(void);
105 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
107 // if both strings are NULL, this returns TRUE
109 streq(const UChar
* lhs
, const UChar
* rhs
) {
114 return u_strcmp(lhs
, rhs
) == 0;
120 LocalizationInfo::operator==(const LocalizationInfo
* rhs
) const {
126 int32_t rsc
= getNumberOfRuleSets();
127 if (rsc
== rhs
->getNumberOfRuleSets()) {
128 for (int i
= 0; i
< rsc
; ++i
) {
129 if (!streq(getRuleSetName(i
), rhs
->getRuleSetName(i
))) {
133 int32_t dlc
= getNumberOfDisplayLocales();
134 if (dlc
== rhs
->getNumberOfDisplayLocales()) {
135 for (int i
= 0; i
< dlc
; ++i
) {
136 const UChar
* locale
= getLocaleName(i
);
137 int32_t ix
= rhs
->indexForLocale(locale
);
138 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
139 if (!streq(locale
, rhs
->getLocaleName(ix
))) {
142 for (int j
= 0; j
< rsc
; ++j
) {
143 if (!streq(getDisplayName(i
, j
), rhs
->getDisplayName(ix
, j
))) {
// Looks for `locale` among the display locales: walks indices 0 up to
// getNumberOfDisplayLocales() and compares each getLocaleName(i) against `locale`
// using streq().
// NOTE(review): the return statements are not visible in this excerpt. The comment in
// LocalizationInfo::operator== states that the result is -1 when there is no such
// locale -- confirm.
156 LocalizationInfo::indexForLocale(const UChar
* locale
) const {
157 for (int i
= 0; i
< getNumberOfDisplayLocales(); ++i
) {
158 if (streq(locale
, getLocaleName(i
))) {
// Looks for `ruleset` among the rule set names: walks indices 0 up to
// getNumberOfRuleSets() and compares each getRuleSetName(i) against `ruleset`
// using streq().
// NOTE(review): the return statements are not visible in this excerpt; presumably the
// matching index is returned, with a negative value when nothing matches -- confirm.
166 LocalizationInfo::indexForRuleSet(const UChar
* ruleset
) const {
168 for (int i
= 0; i
< getNumberOfRuleSets(); ++i
) {
169 if (streq(ruleset
, getRuleSetName(i
))) {
178 typedef void (*Fn_Deleter
)(void*);
186 VArray() : buf(NULL
), cap(0), size(0), deleter(NULL
) {}
188 VArray(Fn_Deleter del
) : buf(NULL
), cap(0), size(0), deleter(del
) {}
192 for (int i
= 0; i
< size
; ++i
) {
203 void add(void* elem
, UErrorCode
& status
) {
204 if (U_SUCCESS(status
)) {
208 } else if (cap
< 256) {
214 buf
= (void**)uprv_malloc(cap
* sizeof(void*));
216 buf
= (void**)uprv_realloc(buf
, cap
* sizeof(void*));
219 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
220 status
= U_MEMORY_ALLOCATION_ERROR
;
223 void* start
= &buf
[size
];
224 size_t count
= (cap
- size
) * sizeof(void*);
225 uprv_memset(start
, 0, count
); // fill with nulls, just because
231 void** release(void) {
242 class StringLocalizationInfo
: public LocalizationInfo
{
248 friend class LocDataParser
;
250 StringLocalizationInfo(UChar
* i
, UChar
*** d
, int32_t numRS
, int32_t numLocs
)
251 : info(i
), data(d
), numRuleSets(numRS
), numLocales(numLocs
)
256 static StringLocalizationInfo
* create(const UnicodeString
& info
, UParseError
& perror
, UErrorCode
& status
);
258 virtual ~StringLocalizationInfo();
259 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets
; }
260 virtual const UChar
* getRuleSetName(int32_t index
) const;
261 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales
; }
262 virtual const UChar
* getLocaleName(int32_t index
) const;
263 virtual const UChar
* getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const;
265 // virtual UClassID getDynamicClassID() const;
266 // static UClassID getStaticClassID(void);
269 void init(UErrorCode
& status
) const;
274 OPEN_ANGLE
= 0x003c, /* '<' */
275 CLOSE_ANGLE
= 0x003e, /* '>' */
283 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
285 class LocDataParser
{
294 LocDataParser(UParseError
& parseError
, UErrorCode
& status
)
295 : data(NULL
), e(NULL
), p(NULL
), ch(0xffff), pe(parseError
), ec(status
) {}
299 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
300 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
302 StringLocalizationInfo
* parse(UChar
* data
, int32_t len
);
306 void inc(void) { ++p
; ch
= 0xffff; }
307 UBool
checkInc(UChar c
) { if (p
< e
&& (ch
== c
|| *p
== c
)) { inc(); return TRUE
; } return FALSE
; }
308 UBool
check(UChar c
) { return p
< e
&& (ch
== c
|| *p
== c
); }
309 void skipWhitespace(void) { while (p
< e
&& uprv_isRuleWhiteSpace(ch
!= 0xffff ? ch
: *p
)) inc();}
310 UBool
inList(UChar c
, const UChar
* list
) const {
311 if (*list
== SPACE
&& uprv_isRuleWhiteSpace(c
)) return TRUE
;
312 while (*list
&& *list
!= c
) ++list
; return *list
== c
;
314 void parseError(const char* msg
);
316 StringLocalizationInfo
* doParse(void);
318 UChar
** nextArray(int32_t& requiredLength
);
319 UChar
* nextString(void);
// ERROR(msg): record a parse error via parseError() and return NULL from the
// calling function.
// Both forms expand to TWO statements, so ERROR(...) is only safe inside a braced
// block; as the sole body of an unbraced if/else the `return NULL;` would run
// unconditionally.
// NOTE(review): two definitions are visible here; presumably a preprocessor
// conditional (not shown in this excerpt) selects one of them -- confirm.
// This form forwards the message text to parseError().
323 #define ERROR(msg) parseError(msg); return NULL;
// This form discards the message text and passes NULL to parseError().
325 #define ERROR(msg) parseError(NULL); return NULL;
329 static const UChar DQUOTE_STOPLIST
[] = {
333 static const UChar SQUOTE_STOPLIST
[] = {
337 static const UChar NOQUOTE_STOPLIST
[] = {
338 SPACE
, COMMA
, CLOSE_ANGLE
, OPEN_ANGLE
, TICK
, QUOTE
, 0
346 StringLocalizationInfo
*
347 LocDataParser::parse(UChar
* _data
, int32_t len
) {
349 if (_data
) uprv_free(_data
);
355 pe
.postContext
[0] = 0;
356 pe
.preContext
[0] = 0;
359 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
364 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
378 StringLocalizationInfo
*
379 LocDataParser::doParse(void) {
381 if (!checkInc(OPEN_ANGLE
)) {
382 ERROR("Missing open angle");
384 VArray
array(DeleteFn
);
385 UBool mightHaveNext
= TRUE
;
386 int32_t requiredLength
= -1;
387 while (mightHaveNext
) {
388 mightHaveNext
= FALSE
;
389 UChar
** elem
= nextArray(requiredLength
);
391 UBool haveComma
= check(COMMA
);
396 mightHaveNext
= TRUE
;
398 } else if (haveComma
) {
399 ERROR("Unexpected character");
404 if (!checkInc(CLOSE_ANGLE
)) {
405 if (check(OPEN_ANGLE
)) {
406 ERROR("Missing comma in outer array");
408 ERROR("Missing close angle bracket in outer array");
414 ERROR("Extra text after close of localization data");
419 int32_t numLocs
= array
.length() - 2; // subtract first, NULL
420 UChar
*** result
= (UChar
***)array
.release();
422 return new StringLocalizationInfo(data
, result
, requiredLength
-2, numLocs
); // subtract first, NULL
426 ERROR("Unknown error");
430 LocDataParser::nextArray(int32_t& requiredLength
) {
436 if (!checkInc(OPEN_ANGLE
)) {
437 ERROR("Missing open angle");
441 UBool mightHaveNext
= TRUE
;
442 while (mightHaveNext
) {
443 mightHaveNext
= FALSE
;
444 UChar
* elem
= nextString();
446 UBool haveComma
= check(COMMA
);
451 mightHaveNext
= TRUE
;
453 } else if (haveComma
) {
454 ERROR("Unexpected comma");
458 if (!checkInc(CLOSE_ANGLE
)) {
459 if (check(OPEN_ANGLE
)) {
460 ERROR("Missing close angle bracket in inner array");
462 ERROR("Missing comma in inner array");
468 if (requiredLength
== -1) {
469 requiredLength
= array
.length() + 1;
470 } else if (array
.length() != requiredLength
) {
471 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
472 ERROR("Array not of required length");
475 return (UChar
**)array
.release();
477 ERROR("Unknown Error");
481 LocDataParser::nextString() {
482 UChar
* result
= NULL
;
486 const UChar
* terminators
;
488 UBool haveQuote
= c
== QUOTE
|| c
== TICK
;
491 terminators
= c
== QUOTE
? DQUOTE_STOPLIST
: SQUOTE_STOPLIST
;
493 terminators
= NOQUOTE_STOPLIST
;
496 while (p
< e
&& !inList(*p
, terminators
)) ++p
;
498 ERROR("Unexpected end of data");
504 *p
= 0x0; // terminate by writing to data
505 result
= start
; // just point into data
509 ERROR("Missing matching quote");
510 } else if (p
== start
) {
511 ERROR("Empty string");
514 } else if (x
== OPEN_ANGLE
|| x
== TICK
|| x
== QUOTE
) {
515 ERROR("Unexpected character in string");
519 // ok for there to be no next string
524 LocDataParser::parseError(const char* /*str*/) {
529 const UChar
* start
= p
- U_PARSE_CONTEXT_LEN
- 1;
533 for (UChar
* x
= p
; --x
>= start
;) {
539 const UChar
* limit
= p
+ U_PARSE_CONTEXT_LEN
- 1;
543 u_strncpy(pe
.preContext
, start
, (int32_t)(p
-start
));
544 pe
.preContext
[p
-start
] = 0;
545 u_strncpy(pe
.postContext
, p
, (int32_t)(limit
-p
));
546 pe
.postContext
[limit
-p
] = 0;
547 pe
.offset
= (int32_t)(p
- data
);
550 fprintf(stderr
, "%s at or near character %d: ", str
, p
-data
);
553 msg
.append(start
, p
- start
);
554 msg
.append((UChar
)0x002f); /* SOLIDUS/SLASH */
555 msg
.append(p
, limit
-p
);
559 int32_t len
= msg
.extract(0, msg
.length(), buf
, 128);
565 fprintf(stderr
, "%s\n", buf
);
579 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
581 StringLocalizationInfo
*
582 StringLocalizationInfo::create(const UnicodeString
& info
, UParseError
& perror
, UErrorCode
& status
) {
583 if (U_FAILURE(status
)) {
587 int32_t len
= info
.length();
589 return NULL
; // no error;
592 UChar
* p
= (UChar
*)uprv_malloc(len
* sizeof(UChar
));
594 status
= U_MEMORY_ALLOCATION_ERROR
;
597 info
.extract(p
, len
, status
);
598 if (!U_FAILURE(status
)) {
599 status
= U_ZERO_ERROR
; // clear warning about non-termination
602 LocDataParser
parser(perror
, status
);
603 return parser
.parse(p
, len
);
// Releases the storage held by this object with uprv_free(): every row of `data`
// up to its terminating NULL entry, then the `data` array itself, then `info`.
// NOTE(review): the loop below reads *p (i.e. data[0]) before the `if (data)` guard
// further down, so a NULL `data` would be dereferenced here -- confirm that `data`
// can never be NULL, or move the guard ahead of the loop.
606 StringLocalizationInfo::~StringLocalizationInfo() {
607 for (UChar
*** p
= (UChar
***)data
; *p
; ++p
) {
608 // remaining data is simply pointer into our unicode string data.
// Hence only the row arrays are freed here, not the individual strings.
// The loop condition already guarantees *p is non-null, so this check is redundant.
609 if (*p
) uprv_free(*p
);
611 if (data
) uprv_free(data
);
612 if (info
) uprv_free(info
);
// Rule set names are stored in row 0 of `data`: for an index in
// [0, getNumberOfRuleSets()) the name is data[0][index].
// NOTE(review): the result for an out-of-range index is not visible in this
// excerpt (presumably NULL) -- confirm.
617 StringLocalizationInfo::getRuleSetName(int32_t index
) const {
618 if (index
>= 0 && index
< getNumberOfRuleSets()) {
619 return data
[0][index
];
// Display locale `index` occupies row index+1 of `data` (row 0 holds the rule set
// names); the locale's own name is the first element of that row.
// Valid indices are [0, getNumberOfDisplayLocales()).
// NOTE(review): the out-of-range result is not visible in this excerpt; the comment
// in LocalizationInfo::operator== says getLocaleName returns null for index -1.
625 StringLocalizationInfo::getLocaleName(int32_t index
) const {
626 if (index
>= 0 && index
< getNumberOfDisplayLocales()) {
627 return data
[index
+1][0];
// Display name of rule set `ruleIndex` in display locale `localeIndex`.
// Both indices are range-checked; the value is read from
// data[localeIndex+1][ruleIndex+1]: the +1 on the row skips the rule-set-name row,
// and the +1 on the column skips the locale name stored at column 0 of each row.
// NOTE(review): the result when either index is out of range is not visible in
// this excerpt (presumably NULL) -- confirm.
633 StringLocalizationInfo::getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const {
634 if (localeIndex
>= 0 && localeIndex
< getNumberOfDisplayLocales() &&
635 ruleIndex
>= 0 && ruleIndex
< getNumberOfRuleSets()) {
636 return data
[localeIndex
+1][ruleIndex
+1];
643 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
644 const UnicodeString
& locs
,
645 const Locale
& alocale
, UParseError
& perror
, UErrorCode
& status
)
647 , defaultRuleSet(NULL
)
650 , decimalFormatSymbols(NULL
)
652 , lenientParseRules(NULL
)
653 , localizations(NULL
)
655 LocalizationInfo
* locinfo
= StringLocalizationInfo::create(locs
, perror
, status
);
656 init(description
, locinfo
, perror
, status
);
659 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
660 const UnicodeString
& locs
,
661 UParseError
& perror
, UErrorCode
& status
)
663 , defaultRuleSet(NULL
)
664 , locale(Locale::getDefault())
666 , decimalFormatSymbols(NULL
)
668 , lenientParseRules(NULL
)
669 , localizations(NULL
)
671 LocalizationInfo
* locinfo
= StringLocalizationInfo::create(locs
, perror
, status
);
672 init(description
, locinfo
, perror
, status
);
675 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
676 LocalizationInfo
* info
,
677 const Locale
& alocale
, UParseError
& perror
, UErrorCode
& status
)
679 , defaultRuleSet(NULL
)
682 , decimalFormatSymbols(NULL
)
684 , lenientParseRules(NULL
)
685 , localizations(NULL
)
687 init(description
, info
, perror
, status
);
690 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
694 , defaultRuleSet(NULL
)
695 , locale(Locale::getDefault())
697 , decimalFormatSymbols(NULL
)
699 , lenientParseRules(NULL
)
700 , localizations(NULL
)
702 init(description
, NULL
, perror
, status
);
705 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
706 const Locale
& aLocale
,
710 , defaultRuleSet(NULL
)
713 , decimalFormatSymbols(NULL
)
715 , lenientParseRules(NULL
)
716 , localizations(NULL
)
718 init(description
, NULL
, perror
, status
);
721 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag
, const Locale
& alocale
, UErrorCode
& status
)
723 , defaultRuleSet(NULL
)
726 , decimalFormatSymbols(NULL
)
728 , lenientParseRules(NULL
)
729 , localizations(NULL
)
731 if (U_FAILURE(status
)) {
735 const char* fmt_tag
= "";
737 case URBNF_SPELLOUT
: fmt_tag
= "SpelloutRules"; break;
738 case URBNF_ORDINAL
: fmt_tag
= "OrdinalRules"; break;
739 case URBNF_DURATION
: fmt_tag
= "DurationRules"; break;
740 default: status
= U_ILLEGAL_ARGUMENT_ERROR
; return;
743 // TODO: read localization info from resource
744 LocalizationInfo
* locinfo
= NULL
;
747 UResourceBundle
* nfrb
= ures_open(U_ICUDATA_RBNF
, locale
.getName(), &status
);
748 if (U_SUCCESS(status
)) {
749 setLocaleIDs(ures_getLocaleByType(nfrb
, ULOC_VALID_LOCALE
, &status
),
750 ures_getLocaleByType(nfrb
, ULOC_ACTUAL_LOCALE
, &status
));
751 const UChar
* description
= ures_getStringByKey(nfrb
, fmt_tag
, &len
, &status
);
752 UnicodeString
desc(description
, len
);
754 init (desc
, locinfo
, perror
, status
);
759 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat
& rhs
)
762 , defaultRuleSet(NULL
)
765 , decimalFormatSymbols(NULL
)
767 , lenientParseRules(NULL
)
768 , localizations(NULL
)
770 this->operator=(rhs
);
775 RuleBasedNumberFormat
&
776 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat
& rhs
)
778 UErrorCode status
= U_ZERO_ERROR
;
781 lenient
= rhs
.lenient
;
783 UnicodeString rules
= rhs
.getRules();
785 init(rules
, rhs
.localizations
? rhs
.localizations
->ref() : NULL
, perror
, status
);
790 RuleBasedNumberFormat::~RuleBasedNumberFormat()
796 RuleBasedNumberFormat::clone(void) const
798 RuleBasedNumberFormat
* result
= NULL
;
799 UnicodeString rules
= getRules();
800 UErrorCode status
= U_ZERO_ERROR
;
802 result
= new RuleBasedNumberFormat(rules
, localizations
, locale
, perror
, status
);
805 status
= U_MEMORY_ALLOCATION_ERROR
;
808 if (U_FAILURE(status
)) {
812 result
->lenient
= lenient
;
818 RuleBasedNumberFormat::operator==(const Format
& other
) const
820 if (this == &other
) {
824 if (other
.getDynamicClassID() == getStaticClassID()) {
825 const RuleBasedNumberFormat
& rhs
= (const RuleBasedNumberFormat
&)other
;
826 if (locale
== rhs
.locale
&&
827 lenient
== rhs
.lenient
&&
828 (localizations
== NULL
829 ? rhs
.localizations
== NULL
830 : (rhs
.localizations
== NULL
832 : *localizations
== rhs
.localizations
))) {
834 NFRuleSet
** p
= ruleSets
;
835 NFRuleSet
** q
= rhs
.ruleSets
;
838 } else if (q
== NULL
) {
841 while (*p
&& *q
&& (**p
== **q
)) {
845 return *q
== NULL
&& *p
== NULL
;
853 RuleBasedNumberFormat::getRules() const
855 UnicodeString result
;
856 if (ruleSets
!= NULL
) {
857 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
858 (*p
)->appendRules(result
);
865 RuleBasedNumberFormat::getRuleSetName(int32_t index
) const
868 UnicodeString
string(TRUE
, localizations
->getRuleSetName(index
), (int32_t)-1);
870 } else if (ruleSets
) {
871 UnicodeString result
;
872 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
874 if (rs
->isPublic()) {
887 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
891 result
= localizations
->getNumberOfRuleSets();
892 } else if (ruleSets
) {
893 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
894 if ((**p
).isPublic()) {
903 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
905 return localizations
->getNumberOfDisplayLocales();
911 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index
, UErrorCode
& status
) const {
912 if (U_FAILURE(status
)) {
915 if (localizations
&& index
>= 0 && index
< localizations
->getNumberOfDisplayLocales()) {
916 UnicodeString
name(TRUE
, localizations
->getLocaleName(index
), -1);
918 int32_t cap
= name
.length() + 1;
921 bp
= (char *)uprv_malloc(cap
);
923 status
= U_MEMORY_ALLOCATION_ERROR
;
927 name
.extract(0, name
.length(), bp
, cap
, UnicodeString::kInvariant
);
928 Locale
retLocale(bp
);
934 status
= U_ILLEGAL_ARGUMENT_ERROR
;
940 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index
, const Locale
& localeParam
) {
941 if (localizations
&& index
>= 0 && index
< localizations
->getNumberOfRuleSets()) {
942 UnicodeString
localeName(localeParam
.getBaseName(), -1, UnicodeString::kInvariant
);
943 int32_t len
= localeName
.length();
944 UChar
* localeStr
= localeName
.getBuffer(len
+ 1);
947 int32_t ix
= localizations
->indexForLocale(localeStr
);
949 UnicodeString
name(TRUE
, localizations
->getDisplayName(ix
, index
), -1);
953 // trim trailing portion, skipping over omitted sections
954 do { --len
;} while (len
> 0 && localeStr
[len
] != 0x005f); // underscore
955 while (len
> 0 && localeStr
[len
-1] == 0x005F) --len
;
957 UnicodeString
name(TRUE
, localizations
->getRuleSetName(index
), -1);
966 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString
& ruleSetName
, const Locale
& localeParam
) {
968 UnicodeString
rsn(ruleSetName
);
969 int32_t ix
= localizations
->indexForRuleSet(rsn
.getTerminatedBuffer());
970 return getRuleSetDisplayName(ix
, localeParam
);
978 RuleBasedNumberFormat::findRuleSet(const UnicodeString
& name
, UErrorCode
& status
) const
980 if (U_SUCCESS(status
) && ruleSets
) {
981 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
983 if (rs
->isNamed(name
)) {
987 status
= U_ILLEGAL_ARGUMENT_ERROR
;
993 RuleBasedNumberFormat::format(int32_t number
,
994 UnicodeString
& toAppendTo
,
995 FieldPosition
& /* pos */) const
997 if (defaultRuleSet
) defaultRuleSet
->format((int64_t)number
, toAppendTo
, toAppendTo
.length());
1003 RuleBasedNumberFormat::format(int64_t number
,
1004 UnicodeString
& toAppendTo
,
1005 FieldPosition
& /* pos */) const
1007 if (defaultRuleSet
) defaultRuleSet
->format(number
, toAppendTo
, toAppendTo
.length());
1013 RuleBasedNumberFormat::format(double number
,
1014 UnicodeString
& toAppendTo
,
1015 FieldPosition
& /* pos */) const
1017 if (defaultRuleSet
) defaultRuleSet
->format(number
, toAppendTo
, toAppendTo
.length());
1023 RuleBasedNumberFormat::format(int32_t number
,
1024 const UnicodeString
& ruleSetName
,
1025 UnicodeString
& toAppendTo
,
1026 FieldPosition
& /* pos */,
1027 UErrorCode
& status
) const
1029 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1030 if (U_SUCCESS(status
)) {
1031 if (ruleSetName
.indexOf(gPercentPercent
) == 0) {
1032 // throw new IllegalArgumentException("Can't use internal rule set");
1033 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1035 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1037 rs
->format((int64_t)number
, toAppendTo
, toAppendTo
.length());
1046 RuleBasedNumberFormat::format(int64_t number
,
1047 const UnicodeString
& ruleSetName
,
1048 UnicodeString
& toAppendTo
,
1049 FieldPosition
& /* pos */,
1050 UErrorCode
& status
) const
1052 if (U_SUCCESS(status
)) {
1053 if (ruleSetName
.indexOf(gPercentPercent
) == 0) {
1054 // throw new IllegalArgumentException("Can't use internal rule set");
1055 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1057 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1059 rs
->format(number
, toAppendTo
, toAppendTo
.length());
1067 // make linker happy
1069 RuleBasedNumberFormat::format(const Formattable
& obj
,
1070 UnicodeString
& toAppendTo
,
1072 UErrorCode
& status
) const
1074 return NumberFormat::format(obj
, toAppendTo
, pos
, status
);
1078 RuleBasedNumberFormat::format(double number
,
1079 const UnicodeString
& ruleSetName
,
1080 UnicodeString
& toAppendTo
,
1081 FieldPosition
& /* pos */,
1082 UErrorCode
& status
) const
1084 if (U_SUCCESS(status
)) {
1085 if (ruleSetName
.indexOf(gPercentPercent
) == 0) {
1086 // throw new IllegalArgumentException("Can't use internal rule set");
1087 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1089 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1091 rs
->format(number
, toAppendTo
, toAppendTo
.length());
// Parses `text` starting at parsePosition's current index. Each public rule set is
// given a chance to parse the text, and the attempt that consumes the most
// characters is kept as the result.
1099 RuleBasedNumberFormat::parse(const UnicodeString
& text
,
1100 Formattable
& result
,
1101 ParsePosition
& parsePosition
) const
1104 parsePosition
.setErrorIndex(0);
// Work on the part of `text` that begins at the caller's parse index; the
// positions tracked below are therefore relative to that index.
1108 UnicodeString
workingText(text
, parsePosition
.getIndex());
1109 ParsePosition
workingPos(0);
// Best attempt so far: how far it got (high_pp) and what it produced (high_result).
1111 ParsePosition
high_pp(0);
1112 Formattable high_result
;
// ruleSets is walked until its terminating NULL entry.
1114 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
1116 if (rp
->isPublic()) {
1117 ParsePosition
working_pp(0);
1118 Formattable working_result
;
1120 rp
->parse(workingText
, working_pp
, kMaxDouble
, working_result
);
// Keep this attempt only if it consumed strictly more text than the best so far.
1121 if (working_pp
.getIndex() > high_pp
.getIndex()) {
1122 high_pp
= working_pp
;
1123 high_result
= working_result
;
// The best attempt consumed the whole working text.
// NOTE(review): the statement guarded by this test is not visible in this
// excerpt (presumably the loop stops early) -- confirm.
1125 if (high_pp
.getIndex() == workingText
.length()) {
// Convert the relative end position back into an index into the caller's text.
1132 parsePosition
.setIndex(parsePosition
.getIndex() + high_pp
.getIndex());
// Some text was consumed, so clear the error index.
1133 if (high_pp
.getIndex() > 0) {
1134 parsePosition
.setErrorIndex(-1);
1136 result
= high_result
;
// A double result is checked for being exactly representable as an int32_t.
// NOTE(review): what is done with `r` when the test succeeds is not visible in
// this excerpt. Also, the (int32_t) cast is applied without a range check;
// converting a double outside the int32_t range is undefined behavior in C++ --
// confirm the value is bounded or add a range check.
1137 if (result
.getType() == Formattable::kDouble
) {
1138 int32_t r
= (int32_t)result
.getDouble();
1139 if ((double)r
== result
.getDouble()) {
1145 #if !UCONFIG_NO_COLLATION
1148 RuleBasedNumberFormat::setLenient(UBool enabled
)
1151 if (!enabled
&& collator
) {
1160 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString
& ruleSetName
, UErrorCode
& status
) {
1161 if (U_SUCCESS(status
)) {
1162 if (ruleSetName
.isEmpty()) {
1163 if (localizations
) {
1164 UnicodeString
name(TRUE
, localizations
->getRuleSetName(0), -1);
1165 defaultRuleSet
= findRuleSet(name
, status
);
1167 initDefaultRuleSet();
1169 } else if (ruleSetName
.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1170 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1172 NFRuleSet
* result
= findRuleSet(ruleSetName
, status
);
1173 if (result
!= NULL
) {
1174 defaultRuleSet
= result
;
// Produces the name of the current default rule set.
1181 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1182 UnicodeString result
;
// The name is only filled in when a default rule set exists and is public.
1183 if (defaultRuleSet
&& defaultRuleSet
->isPublic()) {
1184 defaultRuleSet
->getName(result
);
// NOTE(review): the branch structure around this line is not visible in this
// excerpt; presumably this is the else-branch, marking the result as bogus when
// there is no public default rule set -- confirm.
1186 result
.setToBogus();
1192 RuleBasedNumberFormat::initDefaultRuleSet()
1194 defaultRuleSet
= NULL
;
1199 NFRuleSet
**p
= &ruleSets
[0];
1204 defaultRuleSet
= *--p
;
1205 if (!defaultRuleSet
->isPublic()) {
1206 while (p
!= ruleSets
) {
1207 if ((*--p
)->isPublic()) {
1208 defaultRuleSet
= *p
;
1217 RuleBasedNumberFormat::init(const UnicodeString
& rules
, LocalizationInfo
* localizationInfos
,
1218 UParseError
& pErr
, UErrorCode
& status
)
1220 // TODO: implement UParseError
1221 uprv_memset(&pErr
, 0, sizeof(UParseError
));
1222 // Note: this can leave ruleSets == NULL, so remaining code should check
1223 if (U_FAILURE(status
)) {
1227 this->localizations
= localizationInfos
== NULL
? NULL
: localizationInfos
->ref();
1229 UnicodeString
description(rules
);
1230 if (!description
.length()) {
1231 status
= U_MEMORY_ALLOCATION_ERROR
;
1235 // start by stripping the trailing whitespace from all the rules
1236 // (this is all the whitespace following each semicolon in the
1237 // description). This allows us to look for rule-set boundaries
1238 // by searching for ";%" without having to worry about whitespace
1239 // between the ; and the %
1240 stripWhitespace(description
);
1242 // check to see if there's a set of lenient-parse rules. If there
1243 // is, pull them out into our temporary holding place for them,
1244 // and delete them from the description before the real description-
1245 // parsing code sees them
1246 int32_t lp
= description
.indexOf(gLenientParse
);
1248 // we've got to make sure we're not in the middle of a rule
1249 // (where "%%lenient-parse" would actually get treated as
1251 if (lp
== 0 || description
.charAt(lp
- 1) == gSemiColon
) {
1252 // locate the beginning and end of the actual collation
1253 // rules (there may be whitespace between the name and
1254 // the first token in the description)
1255 int lpEnd
= description
.indexOf(gSemiPercent
, lp
);
1258 lpEnd
= description
.length() - 1;
1260 int lpStart
= lp
+ u_strlen(gLenientParse
);
1261 while (uprv_isRuleWhiteSpace(description
.charAt(lpStart
))) {
1265 // copy out the lenient-parse rules and delete them
1266 // from the description
1267 lenientParseRules
= new UnicodeString();
1269 if (lenientParseRules
== 0) {
1270 status
= U_MEMORY_ALLOCATION_ERROR
;
1273 lenientParseRules
->setTo(description
, lpStart
, lpEnd
- lpStart
);
1275 description
.remove(lp
, lpEnd
+ 1 - lp
);
1279 // pre-flight parsing the description and count the number of
1280 // rule sets (";%" marks the end of one rule set and the beginning
1282 int numRuleSets
= 0;
1283 for (int32_t p
= description
.indexOf(gSemiPercent
); p
!= -1; p
= description
.indexOf(gSemiPercent
, p
)) {
1289 // our rule list is an array of the appropriate size
1290 ruleSets
= (NFRuleSet
**)uprv_malloc((numRuleSets
+ 1) * sizeof(NFRuleSet
*));
1292 if (ruleSets
== 0) {
1293 status
= U_MEMORY_ALLOCATION_ERROR
;
1297 for (int i
= 0; i
<= numRuleSets
; ++i
) {
1301 // divide up the descriptions into individual rule-set descriptions
1302 // and store them in a temporary array. At each step, we also
1303 // new up a rule set, but all this does is initialize its name
1304 // and remove it from its description. We can't actually parse
1305 // the rest of the descriptions and finish initializing everything
1306 // because we have to know the names and locations of all the rule
1307 // sets before we can actually set everything up
1309 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1312 UnicodeString
* ruleSetDescriptions
= new UnicodeString
[numRuleSets
];
1314 if (ruleSetDescriptions
== 0) {
1315 status
= U_MEMORY_ALLOCATION_ERROR
;
1322 for (int32_t p
= description
.indexOf(gSemiPercent
); p
!= -1; p
= description
.indexOf(gSemiPercent
, start
)) {
1323 ruleSetDescriptions
[curRuleSet
].setTo(description
, start
, p
+ 1 - start
);
1324 ruleSets
[curRuleSet
] = new NFRuleSet(ruleSetDescriptions
, curRuleSet
, status
);
1326 if (ruleSets
[curRuleSet
] == 0) {
1327 status
= U_MEMORY_ALLOCATION_ERROR
;
1333 ruleSetDescriptions
[curRuleSet
].setTo(description
, start
, description
.length() - start
);
1334 ruleSets
[curRuleSet
] = new NFRuleSet(ruleSetDescriptions
, curRuleSet
, status
);
1336 if (ruleSets
[curRuleSet
] == 0) {
1337 status
= U_MEMORY_ALLOCATION_ERROR
;
1342 // now we can take note of the formatter's default rule set, which
1343 // is the last public rule set in the description (it's the last
1344 // rather than the first so that a user can create a new formatter
1345 // from an existing formatter and change its default behavior just
1346 // by appending more rule sets to the end)
1348 // {dlf} Initialization of a fraction rule set requires the default rule
1349 // set to be known. For purposes of initialization, this is always the
1350 // last public rule set, no matter what the localization data says.
1351 initDefaultRuleSet();
1353 // finally, we can go back through the temporary descriptions
1354 // list and finish setting up the substructure (and we throw
1355 // away the temporary descriptions as we go)
1357 for (int i
= 0; i
< numRuleSets
; i
++) {
1358 ruleSets
[i
]->parseRules(ruleSetDescriptions
[i
], this, status
);
1362 delete[] ruleSetDescriptions
;
1364 // Now that the rules are initialized, the 'real' default rule
1365 // set can be adjusted by the localization data.
1367 // The C code keeps the localization array as is, rather than building
1368 // a separate array of the public rule set names, so we have less work
1369 // to do here-- but we still need to check the names.
1371 if (localizationInfos
) {
1372 // confirm the names, if any aren't in the rules, that's an error
1373 // it is ok if the rules contain public rule sets that are not in this list
1374 for (int32_t i
= 0; i
< localizationInfos
->getNumberOfRuleSets(); ++i
) {
1375 UnicodeString
name(TRUE
, localizationInfos
->getRuleSetName(i
), -1);
1376 NFRuleSet
* rs
= findRuleSet(name
, status
);
1381 defaultRuleSet
= rs
;
1385 defaultRuleSet
= getDefaultRuleSet();
1390 RuleBasedNumberFormat::stripWhitespace(UnicodeString
& description
)
1392 // iterate through the characters...
1393 UnicodeString result
;
1396 while (start
!= -1 && start
< description
.length()) {
1397 // seek to the first non-whitespace character...
1398 while (start
< description
.length()
1399 && uprv_isRuleWhiteSpace(description
.charAt(start
))) {
1403 // locate the next semicolon in the text and copy the text from
1404 // our current position up to that semicolon into the result
1405 int32_t p
= description
.indexOf(gSemiColon
, start
);
1407 // or if we don't find a semicolon, just copy the rest of
1408 // the string into the result
1409 result
.append(description
, start
, description
.length() - start
);
1412 else if (p
< description
.length()) {
1413 result
.append(description
, start
, p
+ 1 - start
);
1417 // when we get here, we've run off the end of the string, and
1418 // we terminate the loop (we continue until *start* is -1 rather
1419 // than until *p* is -1, because otherwise we'd miss the last
1420 // rule in the description)
1426 description
.setTo(result
);
1431 RuleBasedNumberFormat::dispose()
1434 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
1437 uprv_free(ruleSets
);
1441 #if !UCONFIG_NO_COLLATION
1446 delete decimalFormatSymbols
;
1447 decimalFormatSymbols
= NULL
;
1449 delete lenientParseRules
;
1450 lenientParseRules
= NULL
;
1452 if (localizations
) localizations
= localizations
->unref();
1456 //-----------------------------------------------------------------------
1457 // package-internal API
1458 //-----------------------------------------------------------------------
1461 * Returns the collator to use for lenient parsing. The collator is lazily created:
1462 * this function creates it the first time it's called.
1463 * @return The collator to use for lenient parsing, or null if lenient parsing
1467 RuleBasedNumberFormat::getCollator() const
1469 #if !UCONFIG_NO_COLLATION
1474 // lazy-evaluate the collator
1475 if (collator
== NULL
&& lenient
) {
1476 // create a default collator based on the formatter's locale,
1477 // then pull out that collator's rules, append any additional
1478 // rules specified in the description, and create a _new_
1479 // collator based on the combination of those rules
1481 UErrorCode status
= U_ZERO_ERROR
;
1483 Collator
* temp
= Collator::createInstance(locale
, status
);
1484 if (U_SUCCESS(status
) &&
1485 temp
->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
1487 RuleBasedCollator
* newCollator
= (RuleBasedCollator
*)temp
;
1488 if (lenientParseRules
) {
1489 UnicodeString
rules(newCollator
->getRules());
1490 rules
.append(*lenientParseRules
);
1492 newCollator
= new RuleBasedCollator(rules
, status
);
1496 if (U_SUCCESS(status
)) {
1497 newCollator
->setAttribute(UCOL_DECOMPOSITION_MODE
, UCOL_ON
, status
);
1499 ((RuleBasedNumberFormat
*)this)->collator
= newCollator
;
1508 // if lenient-parse mode is off, this will be null
1509 // (see setLenient())
1515 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1516 * instances owned by this formatter. This object is lazily created: this function
1517 * creates it the first time it's called.
1518 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1519 * instances owned by this formatter.
1521 DecimalFormatSymbols
*
1522 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1524 // lazy-evaluate the DecimalFormatSymbols object. This object
1525 // is shared by all DecimalFormat instances belonging to this
// Only build the symbols when nothing has been cached yet.
1527 if (decimalFormatSymbols
== NULL
) {
1528 UErrorCode status
= U_ZERO_ERROR
;
// The symbols are created for this formatter's locale.
1529 DecimalFormatSymbols
* temp
= new DecimalFormatSymbols(locale
, status
);
// Cache the new object only if construction reported success. This method is
// const, so the member is written through a cast that removes const from `this`.
// NOTE(review): what happens to `temp` when status reports a failure is not
// visible in this excerpt -- confirm it is deleted rather than leaked.
1530 if (U_SUCCESS(status
)) {
1531 ((RuleBasedNumberFormat
*)this)->decimalFormatSymbols
= temp
;
// The cached pointer is still NULL here if nothing was stored above.
1536 return decimalFormatSymbols
;