2 *******************************************************************************
3 * Copyright (C) 1997-2004, International Business Machines Corporation and others. All Rights Reserved.
4 *******************************************************************************
7 #include "unicode/rbnf.h"
11 #include "unicode/normlzr.h"
12 #include "unicode/tblcoll.h"
13 #include "unicode/uchar.h"
14 #include "unicode/ucol.h"
15 #include "unicode/uloc.h"
16 #include "unicode/unum.h"
17 #include "unicode/ures.h"
18 #include "unicode/ustring.h"
19 #include "unicode/utf16.h"
20 #include "unicode/udata.h"
34 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
36 static const UChar gPercentPercent
[] =
41 // All urbnf objects are created through openRules, so we init all of the
42 // Unicode string constants required by rbnf, nfrs, or nfr here.
43 static const UChar gLenientParse
[] =
45 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
46 }; /* "%%lenient-parse:" */
47 static const UChar gSemiColon
= 0x003B;
48 static const UChar gSemiPercent
[] =
// Upper bound passed as the third argument of the rule-set parse call in
// RuleBasedNumberFormat::parse(). It is built as the square of 2^22 (= 2^44)
// so that the integer shift itself stays well inside a 32-bit int.
#define kSomeNumberOfBitsDiv2 22
// Fully parenthesized: the bare cast form "(double)(...)" breaks when the
// macro is used as an operand, e.g. sizeof(kHalfMaxDouble).
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
59 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat
)
61 class LocalizationInfo
: public UObject
{
63 virtual ~LocalizationInfo() {};
67 LocalizationInfo() : refcount(0) {}
69 LocalizationInfo
* ref(void) {
74 LocalizationInfo
* unref(void) {
75 if (refcount
&& --refcount
== 0) {
81 virtual UBool
operator==(const LocalizationInfo
* rhs
) const;
82 inline UBool
operator!=(const LocalizationInfo
* rhs
) const { return !operator==(rhs
); }
84 virtual int32_t getNumberOfRuleSets(void) const = 0;
85 virtual const UChar
* getRuleSetName(int32_t index
) const = 0;
86 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
87 virtual const UChar
* getLocaleName(int32_t index
) const = 0;
88 virtual const UChar
* getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const = 0;
90 virtual int32_t indexForLocale(const UChar
* locale
) const;
91 virtual int32_t indexForRuleSet(const UChar
* ruleset
) const;
93 virtual UClassID
getDynamicClassID() const = 0;
94 static UClassID
getStaticClassID(void);
97 UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo
)
// Equality helper for two UChar strings; the visible comparison is
// u_strcmp(lhs, rhs) == 0.
// NOTE(review): the statements between the signature and the u_strcmp call
// are not part of this excerpt; the NULL behavior described in the next
// comment presumably lives there -- confirm against the full file.
99 // if both strings are NULL, this returns TRUE
101 streq(const UChar
* lhs
, const UChar
* rhs
) {
106 return u_strcmp(lhs
, rhs
) == 0;
// Deep comparison with another LocalizationInfo.
// Rule set names are compared position by position. Display locales are
// matched by name through rhs->indexForLocale(), so the two objects may list
// their locales in a different order; for each matched locale every display
// name (one per rule set) is compared.
// NOTE(review): the branches taken on a mismatch and the final return are not
// part of this excerpt -- presumably a failed streq() yields FALSE; confirm.
112 LocalizationInfo::operator==(const LocalizationInfo
* rhs
) const {
// Step 1: same number of rule sets, and the same name at every index.
118 int32_t rsc
= getNumberOfRuleSets();
119 if (rsc
== rhs
->getNumberOfRuleSets()) {
120 for (int i
= 0; i
< rsc
; ++i
) {
121 if (!streq(getRuleSetName(i
), rhs
->getRuleSetName(i
))) {
// Step 2: same number of display locales.
125 int32_t dlc
= getNumberOfDisplayLocales();
126 if (dlc
== rhs
->getNumberOfDisplayLocales()) {
127 for (int i
= 0; i
< dlc
; ++i
) {
// Look up our i-th locale by name on the other side; ix is rhs's index for it.
128 const UChar
* locale
= getLocaleName(i
);
129 int32_t ix
= rhs
->indexForLocale(locale
);
130 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
131 if (!streq(locale
, rhs
->getLocaleName(ix
))) {
// Step 3: for this locale pair, compare the display name of every rule set.
134 for (int j
= 0; j
< rsc
; ++j
) {
135 if (!streq(getDisplayName(i
, j
), rhs
->getDisplayName(ix
, j
))) {
// Linear search for `locale` among the display locale names, compared with
// streq(). getNumberOfDisplayLocales() is re-evaluated on every iteration.
// NOTE(review): the return statements are not part of this excerpt; the
// comment in operator== ("if no locale, ix is -1") indicates -1 means
// "not found" -- confirm against the full file.
148 LocalizationInfo::indexForLocale(const UChar
* locale
) const {
149 for (int i
= 0; i
< getNumberOfDisplayLocales(); ++i
) {
150 if (streq(locale
, getLocaleName(i
))) {
// Linear search for `ruleset` among the rule set names, compared with
// streq(). getNumberOfRuleSets() is re-evaluated on every iteration.
// NOTE(review): the return statements are not part of this excerpt;
// presumably the matching index is returned, and a "not found" value
// otherwise -- confirm against the full file.
158 LocalizationInfo::indexForRuleSet(const UChar
* ruleset
) const {
160 for (int i
= 0; i
< getNumberOfRuleSets(); ++i
) {
161 if (streq(ruleset
, getRuleSetName(i
))) {
// Callback type taking a single void* and returning nothing. VArray stores
// one of these in its `deleter` member (see the VArray constructors).
170 typedef void (*Fn_Deleter
)(void*);
178 VArray() : buf(NULL
), cap(0), size(0), deleter(NULL
) {}
180 VArray(Fn_Deleter del
) : buf(NULL
), cap(0), size(0), deleter(del
) {}
184 for (int i
= 0; i
< size
; ++i
) {
195 void add(void* elem
, UErrorCode
& status
) {
196 if (U_SUCCESS(status
)) {
200 } else if (cap
< 256) {
206 buf
= (void**)uprv_malloc(cap
* sizeof(void*));
208 buf
= (void**)uprv_realloc(buf
, cap
* sizeof(void*));
211 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
212 status
= U_MEMORY_ALLOCATION_ERROR
;
215 void* start
= &buf
[size
];
216 size_t count
= (cap
- size
) * sizeof(void*);
217 uprv_memset(start
, 0, count
); // fill with nulls, just because
223 void** release(void) {
234 class StringLocalizationInfo
: public LocalizationInfo
{
240 friend class LocDataParser
;
242 StringLocalizationInfo(UChar
* i
, UChar
*** d
, int32_t numRS
, int32_t numLocs
)
243 : info(i
), data(d
), numRuleSets(numRS
), numLocales(numLocs
)
248 static StringLocalizationInfo
* create(const UnicodeString
& info
, UParseError
& perror
, UErrorCode
& status
);
250 virtual ~StringLocalizationInfo();
251 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets
; }
252 virtual const UChar
* getRuleSetName(int32_t index
) const;
253 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales
; }
254 virtual const UChar
* getLocaleName(int32_t index
) const;
255 virtual const UChar
* getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const;
257 virtual UClassID
getDynamicClassID() const;
258 static UClassID
getStaticClassID(void);
261 void init(UErrorCode
& status
) const;
266 OPEN_ANGLE
= 0x003c, /* '<' */
267 CLOSE_ANGLE
= 0x003e, /* '>' */
275 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
277 class LocDataParser
{
286 LocDataParser(UParseError
& parseError
, UErrorCode
& status
)
287 : data(NULL
), e(NULL
), p(NULL
), ch(0xffff), pe(parseError
), ec(status
) {}
291 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
292 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
294 StringLocalizationInfo
* parse(UChar
* data
, int32_t len
);
298 void inc(void) { ++p
; ch
= 0xffff; }
299 UBool
checkInc(UChar c
) { if (p
< e
&& (ch
== c
|| *p
== c
)) { inc(); return TRUE
; } return FALSE
; }
300 UBool
check(UChar c
) { return p
< e
&& (ch
== c
|| *p
== c
); }
301 void skipWhitespace(void) { while (p
< e
&& uprv_isRuleWhiteSpace(ch
!= 0xffff ? ch
: *p
)) inc();}
// Membership test: is `c` one of the characters in the 0-terminated `list`?
// A list whose first entry is SPACE additionally matches any character that
// uprv_isRuleWhiteSpace() accepts.
// The scan stops at the first match or at the terminator; because the final
// test is `*list == c`, a `c` of 0 matches the terminator itself.
302 UBool
inList(UChar c
, const UChar
* list
) const {
303 if (*list
== SPACE
&& uprv_isRuleWhiteSpace(c
)) return TRUE
;
304 while (*list
&& *list
!= c
) ++list
; return *list
== c
;
306 void parseError(const char* msg
);
308 StringLocalizationInfo
* doParse(void);
310 UChar
** nextArray(int32_t& requiredLength
);
311 UChar
* nextString(void);
315 #define ERROR(msg) parseError(msg); return NULL;
317 #define ERROR(msg) parseError(NULL); return NULL;
321 static const UChar DQUOTE_STOPLIST
[] = {
325 static const UChar SQUOTE_STOPLIST
[] = {
329 static const UChar NOQUOTE_STOPLIST
[] = {
330 SPACE
, COMMA
, CLOSE_ANGLE
, OPEN_ANGLE
, TICK
, QUOTE
, 0
338 StringLocalizationInfo
*
339 LocDataParser::parse(UChar
* _data
, int32_t len
) {
341 if (_data
) uprv_free(_data
);
347 pe
.postContext
[0] = 0;
348 pe
.preContext
[0] = 0;
351 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
356 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
370 StringLocalizationInfo
*
371 LocDataParser::doParse(void) {
373 if (!checkInc(OPEN_ANGLE
)) {
374 ERROR("Missing open angle");
376 VArray
array(DeleteFn
);
377 UBool mightHaveNext
= TRUE
;
378 int32_t requiredLength
= -1;
379 while (mightHaveNext
) {
380 mightHaveNext
= FALSE
;
381 UChar
** elem
= nextArray(requiredLength
);
383 UBool haveComma
= check(COMMA
);
388 mightHaveNext
= TRUE
;
390 } else if (haveComma
) {
391 ERROR("Unexpected character");
396 if (!checkInc(CLOSE_ANGLE
)) {
397 if (check(OPEN_ANGLE
)) {
398 ERROR("Missing comma in outer array");
400 ERROR("Missing close angle bracket in outer array");
406 ERROR("Extra text after close of localization data");
411 int32_t numLocs
= array
.length() - 2; // subtract first, NULL
412 UChar
*** result
= (UChar
***)array
.release();
414 return new StringLocalizationInfo(data
, result
, requiredLength
-2, numLocs
); // subtract first, NULL
418 ERROR("Unknown error");
422 LocDataParser::nextArray(int32_t& requiredLength
) {
428 if (!checkInc(OPEN_ANGLE
)) {
429 ERROR("Missing open angle");
433 UBool mightHaveNext
= TRUE
;
434 while (mightHaveNext
) {
435 mightHaveNext
= FALSE
;
436 UChar
* elem
= nextString();
438 UBool haveComma
= check(COMMA
);
443 mightHaveNext
= TRUE
;
445 } else if (haveComma
) {
446 ERROR("Unexpected comma");
450 if (!checkInc(CLOSE_ANGLE
)) {
451 if (check(OPEN_ANGLE
)) {
452 ERROR("Missing close angle bracket in inner array");
454 ERROR("Missing comma in inner array");
460 if (requiredLength
== -1) {
461 requiredLength
= array
.length() + 1;
462 } else if (array
.length() != requiredLength
) {
463 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
464 ERROR("Array not of required length");
467 return (UChar
**)array
.release();
469 ERROR("Unknown Error");
473 LocDataParser::nextString() {
474 UChar
* result
= NULL
;
478 const UChar
* terminators
;
480 UBool haveQuote
= c
== QUOTE
|| c
== TICK
;
483 terminators
= c
== QUOTE
? DQUOTE_STOPLIST
: SQUOTE_STOPLIST
;
485 terminators
= NOQUOTE_STOPLIST
;
488 while (p
< e
&& !inList(*p
, terminators
)) ++p
;
490 ERROR("Unexpected end of data");
496 *p
= 0x0; // terminate by writing to data
497 result
= start
; // just point into data
501 ERROR("Missing matching quote");
502 } else if (p
== start
) {
503 ERROR("Empty string");
506 } else if (x
== OPEN_ANGLE
|| x
== TICK
|| x
== QUOTE
) {
507 ERROR("Unexpected character in string");
511 // ok for there to be no next string
516 LocDataParser::parseError(const char* /*str*/) {
521 const UChar
* start
= p
- U_PARSE_CONTEXT_LEN
- 1;
524 for (UChar
* x
= p
; --x
>= start
;)
529 const UChar
* limit
= p
+ U_PARSE_CONTEXT_LEN
- 1;
532 u_strncpy(pe
.preContext
, start
, p
-start
);
533 pe
.preContext
[p
-start
] = 0;
534 u_strncpy(pe
.postContext
, p
, limit
-p
);
535 pe
.postContext
[limit
-p
] = 0;
536 pe
.offset
= p
- data
;
539 fprintf(stderr
, "%s at or near character %d: ", str
, p
-data
);
542 msg
.append(start
, p
- start
);
543 msg
.append((UChar
)0x002f); /* SOLIDUS/SLASH */
544 msg
.append(p
, limit
-p
);
548 int32_t len
= msg
.extract(0, msg
.length(), buf
, 128);
554 fprintf(stderr
, "%s\n", buf
);
568 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo
)
// Factory: builds a StringLocalizationInfo by parsing the localization text
// in `info`.
// The text is copied into a uprv_malloc'ed buffer of exactly info.length()
// UChars (no room for a terminator) and that buffer is passed, with its
// length, to LocDataParser::parse(). The parser is constructed with `perror`
// and `status`, so parse diagnostics are reported through them.
570 StringLocalizationInfo
*
571 StringLocalizationInfo::create(const UnicodeString
& info
, UParseError
& perror
, UErrorCode
& status
) {
572 if (U_FAILURE(status
)) {
576 int32_t len
= info
.length();
// NOTE(review): the condition guarding this return is not part of this
// excerpt -- presumably an empty `info`; confirm against the full file.
578 return NULL
; // no error;
581 UChar
* p
= (UChar
*)uprv_malloc(len
* sizeof(UChar
));
583 status
= U_MEMORY_ALLOCATION_ERROR
;
// Copy the characters out; the buffer is exactly `len` long, so extract()
// cannot terminate it and reports that through `status`.
586 info
.extract(p
, len
, status
);
587 if (!U_FAILURE(status
)) {
588 status
= U_ZERO_ERROR
; // clear warning about non-termination
591 LocDataParser
parser(perror
, status
);
592 return parser
.parse(p
, len
);
// Destructor: walks the NULL-terminated outer array `data`, frees each inner
// array with uprv_free, then frees `data` itself and the `info` buffer.
// NOTE(review): the loop below reads *p with p == data before the
// `if (data)` check further down, so a NULL `data` would be dereferenced
// here -- confirm that `data` can never be NULL, or guard the loop.
595 StringLocalizationInfo::~StringLocalizationInfo() {
596 for (UChar
*** p
= (UChar
***)data
; *p
; ++p
) {
597 // remaining data is simply pointer into our unicode string data.
598 if (*p
) uprv_free(*p
);
600 if (data
) uprv_free(data
);
601 if (info
) uprv_free(info
);
// Returns the name of rule set `index`. Names are stored in the first row of
// the table: data[0][index], for 0 <= index < getNumberOfRuleSets().
// NOTE(review): the out-of-range branch is not part of this excerpt --
// presumably it returns NULL; confirm against the full file.
606 StringLocalizationInfo::getRuleSetName(int32_t index
) const {
607 if (index
>= 0 && index
< getNumberOfRuleSets()) {
608 return data
[0][index
];
// Returns the name of display locale `index`. Row 0 of the table holds the
// rule set names, so locale `index` lives in row index+1 and its name is the
// first entry of that row: data[index+1][0].
// NOTE(review): the out-of-range branch is not part of this excerpt --
// presumably it returns NULL (operator== relies on a null result for
// index -1); confirm against the full file.
614 StringLocalizationInfo::getLocaleName(int32_t index
) const {
615 if (index
>= 0 && index
< getNumberOfDisplayLocales()) {
616 return data
[index
+1][0];
// Returns the display name of rule set `ruleIndex` in display locale
// `localeIndex`. Both indices are shifted by one when addressing the table:
// row 0 holds the rule set names and column 0 of each locale row holds the
// locale name, so the value is data[localeIndex+1][ruleIndex+1].
// NOTE(review): the out-of-range branch is not part of this excerpt --
// presumably it returns NULL; confirm against the full file.
622 StringLocalizationInfo::getDisplayName(int32_t localeIndex
, int32_t ruleIndex
) const {
623 if (localeIndex
>= 0 && localeIndex
< getNumberOfDisplayLocales() &&
624 ruleIndex
>= 0 && ruleIndex
< getNumberOfRuleSets()) {
625 return data
[localeIndex
+1][ruleIndex
+1];
632 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
633 const UnicodeString
& locs
,
634 const Locale
& alocale
, UParseError
& perror
, UErrorCode
& status
)
636 , defaultRuleSet(NULL
)
639 , decimalFormatSymbols(NULL
)
641 , lenientParseRules(NULL
)
642 , localizations(NULL
)
644 LocalizationInfo
* locinfo
= StringLocalizationInfo::create(locs
, perror
, status
);
645 init(description
, locinfo
, perror
, status
);
648 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
649 const UnicodeString
& locs
,
650 UParseError
& perror
, UErrorCode
& status
)
652 , defaultRuleSet(NULL
)
653 , locale(Locale::getDefault())
655 , decimalFormatSymbols(NULL
)
657 , lenientParseRules(NULL
)
658 , localizations(NULL
)
660 LocalizationInfo
* locinfo
= StringLocalizationInfo::create(locs
, perror
, status
);
661 init(description
, locinfo
, perror
, status
);
664 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
665 LocalizationInfo
* info
,
666 const Locale
& alocale
, UParseError
& perror
, UErrorCode
& status
)
668 , defaultRuleSet(NULL
)
671 , decimalFormatSymbols(NULL
)
673 , lenientParseRules(NULL
)
674 , localizations(NULL
)
676 init(description
, info
, perror
, status
);
679 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
683 , defaultRuleSet(NULL
)
684 , locale(Locale::getDefault())
686 , decimalFormatSymbols(NULL
)
688 , lenientParseRules(NULL
)
689 , localizations(NULL
)
691 init(description
, NULL
, perror
, status
);
694 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString
& description
,
695 const Locale
& aLocale
,
699 , defaultRuleSet(NULL
)
702 , decimalFormatSymbols(NULL
)
704 , lenientParseRules(NULL
)
705 , localizations(NULL
)
707 init(description
, NULL
, perror
, status
);
710 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag
, const Locale
& alocale
, UErrorCode
& status
)
712 , defaultRuleSet(NULL
)
715 , decimalFormatSymbols(NULL
)
717 , lenientParseRules(NULL
)
718 , localizations(NULL
)
720 if (U_FAILURE(status
)) {
724 const char* fmt_tag
= "";
726 case URBNF_SPELLOUT
: fmt_tag
= "SpelloutRules"; break;
727 case URBNF_ORDINAL
: fmt_tag
= "OrdinalRules"; break;
728 case URBNF_DURATION
: fmt_tag
= "DurationRules"; break;
729 default: status
= U_ILLEGAL_ARGUMENT_ERROR
; return;
732 // TODO: read localization info from resource
733 LocalizationInfo
* locinfo
= NULL
;
736 UResourceBundle
* nfrb
= ures_open(U_ICUDATA_RBNF
, locale
.getName(), &status
);
737 if (U_SUCCESS(status
)) {
738 setLocaleIDs(ures_getLocaleByType(nfrb
, ULOC_VALID_LOCALE
, &status
),
739 ures_getLocaleByType(nfrb
, ULOC_ACTUAL_LOCALE
, &status
));
740 const UChar
* description
= ures_getStringByKey(nfrb
, fmt_tag
, &len
, &status
);
741 UnicodeString
desc(description
, len
);
743 init (desc
, locinfo
, perror
, status
);
748 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat
& rhs
)
751 , defaultRuleSet(NULL
)
754 , decimalFormatSymbols(NULL
)
756 , lenientParseRules(NULL
)
757 , localizations(NULL
)
759 this->operator=(rhs
);
764 RuleBasedNumberFormat
&
765 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat
& rhs
)
767 UErrorCode status
= U_ZERO_ERROR
;
770 lenient
= rhs
.lenient
;
772 UnicodeString rules
= rhs
.getRules();
774 init(rules
, rhs
.localizations
? rhs
.localizations
->ref() : NULL
, perror
, status
);
779 RuleBasedNumberFormat::~RuleBasedNumberFormat()
785 RuleBasedNumberFormat::clone(void) const
787 RuleBasedNumberFormat
* result
= NULL
;
788 UnicodeString rules
= getRules();
789 UErrorCode status
= U_ZERO_ERROR
;
791 result
= new RuleBasedNumberFormat(rules
, localizations
, locale
, perror
, status
);
794 status
= U_MEMORY_ALLOCATION_ERROR
;
797 if (U_FAILURE(status
)) {
801 result
->lenient
= lenient
;
807 RuleBasedNumberFormat::operator==(const Format
& other
) const
809 if (this == &other
) {
813 if (other
.getDynamicClassID() == getStaticClassID()) {
814 const RuleBasedNumberFormat
& rhs
= (const RuleBasedNumberFormat
&)other
;
815 if (locale
== rhs
.locale
&&
816 lenient
== rhs
.lenient
&&
817 (localizations
== NULL
818 ? rhs
.localizations
== NULL
819 : (rhs
.localizations
== NULL
821 : *localizations
== rhs
.localizations
))) {
823 NFRuleSet
** p
= ruleSets
;
824 NFRuleSet
** q
= rhs
.ruleSets
;
827 } else if (q
== NULL
) {
830 while (*p
&& *q
&& (**p
== **q
)) {
834 return *q
== NULL
&& *p
== NULL
;
842 RuleBasedNumberFormat::getRules() const
844 UnicodeString result
;
845 if (ruleSets
!= NULL
) {
846 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
847 (*p
)->appendRules(result
);
854 RuleBasedNumberFormat::getRuleSetName(int32_t index
) const
857 UnicodeString
string(TRUE
, localizations
->getRuleSetName(index
), (int32_t)-1);
859 } else if (ruleSets
) {
860 UnicodeString result
;
861 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
863 if (rs
->isPublic()) {
876 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
880 result
= localizations
->getNumberOfRuleSets();
881 } else if (ruleSets
) {
882 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
883 if ((**p
).isPublic()) {
892 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
894 return localizations
->getNumberOfDisplayLocales();
900 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index
, UErrorCode
& status
) const {
901 if (U_FAILURE(status
)) {
904 if (localizations
&& index
>= 0 && index
< localizations
->getNumberOfDisplayLocales()) {
905 UnicodeString
name(TRUE
, localizations
->getLocaleName(index
), -1);
907 int32_t cap
= name
.length() + 1;
910 bp
= (char *)uprv_malloc(cap
);
912 status
= U_MEMORY_ALLOCATION_ERROR
;
916 name
.extract(0, name
.length(), bp
, cap
, UnicodeString::kInvariant
);
917 Locale
retLocale(bp
);
923 status
= U_ILLEGAL_ARGUMENT_ERROR
;
929 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index
, const Locale
& localeParam
) {
930 if (localizations
&& index
>= 0 && index
< localizations
->getNumberOfRuleSets()) {
931 UnicodeString
localeName(localeParam
.getBaseName(), -1, UnicodeString::kInvariant
);
932 int32_t len
= localeName
.length();
933 UChar
* localeStr
= localeName
.getBuffer(len
+ 1);
936 int32_t ix
= localizations
->indexForLocale(localeStr
);
938 UnicodeString
name(TRUE
, localizations
->getDisplayName(ix
, index
), -1);
942 // trim trailing portion, skipping over omitted sections
943 do { --len
;} while (len
> 0 && localeStr
[len
] != 0x005f); // underscore
944 while (len
> 0 && localeStr
[len
-1] == 0x005F) --len
;
946 UnicodeString
name(TRUE
, localizations
->getRuleSetName(index
), -1);
// Name-based overload: resolves `ruleSetName` to its index in the
// localization data and forwards to the index-based getRuleSetDisplayName().
// NOTE(review): `localizations` is dereferenced below; any NULL guard would
// be on a line that is not part of this excerpt -- confirm one exists.
955 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString
& ruleSetName
, const Locale
& localeParam
) {
// Work on a local copy: getTerminatedBuffer() is called on it, which the
// const reference parameter presumably does not allow -- TODO confirm.
957 UnicodeString
rsn(ruleSetName
);
958 int32_t ix
= localizations
->indexForRuleSet(rsn
.getTerminatedBuffer());
959 return getRuleSetDisplayName(ix
, localeParam
);
967 RuleBasedNumberFormat::findRuleSet(const UnicodeString
& name
, UErrorCode
& status
) const
969 if (U_SUCCESS(status
) && ruleSets
) {
970 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
972 if (rs
->isNamed(name
)) {
976 status
= U_ILLEGAL_ARGUMENT_ERROR
;
982 RuleBasedNumberFormat::format(int32_t number
,
983 UnicodeString
& toAppendTo
,
984 FieldPosition
& /* pos */) const
986 if (defaultRuleSet
) defaultRuleSet
->format((int64_t)number
, toAppendTo
, toAppendTo
.length());
992 RuleBasedNumberFormat::format(int64_t number
,
993 UnicodeString
& toAppendTo
,
994 FieldPosition
& /* pos */) const
996 if (defaultRuleSet
) defaultRuleSet
->format(number
, toAppendTo
, toAppendTo
.length());
1002 RuleBasedNumberFormat::format(double number
,
1003 UnicodeString
& toAppendTo
,
1004 FieldPosition
& /* pos */) const
1006 if (defaultRuleSet
) defaultRuleSet
->format(number
, toAppendTo
, toAppendTo
.length());
1012 RuleBasedNumberFormat::format(int32_t number
,
1013 const UnicodeString
& ruleSetName
,
1014 UnicodeString
& toAppendTo
,
1015 FieldPosition
& /* pos */,
1016 UErrorCode
& status
) const
1018 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1019 if (U_SUCCESS(status
)) {
1020 if (ruleSetName
.indexOf(gPercentPercent
) == 0) {
1021 // throw new IllegalArgumentException("Can't use internal rule set");
1022 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1024 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1026 rs
->format((int64_t)number
, toAppendTo
, toAppendTo
.length());
1035 RuleBasedNumberFormat::format(int64_t number
,
1036 const UnicodeString
& ruleSetName
,
1037 UnicodeString
& toAppendTo
,
1038 FieldPosition
& /* pos */,
1039 UErrorCode
& status
) const
1041 if (U_SUCCESS(status
)) {
1042 if (ruleSetName
.indexOf(gPercentPercent
) == 0) {
1043 // throw new IllegalArgumentException("Can't use internal rule set");
1044 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1046 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1048 rs
->format(number
, toAppendTo
, toAppendTo
.length());
1056 // make linker happy
1058 RuleBasedNumberFormat::format(const Formattable
& obj
,
1059 UnicodeString
& toAppendTo
,
1061 UErrorCode
& status
) const
1063 return NumberFormat::format(obj
, toAppendTo
, pos
, status
);
1067 RuleBasedNumberFormat::format(double number
,
1068 const UnicodeString
& ruleSetName
,
1069 UnicodeString
& toAppendTo
,
1070 FieldPosition
& /* pos */,
1071 UErrorCode
& status
) const
1073 if (U_SUCCESS(status
)) {
1074 if (ruleSetName
.indexOf(gPercentPercent
) == 0) {
1075 // throw new IllegalArgumentException("Can't use internal rule set");
1076 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1078 NFRuleSet
*rs
= findRuleSet(ruleSetName
, status
);
1080 rs
->format(number
, toAppendTo
, toAppendTo
.length());
// Parses `text` starting at parsePosition's index by trying every public
// rule set and keeping the result of the one that consumed the most text.
// On return parsePosition's index has been advanced by the number of
// characters consumed by the best attempt, and `result` holds its value.
// NOTE(review): several lines of this function (guards, loop-local
// declarations such as `rp`, early exits) are not part of this excerpt.
1088 RuleBasedNumberFormat::parse(const UnicodeString
& text
,
1089 Formattable
& result
,
1090 ParsePosition
& parsePosition
) const
1093 parsePosition
.setErrorIndex(0);
// Work on the tail of the text, so positions below are relative to the
// caller's starting index.
1097 UnicodeString
workingText(text
, parsePosition
.getIndex());
1098 ParsePosition
workingPos(0);
// Best attempt so far: furthest parse position and the value it produced.
1100 ParsePosition
high_pp(0);
1101 Formattable high_result
;
1103 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
1105 if (rp
->isPublic()) {
1106 ParsePosition
working_pp(0);
1107 Formattable working_result
;
// kMaxDouble is passed as the upper bound argument of the rule-set parse.
1109 rp
->parse(workingText
, working_pp
, kMaxDouble
, working_result
);
// Keep this attempt only if it consumed strictly more text than the best so far.
1110 if (working_pp
.getIndex() > high_pp
.getIndex()) {
1111 high_pp
= working_pp
;
1112 high_result
= working_result
;
// The whole working text was consumed; presumably the search stops here
// (the body of this branch is not part of this excerpt).
1114 if (high_pp
.getIndex() == workingText
.length()) {
// Translate the relative position back into the caller's coordinates.
1121 parsePosition
.setIndex(parsePosition
.getIndex() + high_pp
.getIndex());
// Something was parsed: clear the error index set at the top.
1122 if (high_pp
.getIndex() > 0) {
1123 parsePosition
.setErrorIndex(-1);
1125 result
= high_result
;
// A double whose value survives a round trip through int32_t is detected
// here; what is done with it is not part of this excerpt.
// NOTE(review): the (int32_t) cast is applied before any range check, and
// converting an out-of-range double to int32_t is undefined behavior --
// confirm values beyond the int32_t range cannot reach this point.
1126 if (result
.getType() == Formattable::kDouble
) {
1127 int32_t r
= (int32_t)result
.getDouble();
1128 if ((double)r
== result
.getDouble()) {
1134 #if !UCONFIG_NO_COLLATION
1137 RuleBasedNumberFormat::setLenient(UBool enabled
)
1140 if (!enabled
&& collator
) {
1149 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString
& ruleSetName
, UErrorCode
& status
) {
1150 if (U_SUCCESS(status
)) {
1151 if (ruleSetName
.isEmpty()) {
1152 if (localizations
) {
1153 UnicodeString
name(TRUE
, localizations
->getRuleSetName(0), -1);
1154 defaultRuleSet
= findRuleSet(name
, status
);
1156 initDefaultRuleSet();
1158 } else if (ruleSetName
.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1159 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1161 NFRuleSet
* result
= findRuleSet(ruleSetName
, status
);
1162 if (result
!= NULL
) {
1163 defaultRuleSet
= result
;
1170 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1171 UnicodeString result
;
1172 if (defaultRuleSet
&& defaultRuleSet
->isPublic()) {
1173 defaultRuleSet
->getName(result
);
1175 result
.setToBogus();
// Chooses the default rule set: the last rule set in `ruleSets` if it is
// public, otherwise the closest public rule set before it. If no rule set is
// public, the last rule set stays selected.
1181 RuleBasedNumberFormat::initDefaultRuleSet()
1183 defaultRuleSet
= NULL
;
1188 NFRuleSet
**p
= &ruleSets
[0];
// `*--p` steps back one slot before reading, so p is expected to point one
// past the last rule set here -- the code that advances it is not part of
// this excerpt (TODO confirm).
1193 defaultRuleSet
= *--p
;
1194 if (!defaultRuleSet
->isPublic()) {
// Walk backward toward the start of the array looking for a public rule set.
1195 while (p
!= ruleSets
) {
1196 if ((*--p
)->isPublic()) {
1197 defaultRuleSet
= *p
;
1206 RuleBasedNumberFormat::init(const UnicodeString
& rules
, LocalizationInfo
* localizationInfos
,
1207 UParseError
& /* pErr */, UErrorCode
& status
)
1209 // TODO: implement UParseError
1210 // Note: this can leave ruleSets == NULL, so remaining code should check
1211 if (U_FAILURE(status
)) {
1215 this->localizations
= localizationInfos
== NULL
? NULL
: localizationInfos
->ref();
1217 UnicodeString
description(rules
);
1218 if (!description
.length()) {
1219 status
= U_MEMORY_ALLOCATION_ERROR
;
1223 // start by stripping the trailing whitespace from all the rules
1224 // (this is all the whitespace following each semicolon in the
1225 // description). This allows us to look for rule-set boundaries
1226 // by searching for ";%" without having to worry about whitespace
1227 // between the ; and the %
1228 stripWhitespace(description
);
1230 // check to see if there's a set of lenient-parse rules. If there
1231 // is, pull them out into our temporary holding place for them,
1232 // and delete them from the description before the real description-
1233 // parsing code sees them
1234 int32_t lp
= description
.indexOf(gLenientParse
);
1236 // we've got to make sure we're not in the middle of a rule
1237 // (where "%%lenient-parse" would actually get treated as
1239 if (lp
== 0 || description
.charAt(lp
- 1) == gSemiColon
) {
1240 // locate the beginning and end of the actual collation
1241 // rules (there may be whitespace between the name and
1242 // the first token in the description)
1243 int lpEnd
= description
.indexOf(gSemiPercent
, lp
);
1246 lpEnd
= description
.length() - 1;
1248 int lpStart
= lp
+ u_strlen(gLenientParse
);
1249 while (uprv_isRuleWhiteSpace(description
.charAt(lpStart
))) {
1253 // copy out the lenient-parse rules and delete them
1254 // from the description
1255 lenientParseRules
= new UnicodeString();
1257 if (lenientParseRules
== 0) {
1258 status
= U_MEMORY_ALLOCATION_ERROR
;
1261 lenientParseRules
->setTo(description
, lpStart
, lpEnd
- lpStart
);
1263 description
.remove(lp
, lpEnd
+ 1 - lp
);
1267 // pre-flight parsing the description and count the number of
1268 // rule sets (";%" marks the end of one rule set and the beginning
1270 int numRuleSets
= 0;
1271 for (int32_t p
= description
.indexOf(gSemiPercent
); p
!= -1; p
= description
.indexOf(gSemiPercent
, p
)) {
1277 // our rule list is an array of the appropriate size
1278 ruleSets
= (NFRuleSet
**)uprv_malloc((numRuleSets
+ 1) * sizeof(NFRuleSet
*));
1280 if (ruleSets
== 0) {
1281 status
= U_MEMORY_ALLOCATION_ERROR
;
1285 for (int i
= 0; i
<= numRuleSets
; ++i
) {
1289 // divide up the descriptions into individual rule-set descriptions
1290 // and store them in a temporary array. At each step, we also
1291 // new up a rule set, but all this does is initialize its name
1292 // and remove it from its description. We can't actually parse
1293 // the rest of the descriptions and finish initializing everything
1294 // because we have to know the names and locations of all the rule
1295 // sets before we can actually set everything up
1297 status
= U_ILLEGAL_ARGUMENT_ERROR
;
1300 UnicodeString
* ruleSetDescriptions
= new UnicodeString
[numRuleSets
];
1302 if (ruleSetDescriptions
== 0) {
1303 status
= U_MEMORY_ALLOCATION_ERROR
;
1310 for (int32_t p
= description
.indexOf(gSemiPercent
); p
!= -1; p
= description
.indexOf(gSemiPercent
, start
)) {
1311 ruleSetDescriptions
[curRuleSet
].setTo(description
, start
, p
+ 1 - start
);
1312 ruleSets
[curRuleSet
] = new NFRuleSet(ruleSetDescriptions
, curRuleSet
, status
);
1314 if (ruleSets
[curRuleSet
] == 0) {
1315 status
= U_MEMORY_ALLOCATION_ERROR
;
1321 ruleSetDescriptions
[curRuleSet
].setTo(description
, start
, description
.length() - start
);
1322 ruleSets
[curRuleSet
] = new NFRuleSet(ruleSetDescriptions
, curRuleSet
, status
);
1324 if (ruleSets
[curRuleSet
] == 0) {
1325 status
= U_MEMORY_ALLOCATION_ERROR
;
1330 // now we can take note of the formatter's default rule set, which
1331 // is the last public rule set in the description (it's the last
1332 // rather than the first so that a user can create a new formatter
1333 // from an existing formatter and change its default behavior just
1334 // by appending more rule sets to the end)
1336 // {dlf} Initialization of a fraction rule set requires the default rule
1337 // set to be known. For purposes of initialization, this is always the
1338 // last public rule set, no matter what the localization data says.
1339 initDefaultRuleSet();
1341 // finally, we can go back through the temporary descriptions
1342 // list and finish setting up the substructure (and we throw
1343 // away the temporary descriptions as we go)
1345 for (int i
= 0; i
< numRuleSets
; i
++) {
1346 ruleSets
[i
]->parseRules(ruleSetDescriptions
[i
], this, status
);
1350 delete[] ruleSetDescriptions
;
1352 // Now that the rules are initialized, the 'real' default rule
1353 // set can be adjusted by the localization data.
1355 // The C code keeps the localization array as is, rather than building
1356 // a separate array of the public rule set names, so we have less work
1357 // to do here-- but we still need to check the names.
1359 if (localizationInfos
) {
1360 // confirm the names, if any aren't in the rules, that's an error
1361 // it is ok if the rules contain public rule sets that are not in this list
1362 for (int32_t i
= 0; i
< localizationInfos
->getNumberOfRuleSets(); ++i
) {
1363 UnicodeString
name(TRUE
, localizationInfos
->getRuleSetName(i
), -1);
1364 NFRuleSet
* rs
= findRuleSet(name
, status
);
1369 defaultRuleSet
= rs
;
1373 defaultRuleSet
= getDefaultRuleSet();
1378 RuleBasedNumberFormat::stripWhitespace(UnicodeString
& description
)
1380 // iterate through the characters...
1381 UnicodeString result
;
1384 while (start
!= -1 && start
< description
.length()) {
1385 // seek to the first non-whitespace character...
1386 while (start
< description
.length()
1387 && uprv_isRuleWhiteSpace(description
.charAt(start
))) {
1391 // locate the next semicolon in the text and copy the text from
1392 // our current position up to that semicolon into the result
1393 int32_t p
= description
.indexOf(gSemiColon
, start
);
1395 // or if we don't find a semicolon, just copy the rest of
1396 // the string into the result
1397 result
.append(description
, start
, description
.length() - start
);
1400 else if (p
< description
.length()) {
1401 result
.append(description
, start
, p
+ 1 - start
);
1405 // when we get here, we've run off the end of the string, and
1406 // we terminate the loop (we continue until *start* is -1 rather
1407 // than until *p* is -1, because otherwise we'd miss the last
1408 // rule in the description)
1414 description
.setTo(result
);
1419 RuleBasedNumberFormat::dispose()
1422 for (NFRuleSet
** p
= ruleSets
; *p
; ++p
) {
1425 uprv_free(ruleSets
);
1429 #if !UCONFIG_NO_COLLATION
1434 delete decimalFormatSymbols
;
1435 decimalFormatSymbols
= NULL
;
1437 delete lenientParseRules
;
1438 lenientParseRules
= NULL
;
1440 if (localizations
) localizations
= localizations
->unref();
1444 //-----------------------------------------------------------------------
1445 // package-internal API
1446 //-----------------------------------------------------------------------
1449 * Returns the collator to use for lenient parsing. The collator is lazily created:
1450 * this function creates it the first time it's called.
1451 * @return The collator to use for lenient parsing, or null if lenient parsing
1455 RuleBasedNumberFormat::getCollator() const
1457 #if !UCONFIG_NO_COLLATION
1462 // lazy-evaluate the collator
1463 if (collator
== NULL
&& lenient
) {
1464 // create a default collator based on the formatter's locale,
1465 // then pull out that collator's rules, append any additional
1466 // rules specified in the description, and create a _new_
1467 // collator based on the combination of those rules
1469 UErrorCode status
= U_ZERO_ERROR
;
1471 Collator
* temp
= Collator::createInstance(locale
, status
);
1472 if (U_SUCCESS(status
) &&
1473 temp
->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
1475 RuleBasedCollator
* newCollator
= (RuleBasedCollator
*)temp
;
1476 if (lenientParseRules
) {
1477 UnicodeString
rules(newCollator
->getRules());
1478 rules
.append(*lenientParseRules
);
1480 newCollator
= new RuleBasedCollator(rules
, status
);
1484 if (U_SUCCESS(status
)) {
1485 newCollator
->setAttribute(UCOL_DECOMPOSITION_MODE
, UCOL_ON
, status
);
1487 ((RuleBasedNumberFormat
*)this)->collator
= newCollator
;
1496 // if lenient-parse mode is off, this will be null
1497 // (see setLenient())
1503 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1504 * instances owned by this formatter. This object is lazily created: this function
1505 * creates it the first time it's called.
1506 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1507 * instances owned by this formatter.
1509 DecimalFormatSymbols
*
1510 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1512 // lazy-evaluate the DecimalFormatSymbols object. This object
1513 // is shared by all DecimalFormat instances belonging to this
1515 if (decimalFormatSymbols
== NULL
) {
1516 UErrorCode status
= U_ZERO_ERROR
;
1517 DecimalFormatSymbols
* temp
= new DecimalFormatSymbols(locale
, status
);
1518 if (U_SUCCESS(status
)) {
1519 ((RuleBasedNumberFormat
*)this)->decimalFormatSymbols
= temp
;
1524 return decimalFormatSymbols
;