]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/choicfmt.cpp
ICU-400.39.tar.gz
[apple/icu.git] / icuSources / i18n / choicfmt.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2008, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 *
7 * File CHOICFMT.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 02/19/97 aliu Converted from java.
13 * 03/20/97 helena Finished first cut of implementation and got rid
14 * of nextDouble/previousDouble and replaced with
15 * boolean array.
16 * 4/10/97 aliu Clean up. Modified to work on AIX.
17 * 06/04/97 helena Fixed applyPattern(), toPattern() and not to include
18 * wchar.h.
19 * 07/09/97 helena Made ParsePosition into a class.
20 * 08/06/97 nos removed overloaded constructor, fixed 'format(array)'
21 * 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags)
22 * 02/22/99 stephen Removed character literals for EBCDIC safety
23 ********************************************************************************
24 */
25
26 #include "unicode/utypes.h"
27
28 #if !UCONFIG_NO_FORMATTING
29
30 #include "unicode/choicfmt.h"
31 #include "unicode/numfmt.h"
32 #include "unicode/locid.h"
33 #include "cpputils.h"
34 #include "cstring.h"
35 #include "putilimp.h"
36 #include <stdio.h>
37 #include <float.h>
38
39 // *****************************************************************************
40 // class ChoiceFormat
41 // *****************************************************************************
42
43 U_NAMESPACE_BEGIN
44
// NOTE(review): presumably expands to the class-ID (RTTI) boilerplate for
// ChoiceFormat; the macro is defined outside this file -- confirm there.
45 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
46
47 // Special characters used by ChoiceFormat. There are two characters
48 // used interchangeably to indicate <=. Either is parsed, but only
49 // LESS_EQUAL is generated by toPattern().
// All values are spelled as numeric code points rather than character
// literals (see the 02/22/99 history entry: EBCDIC safety).
50 #define SINGLE_QUOTE ((UChar)0x0027) /*'*/
51 #define LESS_THAN ((UChar)0x003C) /*<*/
52 #define LESS_EQUAL ((UChar)0x0023) /*#*/
// U+2264: the alternative "<=" character accepted by applyPattern().
53 #define LESS_EQUAL2 ((UChar)0x2264)
54 #define VERTICAL_BAR ((UChar)0x007C) /*|*/
55 #define MINUS ((UChar)0x002D) /*-*/
// U+221E: the infinity sign used for unbounded limits in patterns.
// NOTE(review): this name is also a C99 <math.h> macro; confirm no header
// included here defines it.
56 #define INFINITY ((UChar)0x221E)
57
// Zero-terminated pattern spellings of +infinity and -infinity; applyPattern()
// compares each parsed limit string against them.
58 static const UChar gPositiveInfinity[] = {INFINITY, 0};
59 static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
// Lengths of the two strings above in UChars, not counting the trailing 0.
60 #define POSITIVE_INF_STRLEN 1
61 #define NEGATIVE_INF_STRLEN 2
62
63 // -------------------------------------
64 // Creates a ChoiceFormat instance based on the pattern.
65
66 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
67 UErrorCode& status)
68 : fChoiceLimits(0),
69 fClosures(0),
70 fChoiceFormats(0),
71 fCount(0)
72 {
73 applyPattern(newPattern, status);
74 }
75
76 // -------------------------------------
77 // Creates a ChoiceFormat instance with the limit array and
78 // format strings for each limit.
79
80 ChoiceFormat::ChoiceFormat(const double* limits,
81 const UnicodeString* formats,
82 int32_t cnt )
83 : fChoiceLimits(0),
84 fClosures(0),
85 fChoiceFormats(0),
86 fCount(0)
87 {
88 setChoices(limits, formats, cnt );
89 }
90
91 // -------------------------------------
92
93 ChoiceFormat::ChoiceFormat(const double* limits,
94 const UBool* closures,
95 const UnicodeString* formats,
96 int32_t cnt )
97 : fChoiceLimits(0),
98 fClosures(0),
99 fChoiceFormats(0),
100 fCount(0)
101 {
102 setChoices(limits, closures, formats, cnt );
103 }
104
105 // -------------------------------------
106 // copy constructor
107
108 ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
109 : NumberFormat(that),
110 fChoiceLimits(0),
111 fClosures(0),
112 fChoiceFormats(0)
113 {
114 *this = that;
115 }
116
117 // -------------------------------------
118 // Private constructor that creates a
119 // ChoiceFormat instance based on the
120 // pattern and populates UParseError
121
122 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
123 UParseError& parseError,
124 UErrorCode& status)
125 : fChoiceLimits(0),
126 fClosures(0),
127 fChoiceFormats(0),
128 fCount(0)
129 {
130 applyPattern(newPattern,parseError, status);
131 }
132 // -------------------------------------
133
134 UBool
135 ChoiceFormat::operator==(const Format& that) const
136 {
137 if (this == &that) return TRUE;
138 if (!NumberFormat::operator==(that)) return FALSE;
139 ChoiceFormat& thatAlias = (ChoiceFormat&)that;
140 if (fCount != thatAlias.fCount) return FALSE;
141 // Checks the limits, the corresponding format string and LE or LT flags.
142 // LE means less than and equal to, LT means less than.
143 for (int32_t i = 0; i < fCount; i++) {
144 if ((fChoiceLimits[i] != thatAlias.fChoiceLimits[i]) ||
145 (fClosures[i] != thatAlias.fClosures[i]) ||
146 (fChoiceFormats[i] != thatAlias.fChoiceFormats[i]))
147 return FALSE;
148 }
149 return TRUE;
150 }
151
152 // -------------------------------------
153 // assignment operator
154
155 const ChoiceFormat&
156 ChoiceFormat::operator=(const ChoiceFormat& that)
157 {
158 if (this != &that) {
159 NumberFormat::operator=(that);
160 fCount = that.fCount;
161 uprv_free(fChoiceLimits);
162 fChoiceLimits = NULL;
163 uprv_free(fClosures);
164 fClosures = NULL;
165 delete [] fChoiceFormats;
166 fChoiceFormats = NULL;
167
168 fChoiceLimits = (double*) uprv_malloc( sizeof(double) * fCount);
169 fClosures = (UBool*) uprv_malloc( sizeof(UBool) * fCount);
170 fChoiceFormats = new UnicodeString[fCount];
171
172 // check for memory allocation error
173 if (!fChoiceLimits || !fClosures || !fChoiceFormats) {
174 if (fChoiceLimits) {
175 uprv_free(fChoiceLimits);
176 fChoiceLimits = NULL;
177 }
178 if (fClosures) {
179 uprv_free(fClosures);
180 fClosures = NULL;
181 }
182 if (fChoiceFormats) {
183 delete[] fChoiceFormats;
184 fChoiceFormats = NULL;
185 }
186 } else {
187 uprv_arrayCopy(that.fChoiceLimits, fChoiceLimits, fCount);
188 uprv_arrayCopy(that.fClosures, fClosures, fCount);
189 uprv_arrayCopy(that.fChoiceFormats, fChoiceFormats, fCount);
190 }
191 }
192 return *this;
193 }
194
195 // -------------------------------------
196
197 ChoiceFormat::~ChoiceFormat()
198 {
199 uprv_free(fChoiceLimits);
200 fChoiceLimits = NULL;
201 uprv_free(fClosures);
202 fClosures = NULL;
203 delete [] fChoiceFormats;
204 fChoiceFormats = NULL;
205 fCount = 0;
206 }
207
208 /**
209 * Convert a string to a double value
210 */
211 double
212 ChoiceFormat::stod(const UnicodeString& string)
213 {
214 char source[256];
215 char* end;
216
217 string.extract(0, string.length(), source, (int32_t)sizeof(source), US_INV); /* invariant codepage */
218 return uprv_strtod(source,&end);
219 }
220
221 // -------------------------------------
222
223 /**
224 * Convert a double value to a string without the overhead of ICU.
225 */
226 UnicodeString&
227 ChoiceFormat::dtos(double value,
228 UnicodeString& string)
229 {
230 /* Buffer to contain the digits and any extra formatting stuff. */
231 char temp[DBL_DIG + 16];
232 char *itrPtr = temp;
233 char *expPtr;
234
235 sprintf(temp, "%.*g", DBL_DIG, value);
236
237 /* Find and convert the decimal point.
238 Using setlocale on some machines will cause sprintf to use a comma for certain locales.
239 */
240 while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
241 itrPtr++;
242 }
243 if (*itrPtr != 0 && *itrPtr != 'e') {
244 /* We reached something that looks like a decimal point.
245 In case someone used setlocale(), which changes the decimal point. */
246 *itrPtr = '.';
247 itrPtr++;
248 }
249 /* Search for the exponent */
250 while (*itrPtr && *itrPtr != 'e') {
251 itrPtr++;
252 }
253 if (*itrPtr == 'e') {
254 itrPtr++;
255 /* Verify the exponent sign */
256 if (*itrPtr == '+' || *itrPtr == '-') {
257 itrPtr++;
258 }
259 /* Remove leading zeros. You will see this on Windows machines. */
260 expPtr = itrPtr;
261 while (*itrPtr == '0') {
262 itrPtr++;
263 }
264 if (*itrPtr && expPtr != itrPtr) {
265 /* Shift the exponent without zeros. */
266 while (*itrPtr) {
267 *(expPtr++) = *(itrPtr++);
268 }
269 // NULL terminate
270 *expPtr = 0;
271 }
272 }
273
274 string = UnicodeString(temp, -1, US_INV); /* invariant codepage */
275 return string;
276 }
277
278 // -------------------------------------
279 // calls the overloaded applyPattern method.
280
281 void
282 ChoiceFormat::applyPattern(const UnicodeString& pattern,
283 UErrorCode& status)
284 {
285 UParseError parseError;
286 applyPattern(pattern, parseError, status);
287 }
288
289 // -------------------------------------
290 // Applies the pattern to this ChoiceFormat instance.
291
// Parses a choice pattern of the form
//     limit <relation> text | limit <relation> text | ...
// where <relation> is LESS_THAN, LESS_EQUAL or LESS_EQUAL2, and installs the
// resulting limits, closure flags and format strings in this object.
//
// pattern     the pattern text to parse
// parseError  cleared on entry; on a syntax error it is filled in through
//             syntaxError() with the offending index
// status      nothing is done if it already holds a failure; set to
//             U_MEMORY_ALLOCATION_ERROR if scratch storage cannot be
//             allocated, or U_ILLEGAL_ARGUMENT_ERROR on a malformed pattern
//
// The existing choices are replaced only after the whole pattern has parsed
// successfully; on any failure this object is left untouched.
292 void
293 ChoiceFormat::applyPattern(const UnicodeString& pattern,
294 UParseError& parseError,
295 UErrorCode& status)
296 {
297 if (U_FAILURE(status))
298 {
299 return;
300 }
301
302 // Clear error struct
303 parseError.offset = -1;
304 parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
305
306 // Perform 2 passes. The first computes the number of limits in
307 // this pattern (fCount), which is 1 more than the number of
308 // literal VERTICAL_BAR characters.
309 int32_t count = 1;
310 int32_t i;
311 for (i=0; i<pattern.length(); ++i) {
312 UChar c = pattern[i];
313 if (c == SINGLE_QUOTE) {
314 // Skip over the entire quote, including embedded
315 // contiguous pairs of SINGLE_QUOTE.
316 for (;;) {
317 do {
318 ++i;
319 } while (i<pattern.length() &&
320 pattern[i] != SINGLE_QUOTE);
321 if ((i+1)<pattern.length() &&
322 pattern[i+1] == SINGLE_QUOTE) {
323 // SINGLE_QUOTE pair; skip over it
324 ++i;
325 } else {
326 break;
327 }
328 }
329 } else if (c == VERTICAL_BAR) {
330 ++count;
331 }
332 }
333
334 // Allocate the required storage.
// Each failure path below frees whatever was allocated before it.
335 double *newLimits = (double*) uprv_malloc( sizeof(double) * count);
336 /* test for NULL */
337 if (newLimits == 0) {
338 status = U_MEMORY_ALLOCATION_ERROR;
339 return;
340 }
341 UBool *newClosures = (UBool*) uprv_malloc( sizeof(UBool) * count);
342 /* test for NULL */
343 if (newClosures == 0) {
344 status = U_MEMORY_ALLOCATION_ERROR;
345 uprv_free(newLimits);
346 return;
347 }
348 UnicodeString *newFormats = new UnicodeString[count];
349 /* test for NULL */
350 if (newFormats == 0) {
351 status = U_MEMORY_ALLOCATION_ERROR;
352 uprv_free(newLimits);
353 uprv_free(newClosures);
354 return;
355 }
356
357 // Perform the second pass
// buf accumulates either the text of a limit (while inNumber) or the text
// of a format string (after the relation character has been seen).
358 int32_t k = 0; // index into newXxx[] arrays
359 UnicodeString buf; // scratch buffer
360 UBool inQuote = FALSE;
361 UBool inNumber = TRUE; // TRUE before < or #, FALSE after
362
363 for (i=0; i<pattern.length(); ++i) {
364 UChar c = pattern[i];
365 if (c == SINGLE_QUOTE) {
366 // Check for SINGLE_QUOTE pair indicating a literal quote
367 if ((i+1) < pattern.length() &&
368 pattern[i+1] == SINGLE_QUOTE) {
369 buf += SINGLE_QUOTE;
370 ++i;
371 } else {
372 inQuote = !inQuote;
373 }
374 } else if (inQuote) {
375 buf += c;
376 } else if (c == LESS_THAN || c == LESS_EQUAL || c == LESS_EQUAL2) {
// A relation character ends the limit text; it is an error if we were
// not expecting a number or if no limit text was collected.
377 if (!inNumber || buf.length() == 0) {
378 goto error;
379 }
380 inNumber = FALSE;
381
382 double limit;
383 buf.trim();
384 if (!buf.compare(gPositiveInfinity, POSITIVE_INF_STRLEN)) {
385 limit = uprv_getInfinity();
386 } else if (!buf.compare(gNegativeInfinity, NEGATIVE_INF_STRLEN)) {
387 limit = -uprv_getInfinity();
388 } else {
389 limit = stod(buf);
390 }
391
392 if (k == count) {
393 // This shouldn't happen. If it does, it means that
394 // the count determined in the first pass did not
395 // match the number of elements found in the second
396 // pass.
397 goto error;
398 }
399 newLimits[k] = limit;
// The closure flag is TRUE only for LESS_THAN; both "<=" characters
// store FALSE.
400 newClosures[k] = (c == LESS_THAN);
401
402 if (k > 0 && limit <= newLimits[k-1]) {
403 // Each limit must be strictly > than the previous
404 // limit. One exception: Two subsequent limits may be
405 // == if the first closure is FALSE and the second
406 // closure is TRUE. This places the limit value in
407 // the second interval.
408 if (!(limit == newLimits[k-1] &&
409 !newClosures[k-1] &&
410 newClosures[k])) {
411 goto error;
412 }
413 }
414
415 buf.truncate(0);
416 } else if (c == VERTICAL_BAR) {
// A bar ends the current format string; it is an error if no relation
// character has been seen since the previous bar.
417 if (inNumber) {
418 goto error;
419 }
420 inNumber = TRUE;
421
422 newFormats[k] = buf;
423 ++k;
424 buf.truncate(0);
425 } else {
426 buf += c;
427 }
428 }
429
// The pattern must end inside the last format string: all but one entry
// already stored, no pending limit text and no unterminated quote.
430 if (k != (count-1) || inNumber || inQuote) {
431 goto error;
432 }
433 newFormats[k] = buf;
434
435 // Don't modify this object until the parse succeeds
436 uprv_free(fChoiceLimits);
437 uprv_free(fClosures);
438 delete[] fChoiceFormats;
439 fCount = count;
440 fChoiceLimits = newLimits;
441 fClosures = newClosures;
442 fChoiceFormats = newFormats;
443 return;
444
// Common failure exit: i is the pattern index at which parsing stopped.
445 error:
446 status = U_ILLEGAL_ARGUMENT_ERROR;
447 syntaxError(pattern,i,parseError);
448 uprv_free(newLimits);
449 uprv_free(newClosures);
450 delete[] newFormats;
451 return;
452
453 }
454 // -------------------------------------
455 // Reconstruct the original input pattern.
456
457 UnicodeString&
458 ChoiceFormat::toPattern(UnicodeString& result) const
459 {
460 result.remove();
461 for (int32_t i = 0; i < fCount; ++i) {
462 if (i != 0) {
463 result += VERTICAL_BAR;
464 }
465 UnicodeString buf;
466 if (uprv_isPositiveInfinity(fChoiceLimits[i])) {
467 result += INFINITY;
468 } else if (uprv_isNegativeInfinity(fChoiceLimits[i])) {
469 result += MINUS;
470 result += INFINITY;
471 } else {
472 result += dtos(fChoiceLimits[i], buf);
473 }
474 if (fClosures[i]) {
475 result += LESS_THAN;
476 } else {
477 result += LESS_EQUAL;
478 }
479 // Append fChoiceFormats[i], using quotes if there are special
480 // characters. Single quotes themselves must be escaped in
481 // either case.
482 const UnicodeString& text = fChoiceFormats[i];
483 UBool needQuote = text.indexOf(LESS_THAN) >= 0
484 || text.indexOf(LESS_EQUAL) >= 0
485 || text.indexOf(LESS_EQUAL2) >= 0
486 || text.indexOf(VERTICAL_BAR) >= 0;
487 if (needQuote) {
488 result += SINGLE_QUOTE;
489 }
490 if (text.indexOf(SINGLE_QUOTE) < 0) {
491 result += text;
492 }
493 else {
494 for (int32_t j = 0; j < text.length(); ++j) {
495 UChar c = text[j];
496 result += c;
497 if (c == SINGLE_QUOTE) {
498 result += c;
499 }
500 }
501 }
502 if (needQuote) {
503 result += SINGLE_QUOTE;
504 }
505 }
506
507 return result;
508 }
509
510 // -------------------------------------
511 // Sets the limit and format arrays.
512 void
513 ChoiceFormat::setChoices( const double* limits,
514 const UnicodeString* formats,
515 int32_t cnt )
516 {
517 setChoices(limits, 0, formats, cnt);
518 }
519
520 // -------------------------------------
521 // Sets the limit and format arrays.
522 void
523 ChoiceFormat::setChoices( const double* limits,
524 const UBool* closures,
525 const UnicodeString* formats,
526 int32_t cnt )
527 {
528 if(limits == 0 || formats == 0)
529 return;
530
531 if (fChoiceLimits) {
532 uprv_free(fChoiceLimits);
533 }
534 if (fClosures) {
535 uprv_free(fClosures);
536 }
537 if (fChoiceFormats) {
538 delete [] fChoiceFormats;
539 }
540
541 // Note that the old arrays are deleted and this owns
542 // the created array.
543 fCount = cnt;
544 fChoiceLimits = (double*) uprv_malloc( sizeof(double) * fCount);
545 fClosures = (UBool*) uprv_malloc( sizeof(UBool) * fCount);
546 fChoiceFormats = new UnicodeString[fCount];
547
548 //check for memory allocation error
549 if (!fChoiceLimits || !fClosures || !fChoiceFormats) {
550 if (fChoiceLimits) {
551 uprv_free(fChoiceLimits);
552 fChoiceLimits = NULL;
553 }
554 if (fClosures) {
555 uprv_free(fClosures);
556 fClosures = NULL;
557 }
558 if (fChoiceFormats) {
559 delete[] fChoiceFormats;
560 fChoiceFormats = NULL;
561 }
562 return;
563 }
564
565 uprv_arrayCopy(limits, fChoiceLimits, fCount);
566 uprv_arrayCopy(formats, fChoiceFormats, fCount);
567
568 if (closures != 0) {
569 uprv_arrayCopy(closures, fClosures, fCount);
570 } else {
571 int32_t i;
572 for (i=0; i<fCount; ++i) {
573 fClosures[i] = FALSE;
574 }
575 }
576 }
577
578 // -------------------------------------
579 // Gets the limit array.
580
581 const double*
582 ChoiceFormat::getLimits(int32_t& cnt) const
583 {
584 cnt = fCount;
585 return fChoiceLimits;
586 }
587
588 // -------------------------------------
589 // Gets the closures array.
590
591 const UBool*
592 ChoiceFormat::getClosures(int32_t& cnt) const
593 {
594 cnt = fCount;
595 return fClosures;
596 }
597
598 // -------------------------------------
599 // Gets the format array.
600
601 const UnicodeString*
602 ChoiceFormat::getFormats(int32_t& cnt) const
603 {
604 cnt = fCount;
605 return fChoiceFormats;
606 }
607
608 // -------------------------------------
609 // Formats an int64 number, it's actually formatted as
610 // a double. The returned format string may differ
611 // from the input number because of this.
612
613 UnicodeString&
614 ChoiceFormat::format(int64_t number,
615 UnicodeString& appendTo,
616 FieldPosition& status) const
617 {
618 return format((double) number, appendTo, status);
619 }
620
621 // -------------------------------------
622 // Formats a long number, it's actually formatted as
623 // a double. The returned format string may differ
624 // from the input number because of this.
625
626 UnicodeString&
627 ChoiceFormat::format(int32_t number,
628 UnicodeString& appendTo,
629 FieldPosition& status) const
630 {
631 return format((double) number, appendTo, status);
632 }
633
634 // -------------------------------------
635 // Formats a double number.
636
637 UnicodeString&
638 ChoiceFormat::format(double number,
639 UnicodeString& appendTo,
640 FieldPosition& /*pos*/) const
641 {
642 // find the number
643 int32_t i;
644 for (i = 0; i < fCount; ++i) {
645 if (fClosures[i]) {
646 if (!(number > fChoiceLimits[i])) {
647 // same as number <= fChoiceLimits, except catches NaN
648 break;
649 }
650 } else if (!(number >= fChoiceLimits[i])) {
651 // same as number < fChoiceLimits, except catches NaN
652 break;
653 }
654 }
655 --i;
656 if (i < 0) {
657 i = 0;
658 }
659 // return either a formatted number, or a string
660 appendTo += fChoiceFormats[i];
661 return appendTo;
662 }
663
664 // -------------------------------------
665 // Formats an array of objects. Checks if the data type of the objects
666 // to get the right value for formatting.
667
668 UnicodeString&
669 ChoiceFormat::format(const Formattable* objs,
670 int32_t cnt,
671 UnicodeString& appendTo,
672 FieldPosition& pos,
673 UErrorCode& status) const
674 {
675 if(cnt < 0) {
676 status = U_ILLEGAL_ARGUMENT_ERROR;
677 return appendTo;
678 }
679
680 UnicodeString buffer;
681 for (int32_t i = 0; i < cnt; i++) {
682 double objDouble = objs[i].getDouble(status);
683 if (U_SUCCESS(status)) {
684 buffer.remove();
685 appendTo += format(objDouble, buffer, pos);
686 }
687 }
688
689 return appendTo;
690 }
691
692 // -------------------------------------
693 // Formats a single Formattable object by delegating to
694 // NumberFormat::format().
695
696 UnicodeString&
697 ChoiceFormat::format(const Formattable& obj,
698 UnicodeString& appendTo,
699 FieldPosition& pos,
700 UErrorCode& status) const
701 {
702 return NumberFormat::format(obj, appendTo, pos, status);
703 }
704 // -------------------------------------
705
706 void
707 ChoiceFormat::parse(const UnicodeString& text,
708 Formattable& result,
709 ParsePosition& status) const
710 {
711 // find the best number (defined as the one with the longest parse)
712 int32_t start = status.getIndex();
713 int32_t furthest = start;
714 double bestNumber = uprv_getNaN();
715 double tempNumber = 0.0;
716 for (int i = 0; i < fCount; ++i) {
717 int32_t len = fChoiceFormats[i].length();
718 if (text.compare(start, len, fChoiceFormats[i]) == 0) {
719 status.setIndex(start + len);
720 tempNumber = fChoiceLimits[i];
721 if (status.getIndex() > furthest) {
722 furthest = status.getIndex();
723 bestNumber = tempNumber;
724 if (furthest == text.length())
725 break;
726 }
727 }
728 }
729 status.setIndex(furthest);
730 if (status.getIndex() == start) {
731 status.setErrorIndex(furthest);
732 }
733 result.setDouble(bestNumber);
734 }
735
736 // -------------------------------------
737 // Parses the text and return the Formattable object.
738
739 void
740 ChoiceFormat::parse(const UnicodeString& text,
741 Formattable& result,
742 UErrorCode& status) const
743 {
744 NumberFormat::parse(text, result, status);
745 }
746
747 // -------------------------------------
748
749 Format*
750 ChoiceFormat::clone() const
751 {
752 ChoiceFormat *aCopy = new ChoiceFormat(*this);
753 return aCopy;
754 }
755
756 U_NAMESPACE_END
757
758 #endif /* #if !UCONFIG_NO_FORMATTING */
759
760 //eof