2 *******************************************************************************
4 * Copyright (C) 1998-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 12/02/98 stephen Creation.
15 * 03/13/99 stephen Modified for new C API.
16 *******************************************************************************
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_FORMATTING
23 #include "unicode/uchar.h"
24 #include "unicode/ustring.h"
25 #include "unicode/unum.h"
26 #include "unicode/udat.h"
27 #include "unicode/uset.h"
36 /* flag characters for u_scanf */
37 #define FLAG_ASTERISK 0x002A
38 #define FLAG_PAREN 0x0028
40 #define ISFLAG(s) (s) == FLAG_ASTERISK || \
43 /* special characters for u_scanf */
44 #define SPEC_DOLLARSIGN 0x0024
47 #define DIGIT_ZERO 0x0030
48 #define DIGIT_ONE 0x0031
49 #define DIGIT_TWO 0x0032
50 #define DIGIT_THREE 0x0033
51 #define DIGIT_FOUR 0x0034
52 #define DIGIT_FIVE 0x0035
53 #define DIGIT_SIX 0x0036
54 #define DIGIT_SEVEN 0x0037
55 #define DIGIT_EIGHT 0x0038
56 #define DIGIT_NINE 0x0039
58 #define ISDIGIT(s) (s) == DIGIT_ZERO || \
61 (s) == DIGIT_THREE || \
62 (s) == DIGIT_FOUR || \
63 (s) == DIGIT_FIVE || \
65 (s) == DIGIT_SEVEN || \
66 (s) == DIGIT_EIGHT || \
69 /* u_scanf modifiers */
71 #define MOD_LOWERL 0x006C
74 #define ISMOD(s) (s) == MOD_H || \
75 (s) == MOD_LOWERL || \
79 * Struct encapsulating the parsed fields (width, pad character, flags and modifiers) of a single u_scanf format specification.
81 typedef struct u_scanf_spec_info
{
82 int32_t fWidth
; /* Width */
84 UChar fSpec
; /* Format specification */
86 UChar fPadChar
; /* Padding character */
88 UBool fSkipArg
; /* TRUE if arg should be skipped */
89 UBool fIsLongDouble
; /* L flag */
90 UBool fIsShort
; /* h flag */
91 UBool fIsLong
; /* l flag */
92 UBool fIsLongLong
; /* ll flag */
93 UBool fIsString
; /* TRUE if this is a NUL-terminated string. */
98 * Struct encapsulating a single u_scanf format specification.
100 typedef struct u_scanf_spec
{
101 u_scanf_spec_info fInfo
; /* Information on this spec */
102 int32_t fArgPos
; /* Position of data in arg list */
106 * Parse a single u_scanf format specifier in Unicode.
107 * @param fmt A pointer to a '%' character in a u_scanf format specification.
108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
110 * @return The number of characters contained in this specifier.
113 u_scanf_parse_spec (const UChar
*fmt
,
116 const UChar
*s
= fmt
;
118 u_scanf_spec_info
*info
= &(spec
->fInfo
);
120 /* initialize spec to default values */
124 info
->fSpec
= 0x0000;
125 info
->fPadChar
= 0x0020;
126 info
->fSkipArg
= FALSE
;
127 info
->fIsLongDouble
= FALSE
;
128 info
->fIsShort
= FALSE
;
129 info
->fIsLong
= FALSE
;
130 info
->fIsLongLong
= FALSE
;
131 info
->fIsString
= TRUE
;
134 /* skip over the initial '%' */
137 /* Check for positional argument */
140 /* Save the current position */
143 /* handle positional parameters */
145 spec
->fArgPos
= (int) (*s
++ - DIGIT_ZERO
);
149 spec
->fArgPos
+= (int) (*s
++ - DIGIT_ZERO
);
153 /* if there is no '$', don't read anything */
154 if(*s
!= SPEC_DOLLARSIGN
) {
163 /* Get any format flags */
169 info
->fSkipArg
= TRUE
;
172 /* pad character specified */
175 /* first four characters are hex values for pad char */
176 info
->fPadChar
= (UChar
)ufmt_digitvalue(*s
++);
177 info
->fPadChar
= (UChar
)((info
->fPadChar
* 16) + ufmt_digitvalue(*s
++));
178 info
->fPadChar
= (UChar
)((info
->fPadChar
* 16) + ufmt_digitvalue(*s
++));
179 info
->fPadChar
= (UChar
)((info
->fPadChar
* 16) + ufmt_digitvalue(*s
++));
181 /* final character is ignored */
190 info
->fWidth
= (int) (*s
++ - DIGIT_ZERO
);
194 info
->fWidth
+= (int) (*s
++ - DIGIT_ZERO
);
198 /* Get any modifiers */
204 info
->fIsShort
= TRUE
;
207 /* long or long long */
209 if(*s
== MOD_LOWERL
) {
210 info
->fIsLongLong
= TRUE
;
211 /* skip over the next 'l' */
215 info
->fIsLong
= TRUE
;
220 info
->fIsLongDouble
= TRUE
;
225 /* finally, get the specifier letter */
228 /* return # of characters in this specifier */
229 return (int32_t)(s
- fmt
);
232 #define UP_PERCENT 0x0025
235 /* ANSI style formatting */
236 /* Use US-ASCII characters only for formatting */
239 #define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}
241 #define UFMT_STRING {ufmt_string, u_scanf_string_handler}
243 #define UFMT_CHAR {ufmt_string, u_scanf_char_handler}
245 #define UFMT_INT {ufmt_int, u_scanf_integer_handler}
247 #define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler}
249 #define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler}
251 #define UFMT_HEX {ufmt_int, u_scanf_hex_handler}
253 #define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler}
255 #define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler}
257 #define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler}
259 #define UFMT_COUNT {ufmt_count, u_scanf_count_handler}
261 #define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler}
263 /* non-ANSI extensions */
264 /* Use US-ASCII characters only for formatting */
267 #define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler}
269 #define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler}
271 #define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler}
272 /* C K is old format */
273 #define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler}
274 /* S U is old format */
275 #define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler}
278 #define UFMT_EMPTY {ufmt_empty, NULL}
281 * A u_scanf handler function.
282 * A u_scanf handler is responsible for handling a single u_scanf
283 * format specification, for example 'd' or 's'.
284 * @param stream The UFILE from which to read input.
285 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
286 * information on the format specification.
287 * @param args A pointer to the argument data
288 * @param fmt A pointer to the first character in the format string
289 * following the spec.
290 * @param fmtConsumed On output, set to the number of characters consumed
291 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
292 * @param argConverted The number of arguments converted and assigned, or -1 if an
294 * @return The number of code points consumed during reading.
296 typedef int32_t (*u_scanf_handler
) (UFILE
*stream
,
297 u_scanf_spec_info
*info
,
300 int32_t *fmtConsumed
,
301 int32_t *argConverted
);
303 typedef struct u_scanf_info
{
305 u_scanf_handler handler
;
308 #define USCANF_NUM_FMT_HANDLERS 108
309 #define USCANF_SYMBOL_BUFFER_SIZE 8
311 /* We do not use handlers for 0-0x1f */
312 #define USCANF_BASE_FMT_HANDLERS 0x20
316 u_scanf_skip_leading_ws(UFILE
*input
,
323 /* skip all leading ws in the input */
324 while( (isNotEOF
= ufile_getch(input
, &c
)) && (c
== pad
|| u_isWhitespace(c
)) )
329 /* put the final character back on the input */
336 /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
338 u_scanf_skip_leading_positive_sign(UFILE
*input
,
339 UNumberFormat
*format
,
345 UChar plusSymbol
[USCANF_SYMBOL_BUFFER_SIZE
];
347 UErrorCode localStatus
= U_ZERO_ERROR
;
349 if (U_SUCCESS(*status
)) {
350 symbolLen
= unum_getSymbol(format
,
351 UNUM_PLUS_SIGN_SYMBOL
,
353 sizeof(plusSymbol
)/sizeof(*plusSymbol
),
356 if (U_SUCCESS(localStatus
)) {
357 /* skip the leading positive-sign symbol in the input, one code unit at a time */
358 while( (isNotEOF
= ufile_getch(input
, &c
)) && (count
< symbolLen
&& c
== plusSymbol
[count
]) )
363 /* put the final character back on the input */
374 u_scanf_simple_percent_handler(UFILE
*input
,
375 u_scanf_spec_info
*info
,
378 int32_t *fmtConsumed
,
379 int32_t *argConverted
)
381 /* make sure the next character in the input is a percent */
383 if(u_fgetc(input
) != 0x0025) {
390 u_scanf_count_handler(UFILE
*input
,
391 u_scanf_spec_info
*info
,
394 int32_t *fmtConsumed
,
395 int32_t *argConverted
)
397 /* in the special case of count, the u_scanf_spec_info's width */
398 /* will contain the # of code points consumed thus far */
399 if (!info
->fSkipArg
) {
401 *(int16_t*)(args
[0].ptrValue
) = (int16_t)(UINT16_MAX
& info
->fWidth
);
402 else if (info
->fIsLongLong
)
403 *(int64_t*)(args
[0].ptrValue
) = info
->fWidth
;
405 *(int32_t*)(args
[0].ptrValue
) = (int32_t)(UINT32_MAX
& info
->fWidth
);
409 /* we converted 0 args */
414 u_scanf_double_handler(UFILE
*input
,
415 u_scanf_spec_info
*info
,
418 int32_t *fmtConsumed
,
419 int32_t *argConverted
)
423 UNumberFormat
*format
;
424 int32_t parsePos
= 0;
426 UErrorCode status
= U_ZERO_ERROR
;
429 /* skip all ws in the input */
430 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
432 /* fill the input's internal buffer */
433 ufile_fill_uchar_buffer(input
);
435 /* determine the size of the input's buffer */
436 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
438 /* truncate to the width, if specified */
439 if(info
->fWidth
!= -1)
440 len
= ufmt_min(len
, info
->fWidth
);
442 /* get the formatter */
443 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_DECIMAL
);
449 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
450 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
452 /* parse the number */
453 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
455 if (!info
->fSkipArg
) {
457 *(double*)(args
[0].ptrValue
) = num
;
458 else if (info
->fIsLongDouble
)
459 *(long double*)(args
[0].ptrValue
) = num
;
461 *(float*)(args
[0].ptrValue
) = (float)num
;
464 /* mask off any necessary bits */
465 /* if(! info->fIsLong_double)
468 /* update the input's position to reflect consumed data */
469 input
->str
.fPos
+= parsePos
;
471 /* we converted 1 arg */
472 *argConverted
= !info
->fSkipArg
;
473 return parsePos
+ skipped
;
476 #define UPRINTF_SYMBOL_BUFFER_SIZE 8
479 u_scanf_scientific_handler(UFILE
*input
,
480 u_scanf_spec_info
*info
,
483 int32_t *fmtConsumed
,
484 int32_t *argConverted
)
488 UNumberFormat
*format
;
489 int32_t parsePos
= 0;
491 UErrorCode status
= U_ZERO_ERROR
;
492 UChar srcExpBuf
[UPRINTF_SYMBOL_BUFFER_SIZE
];
493 int32_t srcLen
, expLen
;
494 UChar expBuf
[UPRINTF_SYMBOL_BUFFER_SIZE
];
497 /* skip all ws in the input */
498 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
500 /* fill the input's internal buffer */
501 ufile_fill_uchar_buffer(input
);
503 /* determine the size of the input's buffer */
504 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
506 /* truncate to the width, if specified */
507 if(info
->fWidth
!= -1)
508 len
= ufmt_min(len
, info
->fWidth
);
510 /* get the formatter */
511 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_SCIENTIFIC
);
517 /* set the appropriate flags on the formatter */
519 srcLen
= unum_getSymbol(format
,
520 UNUM_EXPONENTIAL_SYMBOL
,
525 /* Upper/lower case the e */
526 if (info
->fSpec
== (UChar
)0x65 /* e */) {
527 expLen
= u_strToLower(expBuf
, (int32_t)sizeof(expBuf
),
529 input
->str
.fBundle
.fLocale
,
533 expLen
= u_strToUpper(expBuf
, (int32_t)sizeof(expBuf
),
535 input
->str
.fBundle
.fLocale
,
539 unum_setSymbol(format
,
540 UNUM_EXPONENTIAL_SYMBOL
,
548 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
549 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
551 /* parse the number */
552 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
554 if (!info
->fSkipArg
) {
556 *(double*)(args
[0].ptrValue
) = num
;
557 else if (info
->fIsLongDouble
)
558 *(long double*)(args
[0].ptrValue
) = num
;
560 *(float*)(args
[0].ptrValue
) = (float)num
;
563 /* mask off any necessary bits */
564 /* if(! info->fIsLong_double)
567 /* update the input's position to reflect consumed data */
568 input
->str
.fPos
+= parsePos
;
570 /* we converted 1 arg */
571 *argConverted
= !info
->fSkipArg
;
572 return parsePos
+ skipped
;
576 u_scanf_scidbl_handler(UFILE
*input
,
577 u_scanf_spec_info
*info
,
580 int32_t *fmtConsumed
,
581 int32_t *argConverted
)
585 UNumberFormat
*scientificFormat
, *genericFormat
;
586 /*int32_t scientificResult, genericResult;*/
587 double scientificResult
, genericResult
;
588 int32_t scientificParsePos
= 0, genericParsePos
= 0, parsePos
= 0;
590 UErrorCode scientificStatus
= U_ZERO_ERROR
;
591 UErrorCode genericStatus
= U_ZERO_ERROR
;
594 /* since we can't determine by scanning the characters whether */
595 /* a number was formatted in the 'e' or 'f' styles, parse the */
596 /* string with both formatters, and assume whichever one */
597 /* parsed the most is the correct formatter to use */
600 /* skip all ws in the input */
601 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
603 /* fill the input's internal buffer */
604 ufile_fill_uchar_buffer(input
);
606 /* determine the size of the input's buffer */
607 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
609 /* truncate to the width, if specified */
610 if(info
->fWidth
!= -1)
611 len
= ufmt_min(len
, info
->fWidth
);
613 /* get the formatters */
614 scientificFormat
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_SCIENTIFIC
);
615 genericFormat
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_DECIMAL
);
618 if(scientificFormat
== 0 || genericFormat
== 0)
621 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
622 skipped
+= u_scanf_skip_leading_positive_sign(input
, genericFormat
, &genericStatus
);
624 /* parse the number using each format*/
626 scientificResult
= unum_parseDouble(scientificFormat
, input
->str
.fPos
, len
,
627 &scientificParsePos
, &scientificStatus
);
629 genericResult
= unum_parseDouble(genericFormat
, input
->str
.fPos
, len
,
630 &genericParsePos
, &genericStatus
);
632 /* determine which parse made it farther */
633 if(scientificParsePos
> genericParsePos
) {
634 /* stash the result in num */
635 num
= scientificResult
;
636 /* update the input's position to reflect consumed data */
637 parsePos
+= scientificParsePos
;
640 /* stash the result in num */
642 /* update the input's position to reflect consumed data */
643 parsePos
+= genericParsePos
;
645 input
->str
.fPos
+= parsePos
;
647 if (!info
->fSkipArg
) {
649 *(double*)(args
[0].ptrValue
) = num
;
650 else if (info
->fIsLongDouble
)
651 *(long double*)(args
[0].ptrValue
) = num
;
653 *(float*)(args
[0].ptrValue
) = (float)num
;
656 /* mask off any necessary bits */
657 /* if(! info->fIsLong_double)
660 /* we converted 1 arg */
661 *argConverted
= !info
->fSkipArg
;
662 return parsePos
+ skipped
;
666 u_scanf_integer_handler(UFILE
*input
,
667 u_scanf_spec_info
*info
,
670 int32_t *fmtConsumed
,
671 int32_t *argConverted
)
674 void *num
= (void*) (args
[0].ptrValue
);
675 UNumberFormat
*format
;
676 int32_t parsePos
= 0;
678 UErrorCode status
= U_ZERO_ERROR
;
682 /* skip all ws in the input */
683 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
685 /* fill the input's internal buffer */
686 ufile_fill_uchar_buffer(input
);
688 /* determine the size of the input's buffer */
689 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
691 /* truncate to the width, if specified */
692 if(info
->fWidth
!= -1)
693 len
= ufmt_min(len
, info
->fWidth
);
695 /* get the formatter */
696 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_DECIMAL
);
702 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
703 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
705 /* parse the number */
706 result
= unum_parseInt64(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
708 /* mask off any necessary bits */
709 if (!info
->fSkipArg
) {
711 *(int16_t*)num
= (int16_t)(UINT16_MAX
& result
);
712 else if (info
->fIsLongLong
)
713 *(int64_t*)num
= result
;
715 *(int32_t*)num
= (int32_t)(UINT32_MAX
& result
);
718 /* update the input's position to reflect consumed data */
719 input
->str
.fPos
+= parsePos
;
721 /* we converted 1 arg */
722 *argConverted
= !info
->fSkipArg
;
723 return parsePos
+ skipped
;
727 u_scanf_uinteger_handler(UFILE
*input
,
728 u_scanf_spec_info
*info
,
731 int32_t *fmtConsumed
,
732 int32_t *argConverted
)
734 /* TODO Fix this when Numberformat handles uint64_t */
735 return u_scanf_integer_handler(input
, info
, args
, fmt
, fmtConsumed
, argConverted
);
739 u_scanf_percent_handler(UFILE
*input
,
740 u_scanf_spec_info
*info
,
743 int32_t *fmtConsumed
,
744 int32_t *argConverted
)
748 UNumberFormat
*format
;
749 int32_t parsePos
= 0;
751 UErrorCode status
= U_ZERO_ERROR
;
754 /* skip all ws in the input */
755 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
757 /* fill the input's internal buffer */
758 ufile_fill_uchar_buffer(input
);
760 /* determine the size of the input's buffer */
761 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
763 /* truncate to the width, if specified */
764 if(info
->fWidth
!= -1)
765 len
= ufmt_min(len
, info
->fWidth
);
767 /* get the formatter */
768 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_PERCENT
);
774 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
775 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
777 /* parse the number */
778 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
780 if (!info
->fSkipArg
) {
781 *(double*)(args
[0].ptrValue
) = num
;
784 /* mask off any necessary bits */
785 /* if(! info->fIsLong_double)
788 /* update the input's position to reflect consumed data */
789 input
->str
.fPos
+= parsePos
;
791 /* we converted 1 arg */
792 *argConverted
= !info
->fSkipArg
;
797 u_scanf_string_handler(UFILE
*input
,
798 u_scanf_spec_info
*info
,
801 int32_t *fmtConsumed
,
802 int32_t *argConverted
)
806 char *arg
= (char*)(args
[0].ptrValue
);
809 UErrorCode status
= U_ZERO_ERROR
;
813 UBool isNotEOF
= FALSE
;
815 /* skip leading ws in the input, but only when reading a string */
816 if (info
->fIsString
) {
817 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
820 /* get the string one character at a time, truncating to the width */
823 /* open the default converter */
824 conv
= u_getDefaultConverter(&status
);
826 if(U_FAILURE(status
))
829 while( (info
->fWidth
== -1 || count
< info
->fWidth
)
830 && (isNotEOF
= ufile_getch(input
, &c
))
831 && (!info
->fIsString
|| (c
!= info
->fPadChar
&& !u_isWhitespace(c
))))
834 if (!info
->fSkipArg
) {
835 /* put the character from the input onto the target */
837 /* Since we do this one character at a time, do it this way. */
838 if (info
->fWidth
> 0) {
839 limit
= alias
+ info
->fWidth
- count
;
842 limit
= alias
+ ucnv_getMaxCharSize(conv
);
845 /* convert the character to the default codepage */
846 ucnv_fromUnicode(conv
, &alias
, limit
, &source
, source
+ 1,
847 NULL
, TRUE
, &status
);
849 if(U_FAILURE(status
)) {
851 u_releaseDefaultConverter(conv
);
856 /* increment the count */
860 /* put the final character we read back on the input */
861 if (!info
->fSkipArg
) {
862 if ((info
->fWidth
== -1 || count
< info
->fWidth
) && isNotEOF
)
865 /* add the terminator */
866 if (info
->fIsString
) {
872 u_releaseDefaultConverter(conv
);
874 /* we converted 1 arg */
875 *argConverted
= !info
->fSkipArg
;
876 return count
+ skipped
;
880 u_scanf_char_handler(UFILE
*input
,
881 u_scanf_spec_info
*info
,
884 int32_t *fmtConsumed
,
885 int32_t *argConverted
)
887 if (info
->fWidth
< 0) {
890 info
->fIsString
= FALSE
;
891 return u_scanf_string_handler(input
, info
, args
, fmt
, fmtConsumed
, argConverted
);
895 u_scanf_ustring_handler(UFILE
*input
,
896 u_scanf_spec_info
*info
,
899 int32_t *fmtConsumed
,
900 int32_t *argConverted
)
902 UChar
*arg
= (UChar
*)(args
[0].ptrValue
);
907 UBool isNotEOF
= FALSE
;
909 /* skip leading ws in the input, but only when reading a string */
910 if (info
->fIsString
) {
911 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
914 /* get the string one character at a time, truncating to the width */
917 while( (info
->fWidth
== -1 || count
< info
->fWidth
)
918 && (isNotEOF
= ufile_getch(input
, &c
))
919 && (!info
->fIsString
|| (c
!= info
->fPadChar
&& !u_isWhitespace(c
))))
922 /* put the character from the input onto the target */
923 if (!info
->fSkipArg
) {
927 /* increment the count */
931 /* put the final character we read back on the input */
932 if (!info
->fSkipArg
) {
933 if((info
->fWidth
== -1 || count
< info
->fWidth
) && isNotEOF
) {
937 /* add the terminator */
938 if (info
->fIsString
) {
943 /* we converted 1 arg */
944 *argConverted
= !info
->fSkipArg
;
945 return count
+ skipped
;
949 u_scanf_uchar_handler(UFILE
*input
,
950 u_scanf_spec_info
*info
,
953 int32_t *fmtConsumed
,
954 int32_t *argConverted
)
956 if (info
->fWidth
< 0) {
959 info
->fIsString
= FALSE
;
960 return u_scanf_ustring_handler(input
, info
, args
, fmt
, fmtConsumed
, argConverted
);
964 u_scanf_spellout_handler(UFILE
*input
,
965 u_scanf_spec_info
*info
,
968 int32_t *fmtConsumed
,
969 int32_t *argConverted
)
973 UNumberFormat
*format
;
974 int32_t parsePos
= 0;
976 UErrorCode status
= U_ZERO_ERROR
;
979 /* skip all ws in the input */
980 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
982 /* fill the input's internal buffer */
983 ufile_fill_uchar_buffer(input
);
985 /* determine the size of the input's buffer */
986 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
988 /* truncate to the width, if specified */
989 if(info
->fWidth
!= -1)
990 len
= ufmt_min(len
, info
->fWidth
);
992 /* get the formatter */
993 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_SPELLOUT
);
999 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
1000 /* This is not applicable to RBNF. */
1001 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
1003 /* parse the number */
1004 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
1006 if (!info
->fSkipArg
) {
1007 *(double*)(args
[0].ptrValue
) = num
;
1010 /* mask off any necessary bits */
1011 /* if(! info->fIsLong_double)
1014 /* update the input's position to reflect consumed data */
1015 input
->str
.fPos
+= parsePos
;
1017 /* we converted 1 arg */
1018 *argConverted
= !info
->fSkipArg
;
1019 return parsePos
+ skipped
;
1023 u_scanf_hex_handler(UFILE
*input
,
1024 u_scanf_spec_info
*info
,
1027 int32_t *fmtConsumed
,
1028 int32_t *argConverted
)
1032 void *num
= (void*) (args
[0].ptrValue
);
1035 /* skip all ws in the input */
1036 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1038 /* fill the input's internal buffer */
1039 ufile_fill_uchar_buffer(input
);
1041 /* determine the size of the input's buffer */
1042 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1044 /* truncate to the width, if specified */
1045 if(info
->fWidth
!= -1)
1046 len
= ufmt_min(len
, info
->fWidth
);
1048 /* check for alternate form */
1049 if( *(input
->str
.fPos
) == 0x0030 &&
1050 (*(input
->str
.fPos
+ 1) == 0x0078 || *(input
->str
.fPos
+ 1) == 0x0058) ) {
1052 /* skip the '0' and 'x' or 'X' if present */
1053 input
->str
.fPos
+= 2;
1057 /* parse the number */
1058 result
= ufmt_uto64(input
->str
.fPos
, &len
, 16);
1060 /* update the input's position to reflect consumed data */
1061 input
->str
.fPos
+= len
;
1063 /* mask off any necessary bits */
1064 if (!info
->fSkipArg
) {
1066 *(int16_t*)num
= (int16_t)(UINT16_MAX
& result
);
1067 else if (info
->fIsLongLong
)
1068 *(int64_t*)num
= result
;
1070 *(int32_t*)num
= (int32_t)(UINT32_MAX
& result
);
1073 /* we converted 1 arg */
1074 *argConverted
= !info
->fSkipArg
;
1075 return len
+ skipped
;
1079 u_scanf_octal_handler(UFILE
*input
,
1080 u_scanf_spec_info
*info
,
1083 int32_t *fmtConsumed
,
1084 int32_t *argConverted
)
1088 void *num
= (void*) (args
[0].ptrValue
);
1091 /* skip all ws in the input */
1092 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1094 /* fill the input's internal buffer */
1095 ufile_fill_uchar_buffer(input
);
1097 /* determine the size of the input's buffer */
1098 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1100 /* truncate to the width, if specified */
1101 if(info
->fWidth
!= -1)
1102 len
= ufmt_min(len
, info
->fWidth
);
1104 /* parse the number */
1105 result
= ufmt_uto64(input
->str
.fPos
, &len
, 8);
1107 /* update the input's position to reflect consumed data */
1108 input
->str
.fPos
+= len
;
1110 /* mask off any necessary bits */
1111 if (!info
->fSkipArg
) {
1113 *(int16_t*)num
= (int16_t)(UINT16_MAX
& result
);
1114 else if (info
->fIsLongLong
)
1115 *(int64_t*)num
= result
;
1117 *(int32_t*)num
= (int32_t)(UINT32_MAX
& result
);
1120 /* we converted 1 arg */
1121 *argConverted
= !info
->fSkipArg
;
1122 return len
+ skipped
;
1126 u_scanf_pointer_handler(UFILE
*input
,
1127 u_scanf_spec_info
*info
,
1130 int32_t *fmtConsumed
,
1131 int32_t *argConverted
)
1136 void **p
= (void**)(args
[0].ptrValue
);
1139 /* skip all ws in the input */
1140 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1142 /* fill the input's internal buffer */
1143 ufile_fill_uchar_buffer(input
);
1145 /* determine the size of the input's buffer */
1146 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1148 /* truncate to the width, if specified */
1149 if(info
->fWidth
!= -1) {
1150 len
= ufmt_min(len
, info
->fWidth
);
1153 /* Make sure that we don't consume too much */
1154 if (len
> (int32_t)(sizeof(void*)*2)) {
1155 len
= (int32_t)(sizeof(void*)*2);
1158 /* parse the pointer - assign to temporary value */
1159 result
= ufmt_utop(input
->str
.fPos
, &len
);
1161 if (!info
->fSkipArg
) {
1165 /* update the input's position to reflect consumed data */
1166 input
->str
.fPos
+= len
;
1168 /* we converted 1 arg */
1169 *argConverted
= !info
->fSkipArg
;
1170 return len
+ skipped
;
1174 u_scanf_scanset_handler(UFILE
*input
,
1175 u_scanf_spec_info
*info
,
1178 int32_t *fmtConsumed
,
1179 int32_t *argConverted
)
1182 UErrorCode status
= U_ZERO_ERROR
;
1183 int32_t chLeft
= INT32_MAX
;
1185 UChar
*alias
= (UChar
*) (args
[0].ptrValue
);
1186 UBool isNotEOF
= FALSE
;
1187 UBool readCharacter
= FALSE
;
1189 /* Create an empty set */
1190 scanset
= uset_open(0, -1);
1192 /* Back up one to get the [ */
1195 /* truncate to the width, if specified and alias the target */
1196 if(info
->fWidth
>= 0) {
1197 chLeft
= info
->fWidth
;
1200 /* parse the scanset from the fmt string */
1201 *fmtConsumed
= uset_applyPattern(scanset
, fmt
, -1, 0, &status
);
1203 /* verify that the parse was successful */
1204 if (U_SUCCESS(status
)) {
1207 /* grab characters one at a time and make sure they are in the scanset */
1209 if ((isNotEOF
= ufile_getch32(input
, &c
)) && uset_contains(scanset
, c
)) {
1210 readCharacter
= TRUE
;
1211 if (!info
->fSkipArg
) {
1213 UBool isError
= FALSE
;
1215 U16_APPEND(alias
, idx
, chLeft
, c
, isError
);
1221 chLeft
-= (1 + U_IS_SUPPLEMENTARY(c
));
1224 /* if the character's not in the scanset, break out */
1229 /* put the final character we read back on the input */
1230 if(isNotEOF
&& chLeft
> 0) {
1231 u_fungetc(c
, input
);
1235 uset_close(scanset
);
1237 /* if we didn't match at least 1 character, fail */
1240 /* otherwise, add the terminator */
1241 else if (!info
->fSkipArg
) {
1245 /* we converted 1 arg */
1246 *argConverted
= !info
->fSkipArg
;
1247 return (info
->fWidth
>= 0 ? info
->fWidth
: INT32_MAX
) - chLeft
;
1250 /* Use US-ASCII characters only for formatting. Most codepages have
1251 characters 20-7F from Unicode. Using any other codepage specific
1252 characters will make it very difficult to format the string on
1253 non-Unicode machines */
1254 static const u_scanf_info g_u_scanf_infos
[USCANF_NUM_FMT_HANDLERS
] = {
1256 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1257 UFMT_EMPTY
, UFMT_SIMPLE_PERCENT
,UFMT_EMPTY
, UFMT_EMPTY
,
1258 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1259 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1262 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1263 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1264 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1265 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1268 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_UCHAR
,
1269 UFMT_EMPTY
, UFMT_SCIENTIFIC
, UFMT_EMPTY
, UFMT_SCIDBL
,
1270 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1271 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_UCHAR
/*deprecated*/,
1273 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1275 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1278 UFMT_PERCENT
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_USTRING
,
1279 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1280 UFMT_EMPTY
, UFMT_USTRING
/*deprecated*/,UFMT_SPELLOUT
, UFMT_EMPTY
,
1282 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_SPELLOUT
, UFMT_EMPTY
,
1284 UFMT_HEX
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_SCANSET
,
1285 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1288 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_CHAR
,
1289 UFMT_INT
, UFMT_SCIENTIFIC
, UFMT_DOUBLE
, UFMT_SCIDBL
,
1290 UFMT_EMPTY
, UFMT_INT
, UFMT_EMPTY
, UFMT_EMPTY
,
1291 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_COUNT
, UFMT_OCTAL
,
1294 UFMT_POINTER
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_STRING
,
1295 UFMT_EMPTY
, UFMT_UINT
, UFMT_EMPTY
, UFMT_EMPTY
,
1296 UFMT_HEX
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1297 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1301 u_scanf_parse(UFILE
*f
,
1302 const UChar
*patternSpecification
,
1306 int32_t count
, converted
, argConsumed
, cpConsumed
;
1307 uint16_t handlerNum
;
1311 ufmt_type_info info
;
1312 u_scanf_handler handler
;
1314 /* alias the pattern */
1315 alias
= patternSpecification
;
1317 /* haven't converted anything yet */
1322 /* iterate through the pattern */
1325 /* match any characters up to the next '%' */
1326 while(*alias
!= UP_PERCENT
&& *alias
!= 0x0000 && u_fgetc(f
) == *alias
) {
1330 /* if we aren't at a '%', or if we're at end of string, break*/
1331 if(*alias
!= UP_PERCENT
|| *alias
== 0x0000)
1334 /* parse the specifier */
1335 count
= u_scanf_parse_spec(alias
, &spec
);
1337 /* update the pointer in pattern */
1340 handlerNum
= (uint16_t)(spec
.fInfo
.fSpec
- USCANF_BASE_FMT_HANDLERS
);
1341 if (handlerNum
< USCANF_NUM_FMT_HANDLERS
) {
1342 /* skip the argument, if necessary */
1343 /* look up the argument type info for this specifier in the handler table */
1344 info
= g_u_scanf_infos
[ handlerNum
].info
;
1345 if (info
!= ufmt_count
&& u_feof(f
)) {
1348 else if(spec
.fInfo
.fSkipArg
) {
1349 args
.ptrValue
= NULL
;
1354 /* set the spec's width to the # of code points consumed so far */
1355 spec
.fInfo
.fWidth
= cpConsumed
;
1356 /* fall through to next case */
1365 args
.ptrValue
= va_arg(ap
, void*);
1369 /* else args is ignored */
1370 args
.ptrValue
= NULL
;
1375 /* call the handler function */
1376 handler
= g_u_scanf_infos
[ handlerNum
].handler
;
1379 /* reset count to 1 so that += for alias works. */
1382 cpConsumed
+= (*handler
)(f
, &spec
.fInfo
, &args
, alias
, &count
, &argConsumed
);
1384 /* if the handler encountered an error condition, break */
1385 if(argConsumed
< 0) {
1390 /* add to the # of items converted */
1391 converted
+= argConsumed
;
1393 /* update the pointer in pattern */
1396 /* else do nothing */
1398 /* else do nothing */
1400 /* just ignore unknown tags */
1403 /* return # of items converted */
1407 #endif /* #if !UCONFIG_NO_FORMATTING */