2 *******************************************************************************
4 * Copyright (C) 1998-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 12/02/98 stephen Creation.
15 * 03/13/99 stephen Modified for new C API.
16 *******************************************************************************
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_FORMATTING
23 #include "unicode/uchar.h"
24 #include "unicode/ustring.h"
25 #include "unicode/unum.h"
26 #include "unicode/udat.h"
27 #include "unicode/uset.h"
36 /* flag characters for u_scanf */
37 #define FLAG_ASTERISK 0x002A
38 #define FLAG_PAREN 0x0028
40 #define ISFLAG(s) (s) == FLAG_ASTERISK || \
43 /* special characters for u_scanf */
44 #define SPEC_DOLLARSIGN 0x0024
47 #define DIGIT_ZERO 0x0030
48 #define DIGIT_ONE 0x0031
49 #define DIGIT_TWO 0x0032
50 #define DIGIT_THREE 0x0033
51 #define DIGIT_FOUR 0x0034
52 #define DIGIT_FIVE 0x0035
53 #define DIGIT_SIX 0x0036
54 #define DIGIT_SEVEN 0x0037
55 #define DIGIT_EIGHT 0x0038
56 #define DIGIT_NINE 0x0039
58 #define ISDIGIT(s) (s) == DIGIT_ZERO || \
61 (s) == DIGIT_THREE || \
62 (s) == DIGIT_FOUR || \
63 (s) == DIGIT_FIVE || \
65 (s) == DIGIT_SEVEN || \
66 (s) == DIGIT_EIGHT || \
69 /* u_scanf modifiers */
71 #define MOD_LOWERL 0x006C
74 #define ISMOD(s) (s) == MOD_H || \
75 (s) == MOD_LOWERL || \
79 * Struct holding the parsed fields (width, conversion character, pad character, flags) of a single u_scanf format specification.
81 typedef struct u_scanf_spec_info
{
82 int32_t fWidth
; /* Width */
84 UChar fSpec
; /* Format specification */
86 UChar fPadChar
; /* Padding character */
88 UBool fSkipArg
; /* TRUE if arg should be skipped */
89 UBool fIsLongDouble
; /* L flag */
90 UBool fIsShort
; /* h flag */
91 UBool fIsLong
; /* l flag */
92 UBool fIsLongLong
; /* ll flag */
93 UBool fIsString
; /* TRUE if this is a NULL-terminated string. */
98 * Struct pairing a parsed u_scanf format specification with the position of its argument in the argument list.
100 typedef struct u_scanf_spec
{
101 u_scanf_spec_info fInfo
; /* Information on this spec */
102 int32_t fArgPos
; /* Position of data in arg list */
106 * Parse a single u_scanf format specifier in Unicode.
107 * @param fmt A pointer to a '%' character in a u_scanf format specification.
108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
110 * @return The number of characters contained in this specifier.
113 u_scanf_parse_spec (const UChar
*fmt
,
116 const UChar
*s
= fmt
;
118 u_scanf_spec_info
*info
= &(spec
->fInfo
);
120 /* initialize spec to default values */
124 info
->fSpec
= 0x0000;
125 info
->fPadChar
= 0x0020;
126 info
->fSkipArg
= FALSE
;
127 info
->fIsLongDouble
= FALSE
;
128 info
->fIsShort
= FALSE
;
129 info
->fIsLong
= FALSE
;
130 info
->fIsLongLong
= FALSE
;
131 info
->fIsString
= TRUE
;
134 /* skip over the initial '%' */
137 /* Check for positional argument */
140 /* Save the current position */
143 /* handle positional parameters */
145 spec
->fArgPos
= (int) (*s
++ - DIGIT_ZERO
);
149 spec
->fArgPos
+= (int) (*s
++ - DIGIT_ZERO
);
153 /* if there is no '$', don't read anything */
154 if(*s
!= SPEC_DOLLARSIGN
) {
163 /* Get any format flags */
169 info
->fSkipArg
= TRUE
;
172 /* pad character specified */
175 /* first four characters are hex values for pad char */
176 info
->fPadChar
= (UChar
)ufmt_digitvalue(*s
++);
177 info
->fPadChar
= (UChar
)((info
->fPadChar
* 16) + ufmt_digitvalue(*s
++));
178 info
->fPadChar
= (UChar
)((info
->fPadChar
* 16) + ufmt_digitvalue(*s
++));
179 info
->fPadChar
= (UChar
)((info
->fPadChar
* 16) + ufmt_digitvalue(*s
++));
181 /* final character is ignored */
190 info
->fWidth
= (int) (*s
++ - DIGIT_ZERO
);
194 info
->fWidth
+= (int) (*s
++ - DIGIT_ZERO
);
198 /* Get any modifiers */
204 info
->fIsShort
= TRUE
;
207 /* long or long long */
209 if(*s
== MOD_LOWERL
) {
210 info
->fIsLongLong
= TRUE
;
211 /* skip over the next 'l' */
215 info
->fIsLong
= TRUE
;
220 info
->fIsLongDouble
= TRUE
;
225 /* finally, get the specifier letter */
228 /* return # of characters in this specifier */
229 return (int32_t)(s
- fmt
);
232 #define UP_PERCENT 0x0025
235 /* ANSI style formatting */
236 /* Use US-ASCII characters only for formatting */
239 #define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}
241 #define UFMT_STRING {ufmt_string, u_scanf_string_handler}
243 #define UFMT_CHAR {ufmt_string, u_scanf_char_handler}
245 #define UFMT_INT {ufmt_int, u_scanf_integer_handler}
247 #define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler}
249 #define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler}
251 #define UFMT_HEX {ufmt_int, u_scanf_hex_handler}
253 #define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler}
255 #define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler}
257 #define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler}
259 #define UFMT_COUNT {ufmt_count, u_scanf_count_handler}
261 #define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler}
263 /* non-ANSI extensions */
264 /* Use US-ASCII characters only for formatting */
267 #define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler}
269 #define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler}
271 #define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler}
272 /* C K is old format */
273 #define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler}
274 /* S U is old format */
275 #define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler}
278 #define UFMT_EMPTY {ufmt_empty, NULL}
281 * A u_scanf handler function.
282 * A u_scanf handler is responsible for handling a single u_scanf
283 * format specification, for example 'd' or 's'.
284 * @param stream The UFILE from which to read input.
285 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
286 * information on the format specification.
287 * @param args A pointer to the argument data
288 * @param fmt A pointer to the first character in the format string
289 * following the spec.
290 * @param fmtConsumed On output, set to the number of characters consumed
291 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
292 * @param argConverted The number of arguments converted and assigned, or -1 if an
294 * @return The number of code points consumed during reading.
296 typedef int32_t (*u_scanf_handler
) (UFILE
*stream
,
297 u_scanf_spec_info
*info
,
300 int32_t *fmtConsumed
,
301 int32_t *argConverted
);
303 typedef struct u_scanf_info
{
305 u_scanf_handler handler
;
308 #define USCANF_NUM_FMT_HANDLERS 108
309 #define USCANF_SYMBOL_BUFFER_SIZE 8
311 /* We do not use handlers for 0-0x1f */
312 #define USCANF_BASE_FMT_HANDLERS 0x20
316 u_scanf_skip_leading_ws(UFILE
*input
,
323 /* skip all leading ws in the input */
324 while( (isNotEOF
= ufile_getch(input
, &c
)) && (c
== pad
|| u_isWhitespace(c
)) )
329 /* put the final character back on the input */
336 /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
338 u_scanf_skip_leading_positive_sign(UFILE
*input
,
339 UNumberFormat
*format
,
345 UChar plusSymbol
[USCANF_SYMBOL_BUFFER_SIZE
];
347 UErrorCode localStatus
= U_ZERO_ERROR
;
349 if (U_SUCCESS(*status
)) {
350 symbolLen
= unum_getSymbol(format
,
351 UNUM_PLUS_SIGN_SYMBOL
,
353 sizeof(plusSymbol
)/sizeof(*plusSymbol
),
356 if (U_SUCCESS(localStatus
)) {
357 /* skip all leading ws in the input */
358 while( (isNotEOF
= ufile_getch(input
, &c
)) && (count
< symbolLen
&& c
== plusSymbol
[count
]) )
363 /* put the final character back on the input */
374 u_scanf_simple_percent_handler(UFILE
*input
,
375 u_scanf_spec_info
*info
,
378 int32_t *fmtConsumed
,
379 int32_t *argConverted
)
381 /* make sure the next character in the input is a percent */
383 if(u_fgetc(input
) != 0x0025) {
390 u_scanf_count_handler(UFILE
*input
,
391 u_scanf_spec_info
*info
,
394 int32_t *fmtConsumed
,
395 int32_t *argConverted
)
397 /* in the special case of count, the u_scanf_spec_info's width */
398 /* will contain the # of items converted thus far */
399 if (!info
->fSkipArg
) {
401 *(int16_t*)(args
[0].ptrValue
) = (int16_t)(UINT16_MAX
& info
->fWidth
);
402 else if (info
->fIsLongLong
)
403 *(int64_t*)(args
[0].ptrValue
) = info
->fWidth
;
405 *(int32_t*)(args
[0].ptrValue
) = (int32_t)(UINT32_MAX
& info
->fWidth
);
409 /* we converted 0 args */
414 u_scanf_double_handler(UFILE
*input
,
415 u_scanf_spec_info
*info
,
418 int32_t *fmtConsumed
,
419 int32_t *argConverted
)
423 UNumberFormat
*format
;
424 int32_t parsePos
= 0;
426 UErrorCode status
= U_ZERO_ERROR
;
429 /* skip all ws in the input */
430 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
432 /* fill the input's internal buffer */
433 ufile_fill_uchar_buffer(input
);
435 /* determine the size of the input's buffer */
436 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
438 /* truncate to the width, if specified */
439 if(info
->fWidth
!= -1)
440 len
= ufmt_min(len
, info
->fWidth
);
442 /* get the formatter */
443 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_DECIMAL
);
449 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
450 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
452 /* parse the number */
453 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
455 if (!info
->fSkipArg
) {
457 *(double*)(args
[0].ptrValue
) = num
;
458 else if (info
->fIsLongDouble
)
459 *(long double*)(args
[0].ptrValue
) = num
;
461 *(float*)(args
[0].ptrValue
) = (float)num
;
464 /* mask off any necessary bits */
465 /* if(! info->fIsLong_double)
468 /* update the input's position to reflect consumed data */
469 input
->str
.fPos
+= parsePos
;
471 /* we converted 1 arg */
472 *argConverted
= !info
->fSkipArg
;
473 return parsePos
+ skipped
;
476 #define UPRINTF_SYMBOL_BUFFER_SIZE 8
479 u_scanf_scientific_handler(UFILE
*input
,
480 u_scanf_spec_info
*info
,
483 int32_t *fmtConsumed
,
484 int32_t *argConverted
)
488 UNumberFormat
*format
;
489 int32_t parsePos
= 0;
491 UErrorCode status
= U_ZERO_ERROR
;
492 UChar srcExpBuf
[UPRINTF_SYMBOL_BUFFER_SIZE
];
493 int32_t srcLen
, expLen
;
494 UChar expBuf
[UPRINTF_SYMBOL_BUFFER_SIZE
];
497 /* skip all ws in the input */
498 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
500 /* fill the input's internal buffer */
501 ufile_fill_uchar_buffer(input
);
503 /* determine the size of the input's buffer */
504 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
506 /* truncate to the width, if specified */
507 if(info
->fWidth
!= -1)
508 len
= ufmt_min(len
, info
->fWidth
);
510 /* get the formatter */
511 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_SCIENTIFIC
);
517 /* set the appropriate flags on the formatter */
519 srcLen
= unum_getSymbol(format
,
520 UNUM_EXPONENTIAL_SYMBOL
,
525 /* Upper/lower case the e */
526 if (info
->fSpec
== (UChar
)0x65 /* e */) {
527 expLen
= u_strToLower(expBuf
, (int32_t)sizeof(expBuf
),
529 input
->str
.fBundle
.fLocale
,
533 expLen
= u_strToUpper(expBuf
, (int32_t)sizeof(expBuf
),
535 input
->str
.fBundle
.fLocale
,
539 unum_setSymbol(format
,
540 UNUM_EXPONENTIAL_SYMBOL
,
548 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
549 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
551 /* parse the number */
552 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
554 if (!info
->fSkipArg
) {
556 *(double*)(args
[0].ptrValue
) = num
;
557 else if (info
->fIsLongDouble
)
558 *(long double*)(args
[0].ptrValue
) = num
;
560 *(float*)(args
[0].ptrValue
) = (float)num
;
563 /* mask off any necessary bits */
564 /* if(! info->fIsLong_double)
567 /* update the input's position to reflect consumed data */
568 input
->str
.fPos
+= parsePos
;
570 /* we converted 1 arg */
571 *argConverted
= !info
->fSkipArg
;
572 return parsePos
+ skipped
;
576 u_scanf_scidbl_handler(UFILE
*input
,
577 u_scanf_spec_info
*info
,
580 int32_t *fmtConsumed
,
581 int32_t *argConverted
)
585 UNumberFormat
*scientificFormat
, *genericFormat
;
586 /*int32_t scientificResult, genericResult;*/
587 double scientificResult
, genericResult
;
588 int32_t scientificParsePos
= 0, genericParsePos
= 0, parsePos
= 0;
590 UErrorCode scientificStatus
= U_ZERO_ERROR
;
591 UErrorCode genericStatus
= U_ZERO_ERROR
;
594 /* since we can't determine by scanning the characters whether */
595 /* a number was formatted in the 'f' or 'g' styles, parse the */
596 /* string with both formatters, and assume whichever one */
597 /* parsed the most is the correct formatter to use */
600 /* skip all ws in the input */
601 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
603 /* fill the input's internal buffer */
604 ufile_fill_uchar_buffer(input
);
606 /* determine the size of the input's buffer */
607 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
609 /* truncate to the width, if specified */
610 if(info
->fWidth
!= -1)
611 len
= ufmt_min(len
, info
->fWidth
);
613 /* get the formatters */
614 scientificFormat
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_SCIENTIFIC
);
615 genericFormat
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_DECIMAL
);
618 if(scientificFormat
== 0 || genericFormat
== 0)
621 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
622 skipped
+= u_scanf_skip_leading_positive_sign(input
, genericFormat
, &genericStatus
);
624 /* parse the number using each format*/
626 scientificResult
= unum_parseDouble(scientificFormat
, input
->str
.fPos
, len
,
627 &scientificParsePos
, &scientificStatus
);
629 genericResult
= unum_parseDouble(genericFormat
, input
->str
.fPos
, len
,
630 &genericParsePos
, &genericStatus
);
632 /* determine which parse made it farther */
633 if(scientificParsePos
> genericParsePos
) {
634 /* stash the result in num */
635 num
= scientificResult
;
636 /* update the input's position to reflect consumed data */
637 parsePos
+= scientificParsePos
;
640 /* stash the result in num */
642 /* update the input's position to reflect consumed data */
643 parsePos
+= genericParsePos
;
645 input
->str
.fPos
+= parsePos
;
647 if (!info
->fSkipArg
) {
649 *(double*)(args
[0].ptrValue
) = num
;
650 else if (info
->fIsLongDouble
)
651 *(long double*)(args
[0].ptrValue
) = num
;
653 *(float*)(args
[0].ptrValue
) = (float)num
;
656 /* mask off any necessary bits */
657 /* if(! info->fIsLong_double)
660 /* we converted 1 arg */
661 *argConverted
= !info
->fSkipArg
;
662 return parsePos
+ skipped
;
666 u_scanf_integer_handler(UFILE
*input
,
667 u_scanf_spec_info
*info
,
670 int32_t *fmtConsumed
,
671 int32_t *argConverted
)
674 void *num
= (void*) (args
[0].ptrValue
);
675 UNumberFormat
*format
;
676 int32_t parsePos
= 0;
678 UErrorCode status
= U_ZERO_ERROR
;
682 /* skip all ws in the input */
683 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
685 /* fill the input's internal buffer */
686 ufile_fill_uchar_buffer(input
);
688 /* determine the size of the input's buffer */
689 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
691 /* truncate to the width, if specified */
692 if(info
->fWidth
!= -1)
693 len
= ufmt_min(len
, info
->fWidth
);
695 /* get the formatter */
696 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_DECIMAL
);
702 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
703 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
705 /* parse the number */
706 result
= unum_parseInt64(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
708 /* mask off any necessary bits */
709 if (!info
->fSkipArg
) {
711 *(int16_t*)num
= (int16_t)(UINT16_MAX
& result
);
712 else if (info
->fIsLongLong
)
713 *(int64_t*)num
= result
;
715 *(int32_t*)num
= (int32_t)(UINT32_MAX
& result
);
718 /* update the input's position to reflect consumed data */
719 input
->str
.fPos
+= parsePos
;
721 /* we converted 1 arg */
722 *argConverted
= !info
->fSkipArg
;
723 return parsePos
+ skipped
;
727 u_scanf_uinteger_handler(UFILE
*input
,
728 u_scanf_spec_info
*info
,
731 int32_t *fmtConsumed
,
732 int32_t *argConverted
)
734 /* TODO Fix this when Numberformat handles uint64_t */
735 return u_scanf_integer_handler(input
, info
, args
, fmt
, fmtConsumed
, argConverted
);
739 u_scanf_percent_handler(UFILE
*input
,
740 u_scanf_spec_info
*info
,
743 int32_t *fmtConsumed
,
744 int32_t *argConverted
)
748 UNumberFormat
*format
;
749 int32_t parsePos
= 0;
750 UErrorCode status
= U_ZERO_ERROR
;
753 /* skip all ws in the input */
754 u_scanf_skip_leading_ws(input
, info
->fPadChar
);
756 /* fill the input's internal buffer */
757 ufile_fill_uchar_buffer(input
);
759 /* determine the size of the input's buffer */
760 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
762 /* truncate to the width, if specified */
763 if(info
->fWidth
!= -1)
764 len
= ufmt_min(len
, info
->fWidth
);
766 /* get the formatter */
767 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_PERCENT
);
773 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
774 u_scanf_skip_leading_positive_sign(input
, format
, &status
);
776 /* parse the number */
777 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
779 if (!info
->fSkipArg
) {
780 *(double*)(args
[0].ptrValue
) = num
;
783 /* mask off any necessary bits */
784 /* if(! info->fIsLong_double)
787 /* update the input's position to reflect consumed data */
788 input
->str
.fPos
+= parsePos
;
790 /* we converted 1 arg */
791 *argConverted
= !info
->fSkipArg
;
796 u_scanf_string_handler(UFILE
*input
,
797 u_scanf_spec_info
*info
,
800 int32_t *fmtConsumed
,
801 int32_t *argConverted
)
805 char *arg
= (char*)(args
[0].ptrValue
);
808 UErrorCode status
= U_ZERO_ERROR
;
812 UBool isNotEOF
= FALSE
;
814 /* skip all ws in the input */
815 if (info
->fIsString
) {
816 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
819 /* get the string one character at a time, truncating to the width */
822 /* open the default converter */
823 conv
= u_getDefaultConverter(&status
);
825 if(U_FAILURE(status
))
828 while( (info
->fWidth
== -1 || count
< info
->fWidth
)
829 && (isNotEOF
= ufile_getch(input
, &c
))
830 && (!info
->fIsString
|| (c
!= info
->fPadChar
&& !u_isWhitespace(c
))))
833 if (!info
->fSkipArg
) {
834 /* put the character from the input onto the target */
836 /* Since we do this one character at a time, do it this way. */
837 if (info
->fWidth
> 0) {
838 limit
= alias
+ info
->fWidth
- count
;
841 limit
= alias
+ ucnv_getMaxCharSize(conv
);
844 /* convert the character to the default codepage */
845 ucnv_fromUnicode(conv
, &alias
, limit
, &source
, source
+ 1,
846 NULL
, TRUE
, &status
);
848 if(U_FAILURE(status
)) {
850 u_releaseDefaultConverter(conv
);
855 /* increment the count */
859 /* put the final character we read back on the input */
860 if (!info
->fSkipArg
) {
861 if ((info
->fWidth
== -1 || count
< info
->fWidth
) && isNotEOF
)
864 /* add the terminator */
865 if (info
->fIsString
) {
871 u_releaseDefaultConverter(conv
);
873 /* we converted 1 arg */
874 *argConverted
= !info
->fSkipArg
;
875 return count
+ skipped
;
879 u_scanf_char_handler(UFILE
*input
,
880 u_scanf_spec_info
*info
,
883 int32_t *fmtConsumed
,
884 int32_t *argConverted
)
886 if (info
->fWidth
< 0) {
889 info
->fIsString
= FALSE
;
890 return u_scanf_string_handler(input
, info
, args
, fmt
, fmtConsumed
, argConverted
);
894 u_scanf_ustring_handler(UFILE
*input
,
895 u_scanf_spec_info
*info
,
898 int32_t *fmtConsumed
,
899 int32_t *argConverted
)
901 UChar
*arg
= (UChar
*)(args
[0].ptrValue
);
906 UBool isNotEOF
= FALSE
;
908 /* skip all ws in the input */
909 if (info
->fIsString
) {
910 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
913 /* get the string one character at a time, truncating to the width */
916 while( (info
->fWidth
== -1 || count
< info
->fWidth
)
917 && (isNotEOF
= ufile_getch(input
, &c
))
918 && (!info
->fIsString
|| (c
!= info
->fPadChar
&& !u_isWhitespace(c
))))
921 /* put the character from the input onto the target */
922 if (!info
->fSkipArg
) {
926 /* increment the count */
930 /* put the final character we read back on the input */
931 if (!info
->fSkipArg
) {
932 if((info
->fWidth
== -1 || count
< info
->fWidth
) && isNotEOF
) {
936 /* add the terminator */
937 if (info
->fIsString
) {
942 /* we converted 1 arg */
943 *argConverted
= !info
->fSkipArg
;
944 return count
+ skipped
;
948 u_scanf_uchar_handler(UFILE
*input
,
949 u_scanf_spec_info
*info
,
952 int32_t *fmtConsumed
,
953 int32_t *argConverted
)
955 if (info
->fWidth
< 0) {
958 info
->fIsString
= FALSE
;
959 return u_scanf_ustring_handler(input
, info
, args
, fmt
, fmtConsumed
, argConverted
);
963 u_scanf_spellout_handler(UFILE
*input
,
964 u_scanf_spec_info
*info
,
967 int32_t *fmtConsumed
,
968 int32_t *argConverted
)
972 UNumberFormat
*format
;
973 int32_t parsePos
= 0;
975 UErrorCode status
= U_ZERO_ERROR
;
978 /* skip all ws in the input */
979 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
981 /* fill the input's internal buffer */
982 ufile_fill_uchar_buffer(input
);
984 /* determine the size of the input's buffer */
985 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
987 /* truncate to the width, if specified */
988 if(info
->fWidth
!= -1)
989 len
= ufmt_min(len
, info
->fWidth
);
991 /* get the formatter */
992 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_SPELLOUT
);
998 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
999 /* This is not applicable to RBNF. */
1000 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
1002 /* parse the number */
1003 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
1005 if (!info
->fSkipArg
) {
1006 *(double*)(args
[0].ptrValue
) = num
;
1009 /* mask off any necessary bits */
1010 /* if(! info->fIsLong_double)
1013 /* update the input's position to reflect consumed data */
1014 input
->str
.fPos
+= parsePos
;
1016 /* we converted 1 arg */
1017 *argConverted
= !info
->fSkipArg
;
1018 return parsePos
+ skipped
;
1022 u_scanf_hex_handler(UFILE
*input
,
1023 u_scanf_spec_info
*info
,
1026 int32_t *fmtConsumed
,
1027 int32_t *argConverted
)
1031 void *num
= (void*) (args
[0].ptrValue
);
1034 /* skip all ws in the input */
1035 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1037 /* fill the input's internal buffer */
1038 ufile_fill_uchar_buffer(input
);
1040 /* determine the size of the input's buffer */
1041 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1043 /* truncate to the width, if specified */
1044 if(info
->fWidth
!= -1)
1045 len
= ufmt_min(len
, info
->fWidth
);
1047 /* check for alternate form */
1048 if( *(input
->str
.fPos
) == 0x0030 &&
1049 (*(input
->str
.fPos
+ 1) == 0x0078 || *(input
->str
.fPos
+ 1) == 0x0058) ) {
1051 /* skip the '0' and 'x' or 'X' if present */
1052 input
->str
.fPos
+= 2;
1056 /* parse the number */
1057 result
= ufmt_uto64(input
->str
.fPos
, &len
, 16);
1059 /* update the input's position to reflect consumed data */
1060 input
->str
.fPos
+= len
;
1062 /* mask off any necessary bits */
1063 if (!info
->fSkipArg
) {
1065 *(int16_t*)num
= (int16_t)(UINT16_MAX
& result
);
1066 else if (info
->fIsLongLong
)
1067 *(int64_t*)num
= result
;
1069 *(int32_t*)num
= (int32_t)(UINT32_MAX
& result
);
1072 /* we converted 1 arg */
1073 *argConverted
= !info
->fSkipArg
;
1074 return len
+ skipped
;
1078 u_scanf_octal_handler(UFILE
*input
,
1079 u_scanf_spec_info
*info
,
1082 int32_t *fmtConsumed
,
1083 int32_t *argConverted
)
1087 void *num
= (void*) (args
[0].ptrValue
);
1090 /* skip all ws in the input */
1091 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1093 /* fill the input's internal buffer */
1094 ufile_fill_uchar_buffer(input
);
1096 /* determine the size of the input's buffer */
1097 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1099 /* truncate to the width, if specified */
1100 if(info
->fWidth
!= -1)
1101 len
= ufmt_min(len
, info
->fWidth
);
1103 /* parse the number */
1104 result
= ufmt_uto64(input
->str
.fPos
, &len
, 8);
1106 /* update the input's position to reflect consumed data */
1107 input
->str
.fPos
+= len
;
1109 /* mask off any necessary bits */
1110 if (!info
->fSkipArg
) {
1112 *(int16_t*)num
= (int16_t)(UINT16_MAX
& result
);
1113 else if (info
->fIsLongLong
)
1114 *(int64_t*)num
= result
;
1116 *(int32_t*)num
= (int32_t)(UINT32_MAX
& result
);
1119 /* we converted 1 arg */
1120 *argConverted
= !info
->fSkipArg
;
1121 return len
+ skipped
;
1125 u_scanf_pointer_handler(UFILE
*input
,
1126 u_scanf_spec_info
*info
,
1129 int32_t *fmtConsumed
,
1130 int32_t *argConverted
)
1135 void **p
= (void**)(args
[0].ptrValue
);
1138 /* skip all ws in the input */
1139 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1141 /* fill the input's internal buffer */
1142 ufile_fill_uchar_buffer(input
);
1144 /* determine the size of the input's buffer */
1145 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1147 /* truncate to the width, if specified */
1148 if(info
->fWidth
!= -1) {
1149 len
= ufmt_min(len
, info
->fWidth
);
1152 /* Make sure that we don't consume too much */
1153 if (len
> (int32_t)(sizeof(void*)*2)) {
1154 len
= (int32_t)(sizeof(void*)*2);
1157 /* parse the pointer - assign to temporary value */
1158 result
= ufmt_utop(input
->str
.fPos
, &len
);
1160 if (!info
->fSkipArg
) {
1164 /* update the input's position to reflect consumed data */
1165 input
->str
.fPos
+= len
;
1167 /* we converted 1 arg */
1168 *argConverted
= !info
->fSkipArg
;
1169 return len
+ skipped
;
1173 u_scanf_scanset_handler(UFILE
*input
,
1174 u_scanf_spec_info
*info
,
1177 int32_t *fmtConsumed
,
1178 int32_t *argConverted
)
1181 UErrorCode status
= U_ZERO_ERROR
;
1182 int32_t chLeft
= INT32_MAX
;
1184 UChar
*alias
= (UChar
*) (args
[0].ptrValue
);
1185 UBool isNotEOF
= FALSE
;
1186 UBool readCharacter
= FALSE
;
1188 /* Create an empty set */
1189 scanset
= uset_open(0, -1);
1191 /* Back up one to get the [ */
1194 /* truncate to the width, if specified and alias the target */
1195 if(info
->fWidth
>= 0) {
1196 chLeft
= info
->fWidth
;
1199 /* parse the scanset from the fmt string */
1200 *fmtConsumed
= uset_applyPattern(scanset
, fmt
, -1, 0, &status
);
1202 /* verify that the parse was successful */
1203 if (U_SUCCESS(status
)) {
1206 /* grab characters one at a time and make sure they are in the scanset */
1208 if ((isNotEOF
= ufile_getch32(input
, &c
)) && uset_contains(scanset
, c
)) {
1209 readCharacter
= TRUE
;
1210 if (!info
->fSkipArg
) {
1212 UBool isError
= FALSE
;
1214 U16_APPEND(alias
, idx
, chLeft
, c
, isError
);
1220 chLeft
-= (1 + U_IS_SUPPLEMENTARY(c
));
1223 /* if the character's not in the scanset, break out */
1228 /* put the final character we read back on the input */
1229 if(isNotEOF
&& chLeft
> 0) {
1230 u_fungetc(c
, input
);
1234 uset_close(scanset
);
1236 /* if we didn't match at least 1 character, fail */
1239 /* otherwise, add the terminator */
1240 else if (!info
->fSkipArg
) {
1244 /* we converted 1 arg */
1245 *argConverted
= !info
->fSkipArg
;
1246 return (info
->fWidth
>= 0 ? info
->fWidth
: INT32_MAX
) - chLeft
;
1249 /* Use US-ASCII characters only for formatting. Most codepages have
1250 characters 20-7F from Unicode. Using any other codepage specific
1251 characters will make it very difficult to format the string on
1252 non-Unicode machines */
1253 static const u_scanf_info g_u_scanf_infos
[USCANF_NUM_FMT_HANDLERS
] = {
1255 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1256 UFMT_EMPTY
, UFMT_SIMPLE_PERCENT
,UFMT_EMPTY
, UFMT_EMPTY
,
1257 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1258 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1261 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1262 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1263 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1264 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1267 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_UCHAR
,
1268 UFMT_EMPTY
, UFMT_SCIENTIFIC
, UFMT_EMPTY
, UFMT_SCIDBL
,
1269 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1270 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_UCHAR
/*deprecated*/,
1272 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1274 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1277 UFMT_PERCENT
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_USTRING
,
1278 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1279 UFMT_EMPTY
, UFMT_USTRING
/*deprecated*/,UFMT_SPELLOUT
, UFMT_EMPTY
,
1281 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_SPELLOUT
, UFMT_EMPTY
,
1283 UFMT_HEX
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_SCANSET
,
1284 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1287 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_CHAR
,
1288 UFMT_INT
, UFMT_SCIENTIFIC
, UFMT_DOUBLE
, UFMT_SCIDBL
,
1289 UFMT_EMPTY
, UFMT_INT
, UFMT_EMPTY
, UFMT_EMPTY
,
1290 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_COUNT
, UFMT_OCTAL
,
1293 UFMT_POINTER
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_STRING
,
1294 UFMT_EMPTY
, UFMT_UINT
, UFMT_EMPTY
, UFMT_EMPTY
,
1295 UFMT_HEX
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1296 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1300 u_scanf_parse(UFILE
*f
,
1301 const UChar
*patternSpecification
,
1305 int32_t count
, converted
, argConsumed
, cpConsumed
;
1306 uint16_t handlerNum
;
1310 ufmt_type_info info
;
1311 u_scanf_handler handler
;
1313 /* alias the pattern */
1314 alias
= patternSpecification
;
1316 /* haven't converted anything yet */
1321 /* iterate through the pattern */
1324 /* match any characters up to the next '%' */
1325 while(*alias
!= UP_PERCENT
&& *alias
!= 0x0000 && u_fgetc(f
) == *alias
) {
1329 /* if we aren't at a '%', or if we're at end of string, break*/
1330 if(*alias
!= UP_PERCENT
|| *alias
== 0x0000)
1333 /* parse the specifier */
1334 count
= u_scanf_parse_spec(alias
, &spec
);
1336 /* update the pointer in pattern */
1339 handlerNum
= (uint16_t)(spec
.fInfo
.fSpec
- USCANF_BASE_FMT_HANDLERS
);
1340 if (handlerNum
< USCANF_NUM_FMT_HANDLERS
) {
1341 /* skip the argument, if necessary */
1342 /* query the info function for argument information */
1343 info
= g_u_scanf_infos
[ handlerNum
].info
;
1344 if (info
!= ufmt_count
&& u_feof(f
)) {
1347 else if(spec
.fInfo
.fSkipArg
) {
1348 args
.ptrValue
= NULL
;
1353 /* set the spec's width to the # of items converted */
1354 spec
.fInfo
.fWidth
= cpConsumed
;
1355 /* fall through to next case */
1364 args
.ptrValue
= va_arg(ap
, void*);
1368 /* else args is ignored */
1369 args
.ptrValue
= NULL
;
1374 /* call the handler function */
1375 handler
= g_u_scanf_infos
[ handlerNum
].handler
;
1378 /* reset count to 1 so that += for alias works. */
1381 cpConsumed
+= (*handler
)(f
, &spec
.fInfo
, &args
, alias
, &count
, &argConsumed
);
1383 /* if the handler encountered an error condition, break */
1384 if(argConsumed
< 0) {
1389 /* add to the # of items converted */
1390 converted
+= argConsumed
;
1392 /* update the pointer in pattern */
1395 /* else do nothing */
1397 /* else do nothing */
1399 /* just ignore unknown tags */
1402 /* return # of items converted */
1406 #endif /* #if !UCONFIG_NO_FORMATTING */