2 *******************************************************************************
4 * Copyright (C) 1998-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 12/02/98 stephen Creation.
15 * 03/13/99 stephen Modified for new C API.
16 *******************************************************************************
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_FORMATTING
23 #include "unicode/uchar.h"
24 #include "unicode/ustring.h"
25 #include "unicode/unum.h"
26 #include "unicode/udat.h"
27 #include "unicode/uset.h"
36 /* flag characters for u_scanf */
37 #define FLAG_ASTERISK 0x002A
38 #define FLAG_PAREN 0x0028
40 #define ISFLAG(s) (s) == FLAG_ASTERISK || \
43 /* special characters for u_scanf */
44 #define SPEC_DOLLARSIGN 0x0024
47 #define DIGIT_ZERO 0x0030
48 #define DIGIT_ONE 0x0031
49 #define DIGIT_TWO 0x0032
50 #define DIGIT_THREE 0x0033
51 #define DIGIT_FOUR 0x0034
52 #define DIGIT_FIVE 0x0035
53 #define DIGIT_SIX 0x0036
54 #define DIGIT_SEVEN 0x0037
55 #define DIGIT_EIGHT 0x0038
56 #define DIGIT_NINE 0x0039
58 #define ISDIGIT(s) (s) == DIGIT_ZERO || \
61 (s) == DIGIT_THREE || \
62 (s) == DIGIT_FOUR || \
63 (s) == DIGIT_FIVE || \
65 (s) == DIGIT_SEVEN || \
66 (s) == DIGIT_EIGHT || \
69 /* u_scanf modifiers */
71 #define MOD_LOWERL 0x006C
74 #define ISMOD(s) (s) == MOD_H || \
75 (s) == MOD_LOWERL || \
79 * Struct encapsulating the parsed details (width, specifier, pad character and modifier flags) of a single u_scanf format specification.
81 typedef struct u_scanf_spec_info
{
82 int32_t fWidth
; /* Width */
84 UChar fSpec
; /* Format specification */
86 UChar fPadChar
; /* Padding character */
88 UBool fSkipArg
; /* TRUE if arg should be skipped */
89 UBool fIsLongDouble
; /* L flag */
90 UBool fIsShort
; /* h flag */
91 UBool fIsLong
; /* l flag */
92 UBool fIsLongLong
; /* ll flag */
93 UBool fIsString
; /* TRUE if this is a NULL-terminated string. */
98 * Struct encapsulating a single u_scanf format specification: its parsed info plus the position of its data in the argument list.
100 typedef struct u_scanf_spec
{
101 u_scanf_spec_info fInfo
; /* Information on this spec */
102 int32_t fArgPos
; /* Position of data in arg list */
106 * Parse a single u_scanf format specifier in Unicode.
107 * @param fmt A pointer to a '%' character in a u_scanf format specification.
108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
110 * @return The number of characters contained in this specifier.
113 u_scanf_parse_spec (const UChar
*fmt
,
116 const UChar
*s
= fmt
;
118 u_scanf_spec_info
*info
= &(spec
->fInfo
);
120 /* initialize spec to default values */
124 info
->fSpec
= 0x0000;
125 info
->fPadChar
= 0x0020;
126 info
->fSkipArg
= FALSE
;
127 info
->fIsLongDouble
= FALSE
;
128 info
->fIsShort
= FALSE
;
129 info
->fIsLong
= FALSE
;
130 info
->fIsLongLong
= FALSE
;
131 info
->fIsString
= TRUE
;
134 /* skip over the initial '%' */
137 /* Check for positional argument */
140 /* Save the current position */
143 /* handle positional parameters */
145 spec
->fArgPos
= (int) (*s
++ - DIGIT_ZERO
);
149 spec
->fArgPos
+= (int) (*s
++ - DIGIT_ZERO
);
153 /* if there is no '$', don't read anything */
154 if(*s
!= SPEC_DOLLARSIGN
) {
163 /* Get any format flags */
169 info
->fSkipArg
= TRUE
;
172 /* pad character specified */
175 /* first four characters are hex values for pad char */
176 info
->fPadChar
= (UChar
)ufmt_digitvalue(*s
++);
177 info
->fPadChar
= (UChar
)((info
->fPadChar
* 16) + ufmt_digitvalue(*s
++));
178 info
->fPadChar
= (UChar
)((info
->fPadChar
* 16) + ufmt_digitvalue(*s
++));
179 info
->fPadChar
= (UChar
)((info
->fPadChar
* 16) + ufmt_digitvalue(*s
++));
181 /* final character is ignored */
190 info
->fWidth
= (int) (*s
++ - DIGIT_ZERO
);
194 info
->fWidth
+= (int) (*s
++ - DIGIT_ZERO
);
198 /* Get any modifiers */
204 info
->fIsShort
= TRUE
;
207 /* long or long long */
209 if(*s
== MOD_LOWERL
) {
210 info
->fIsLongLong
= TRUE
;
211 /* skip over the next 'l' */
215 info
->fIsLong
= TRUE
;
220 info
->fIsLongDouble
= TRUE
;
225 /* finally, get the specifier letter */
228 /* return # of characters in this specifier */
229 return (int32_t)(s
- fmt
);
232 #define UP_PERCENT 0x0025
235 /* ANSI style formatting */
236 /* Use US-ASCII characters only for formatting */
239 #define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}
241 #define UFMT_STRING {ufmt_string, u_scanf_string_handler}
243 #define UFMT_CHAR {ufmt_string, u_scanf_char_handler}
245 #define UFMT_INT {ufmt_int, u_scanf_integer_handler}
247 #define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler}
249 #define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler}
251 #define UFMT_HEX {ufmt_int, u_scanf_hex_handler}
253 #define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler}
255 #define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler}
257 #define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler}
259 #define UFMT_COUNT {ufmt_count, u_scanf_count_handler}
261 #define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler}
263 /* non-ANSI extensions */
264 /* Use US-ASCII characters only for formatting */
267 #define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler}
269 #define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler}
271 #define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler}
272 /* C K is old format */
273 #define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler}
274 /* S U is old format */
275 #define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler}
278 #define UFMT_EMPTY {ufmt_empty, NULL}
281 * A u_scanf handler function.
282 * A u_scanf handler is responsible for handling a single u_scanf
283 * format specification, for example 'd' or 's'.
284 * @param stream The UFILE from which to read input.
285 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
286 * information on the format specification.
287 * @param args A pointer to the argument data
288 * @param fmt A pointer to the first character in the format string
289 * following the spec.
290 * @param fmtConsumed On output, set to the number of characters consumed
291 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
292 * @param argConverted The number of arguments converted and assigned, or -1 if an
294 * @return The number of code points consumed during reading.
296 typedef int32_t (*u_scanf_handler
) (UFILE
*stream
,
297 u_scanf_spec_info
*info
,
300 int32_t *fmtConsumed
,
301 int32_t *argConverted
);
303 typedef struct u_scanf_info
{
305 u_scanf_handler handler
;
/* Number of entries in the g_u_scanf_infos handler table; a computed handler
   index must be below this value before the table is consulted. */
308 #define USCANF_NUM_FMT_HANDLERS 108
/* Capacity, in UChars, of the local buffer that receives the plus sign symbol
   from unum_getSymbol() in u_scanf_skip_leading_positive_sign. */
309 #define USCANF_SYMBOL_BUFFER_SIZE 8
311 /* We do not use handlers for 0-0x1f, so this base is subtracted from the
   specifier character to obtain the index into the handler table. */
312 #define USCANF_BASE_FMT_HANDLERS 0x20
316 u_scanf_skip_leading_ws(UFILE
*input
,
323 /* skip all leading ws in the input */
324 while( (isNotEOF
= ufile_getch(input
, &c
)) && (c
== pad
|| u_isWhitespace(c
)) )
329 /* put the final character back on the input */
336 /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
338 u_scanf_skip_leading_positive_sign(UFILE
*input
,
339 UNumberFormat
*format
,
345 UChar plusSymbol
[USCANF_SYMBOL_BUFFER_SIZE
];
347 UErrorCode localStatus
= U_ZERO_ERROR
;
349 if (U_SUCCESS(*status
)) {
350 symbolLen
= unum_getSymbol(format
,
351 UNUM_PLUS_SIGN_SYMBOL
,
353 sizeof(plusSymbol
)/sizeof(*plusSymbol
),
356 if (U_SUCCESS(localStatus
)) {
357 /* consume input characters for as long as they match the plus sign symbol */
358 while( (isNotEOF
= ufile_getch(input
, &c
)) && (count
< symbolLen
&& c
== plusSymbol
[count
]) )
363 /* put the final character back on the input */
374 u_scanf_simple_percent_handler(UFILE
*input
,
375 u_scanf_spec_info
*info
,
378 int32_t *fmtConsumed
,
379 int32_t *argConverted
)
381 /* make sure the next character in the input is a percent */
383 if(u_fgetc(input
) != 0x0025) {
390 u_scanf_count_handler(UFILE
*input
,
391 u_scanf_spec_info
*info
,
394 int32_t *fmtConsumed
,
395 int32_t *argConverted
)
397 /* in the special case of count, the u_scanf_spec_info's width */
398 /* will contain the # of code points consumed thus far */
399 if (!info
->fSkipArg
) {
401 *(int16_t*)(args
[0].ptrValue
) = (int16_t)(UINT16_MAX
& info
->fWidth
);
402 else if (info
->fIsLongLong
)
403 *(int64_t*)(args
[0].ptrValue
) = info
->fWidth
;
405 *(int32_t*)(args
[0].ptrValue
) = (int32_t)(UINT32_MAX
& info
->fWidth
);
409 /* we converted 0 args */
414 u_scanf_double_handler(UFILE
*input
,
415 u_scanf_spec_info
*info
,
418 int32_t *fmtConsumed
,
419 int32_t *argConverted
)
423 UNumberFormat
*format
;
424 int32_t parsePos
= 0;
426 UErrorCode status
= U_ZERO_ERROR
;
429 /* skip all ws in the input */
430 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
432 /* fill the input's internal buffer */
433 ufile_fill_uchar_buffer(input
);
435 /* determine the size of the input's buffer */
436 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
438 /* truncate to the width, if specified */
439 if(info
->fWidth
!= -1)
440 len
= ufmt_min(len
, info
->fWidth
);
442 /* get the formatter */
443 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_DECIMAL
);
449 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
450 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
452 /* parse the number */
453 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
455 if (!info
->fSkipArg
) {
457 *(double*)(args
[0].ptrValue
) = num
;
458 else if (info
->fIsLongDouble
)
459 *(long double*)(args
[0].ptrValue
) = num
;
461 *(float*)(args
[0].ptrValue
) = (float)num
;
464 /* mask off any necessary bits */
465 /* if(! info->fIsLong_double)
468 /* update the input's position to reflect consumed data */
469 input
->str
.fPos
+= parsePos
;
471 /* we converted 1 arg */
472 *argConverted
= !info
->fSkipArg
;
473 return parsePos
+ skipped
;
477 u_scanf_scientific_handler(UFILE
*input
,
478 u_scanf_spec_info
*info
,
481 int32_t *fmtConsumed
,
482 int32_t *argConverted
)
486 UNumberFormat
*format
;
487 int32_t parsePos
= 0;
489 UErrorCode status
= U_ZERO_ERROR
;
492 /* skip all ws in the input */
493 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
495 /* fill the input's internal buffer */
496 ufile_fill_uchar_buffer(input
);
498 /* determine the size of the input's buffer */
499 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
501 /* truncate to the width, if specified */
502 if(info
->fWidth
!= -1)
503 len
= ufmt_min(len
, info
->fWidth
);
505 /* get the formatter */
506 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_SCIENTIFIC
);
512 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
513 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
515 /* parse the number */
516 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
518 if (!info
->fSkipArg
) {
520 *(double*)(args
[0].ptrValue
) = num
;
521 else if (info
->fIsLongDouble
)
522 *(long double*)(args
[0].ptrValue
) = num
;
524 *(float*)(args
[0].ptrValue
) = (float)num
;
527 /* mask off any necessary bits */
528 /* if(! info->fIsLong_double)
531 /* update the input's position to reflect consumed data */
532 input
->str
.fPos
+= parsePos
;
534 /* we converted 1 arg */
535 *argConverted
= !info
->fSkipArg
;
536 return parsePos
+ skipped
;
540 u_scanf_scidbl_handler(UFILE
*input
,
541 u_scanf_spec_info
*info
,
544 int32_t *fmtConsumed
,
545 int32_t *argConverted
)
549 UNumberFormat
*scientificFormat
, *genericFormat
;
550 /*int32_t scientificResult, genericResult;*/
551 double scientificResult
, genericResult
;
552 int32_t scientificParsePos
= 0, genericParsePos
= 0, parsePos
= 0;
554 UErrorCode scientificStatus
= U_ZERO_ERROR
;
555 UErrorCode genericStatus
= U_ZERO_ERROR
;
558 /* since we can't determine by scanning the characters whether */
559 /* a number was formatted in the 'f' or 'g' styles, parse the */
560 /* string with both formatters, and assume whichever one */
561 /* parsed the most is the correct formatter to use */
564 /* skip all ws in the input */
565 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
567 /* fill the input's internal buffer */
568 ufile_fill_uchar_buffer(input
);
570 /* determine the size of the input's buffer */
571 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
573 /* truncate to the width, if specified */
574 if(info
->fWidth
!= -1)
575 len
= ufmt_min(len
, info
->fWidth
);
577 /* get the formatters */
578 scientificFormat
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_SCIENTIFIC
);
579 genericFormat
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_DECIMAL
);
582 if(scientificFormat
== 0 || genericFormat
== 0)
585 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
586 skipped
+= u_scanf_skip_leading_positive_sign(input
, genericFormat
, &genericStatus
);
588 /* parse the number using each format*/
590 scientificResult
= unum_parseDouble(scientificFormat
, input
->str
.fPos
, len
,
591 &scientificParsePos
, &scientificStatus
);
593 genericResult
= unum_parseDouble(genericFormat
, input
->str
.fPos
, len
,
594 &genericParsePos
, &genericStatus
);
596 /* determine which parse made it farther */
597 if(scientificParsePos
> genericParsePos
) {
598 /* stash the result in num */
599 num
= scientificResult
;
600 /* update the input's position to reflect consumed data */
601 parsePos
+= scientificParsePos
;
604 /* stash the result in num */
606 /* update the input's position to reflect consumed data */
607 parsePos
+= genericParsePos
;
609 input
->str
.fPos
+= parsePos
;
611 if (!info
->fSkipArg
) {
613 *(double*)(args
[0].ptrValue
) = num
;
614 else if (info
->fIsLongDouble
)
615 *(long double*)(args
[0].ptrValue
) = num
;
617 *(float*)(args
[0].ptrValue
) = (float)num
;
620 /* mask off any necessary bits */
621 /* if(! info->fIsLong_double)
624 /* we converted 1 arg */
625 *argConverted
= !info
->fSkipArg
;
626 return parsePos
+ skipped
;
630 u_scanf_integer_handler(UFILE
*input
,
631 u_scanf_spec_info
*info
,
634 int32_t *fmtConsumed
,
635 int32_t *argConverted
)
638 void *num
= (void*) (args
[0].ptrValue
);
639 UNumberFormat
*format
;
640 int32_t parsePos
= 0;
642 UErrorCode status
= U_ZERO_ERROR
;
646 /* skip all ws in the input */
647 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
649 /* fill the input's internal buffer */
650 ufile_fill_uchar_buffer(input
);
652 /* determine the size of the input's buffer */
653 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
655 /* truncate to the width, if specified */
656 if(info
->fWidth
!= -1)
657 len
= ufmt_min(len
, info
->fWidth
);
659 /* get the formatter */
660 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_DECIMAL
);
666 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
667 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
669 /* parse the number */
670 result
= unum_parseInt64(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
672 /* mask off any necessary bits */
673 if (!info
->fSkipArg
) {
675 *(int16_t*)num
= (int16_t)(UINT16_MAX
& result
);
676 else if (info
->fIsLongLong
)
677 *(int64_t*)num
= result
;
679 *(int32_t*)num
= (int32_t)(UINT32_MAX
& result
);
682 /* update the input's position to reflect consumed data */
683 input
->str
.fPos
+= parsePos
;
685 /* we converted 1 arg */
686 *argConverted
= !info
->fSkipArg
;
687 return parsePos
+ skipped
;
691 u_scanf_uinteger_handler(UFILE
*input
,
692 u_scanf_spec_info
*info
,
695 int32_t *fmtConsumed
,
696 int32_t *argConverted
)
698 /* TODO Fix this when Numberformat handles uint64_t */
699 return u_scanf_integer_handler(input
, info
, args
, fmt
, fmtConsumed
, argConverted
);
703 u_scanf_percent_handler(UFILE
*input
,
704 u_scanf_spec_info
*info
,
707 int32_t *fmtConsumed
,
708 int32_t *argConverted
)
712 UNumberFormat
*format
;
713 int32_t parsePos
= 0;
715 UErrorCode status
= U_ZERO_ERROR
;
718 /* skip all ws in the input */
719 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
721 /* fill the input's internal buffer */
722 ufile_fill_uchar_buffer(input
);
724 /* determine the size of the input's buffer */
725 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
727 /* truncate to the width, if specified */
728 if(info
->fWidth
!= -1)
729 len
= ufmt_min(len
, info
->fWidth
);
731 /* get the formatter */
732 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_PERCENT
);
738 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
739 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
741 /* parse the number */
742 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
744 if (!info
->fSkipArg
) {
745 *(double*)(args
[0].ptrValue
) = num
;
748 /* mask off any necessary bits */
749 /* if(! info->fIsLong_double)
752 /* update the input's position to reflect consumed data */
753 input
->str
.fPos
+= parsePos
;
755 /* we converted 1 arg */
756 *argConverted
= !info
->fSkipArg
;
761 u_scanf_string_handler(UFILE
*input
,
762 u_scanf_spec_info
*info
,
765 int32_t *fmtConsumed
,
766 int32_t *argConverted
)
770 char *arg
= (char*)(args
[0].ptrValue
);
773 UErrorCode status
= U_ZERO_ERROR
;
777 UBool isNotEOF
= FALSE
;
779 /* skip leading ws in the input, but only when fIsString is set */
780 if (info
->fIsString
) {
781 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
784 /* get the string one character at a time, truncating to the width */
787 /* open the default converter */
788 conv
= u_getDefaultConverter(&status
);
790 if(U_FAILURE(status
))
793 while( (info
->fWidth
== -1 || count
< info
->fWidth
)
794 && (isNotEOF
= ufile_getch(input
, &c
))
795 && (!info
->fIsString
|| (c
!= info
->fPadChar
&& !u_isWhitespace(c
))))
798 if (!info
->fSkipArg
) {
799 /* put the character from the input onto the target */
801 /* Since we do this one character at a time, do it this way. */
802 if (info
->fWidth
> 0) {
803 limit
= alias
+ info
->fWidth
- count
;
806 limit
= alias
+ ucnv_getMaxCharSize(conv
);
809 /* convert the character to the default codepage */
810 ucnv_fromUnicode(conv
, &alias
, limit
, &source
, source
+ 1,
811 NULL
, TRUE
, &status
);
813 if(U_FAILURE(status
)) {
815 u_releaseDefaultConverter(conv
);
820 /* increment the count */
824 /* put the final character we read back on the input */
825 if (!info
->fSkipArg
) {
826 if ((info
->fWidth
== -1 || count
< info
->fWidth
) && isNotEOF
)
829 /* add the terminator */
830 if (info
->fIsString
) {
836 u_releaseDefaultConverter(conv
);
838 /* we converted 1 arg */
839 *argConverted
= !info
->fSkipArg
;
840 return count
+ skipped
;
844 u_scanf_char_handler(UFILE
*input
,
845 u_scanf_spec_info
*info
,
848 int32_t *fmtConsumed
,
849 int32_t *argConverted
)
851 if (info
->fWidth
< 0) {
854 info
->fIsString
= FALSE
;
855 return u_scanf_string_handler(input
, info
, args
, fmt
, fmtConsumed
, argConverted
);
859 u_scanf_ustring_handler(UFILE
*input
,
860 u_scanf_spec_info
*info
,
863 int32_t *fmtConsumed
,
864 int32_t *argConverted
)
866 UChar
*arg
= (UChar
*)(args
[0].ptrValue
);
871 UBool isNotEOF
= FALSE
;
873 /* skip leading ws in the input, but only when fIsString is set */
874 if (info
->fIsString
) {
875 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
878 /* get the string one character at a time, truncating to the width */
881 while( (info
->fWidth
== -1 || count
< info
->fWidth
)
882 && (isNotEOF
= ufile_getch(input
, &c
))
883 && (!info
->fIsString
|| (c
!= info
->fPadChar
&& !u_isWhitespace(c
))))
886 /* put the character from the input onto the target */
887 if (!info
->fSkipArg
) {
891 /* increment the count */
895 /* put the final character we read back on the input */
896 if (!info
->fSkipArg
) {
897 if((info
->fWidth
== -1 || count
< info
->fWidth
) && isNotEOF
) {
901 /* add the terminator */
902 if (info
->fIsString
) {
907 /* we converted 1 arg */
908 *argConverted
= !info
->fSkipArg
;
909 return count
+ skipped
;
913 u_scanf_uchar_handler(UFILE
*input
,
914 u_scanf_spec_info
*info
,
917 int32_t *fmtConsumed
,
918 int32_t *argConverted
)
920 if (info
->fWidth
< 0) {
923 info
->fIsString
= FALSE
;
924 return u_scanf_ustring_handler(input
, info
, args
, fmt
, fmtConsumed
, argConverted
);
928 u_scanf_spellout_handler(UFILE
*input
,
929 u_scanf_spec_info
*info
,
932 int32_t *fmtConsumed
,
933 int32_t *argConverted
)
937 UNumberFormat
*format
;
938 int32_t parsePos
= 0;
940 UErrorCode status
= U_ZERO_ERROR
;
943 /* skip all ws in the input */
944 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
946 /* fill the input's internal buffer */
947 ufile_fill_uchar_buffer(input
);
949 /* determine the size of the input's buffer */
950 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
952 /* truncate to the width, if specified */
953 if(info
->fWidth
!= -1)
954 len
= ufmt_min(len
, info
->fWidth
);
956 /* get the formatter */
957 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_SPELLOUT
);
963 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
964 /* This is not applicable to RBNF. */
965 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
967 /* parse the number */
968 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
970 if (!info
->fSkipArg
) {
971 *(double*)(args
[0].ptrValue
) = num
;
974 /* mask off any necessary bits */
975 /* if(! info->fIsLong_double)
978 /* update the input's position to reflect consumed data */
979 input
->str
.fPos
+= parsePos
;
981 /* we converted 1 arg */
982 *argConverted
= !info
->fSkipArg
;
983 return parsePos
+ skipped
;
987 u_scanf_hex_handler(UFILE
*input
,
988 u_scanf_spec_info
*info
,
991 int32_t *fmtConsumed
,
992 int32_t *argConverted
)
996 void *num
= (void*) (args
[0].ptrValue
);
999 /* skip all ws in the input */
1000 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1002 /* fill the input's internal buffer */
1003 ufile_fill_uchar_buffer(input
);
1005 /* determine the size of the input's buffer */
1006 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1008 /* truncate to the width, if specified */
1009 if(info
->fWidth
!= -1)
1010 len
= ufmt_min(len
, info
->fWidth
);
1012 /* check for alternate form */
1013 if( *(input
->str
.fPos
) == 0x0030 &&
1014 (*(input
->str
.fPos
+ 1) == 0x0078 || *(input
->str
.fPos
+ 1) == 0x0058) ) {
1016 /* skip the '0' and 'x' or 'X' if present */
1017 input
->str
.fPos
+= 2;
1021 /* parse the number */
1022 result
= ufmt_uto64(input
->str
.fPos
, &len
, 16);
1024 /* update the input's position to reflect consumed data */
1025 input
->str
.fPos
+= len
;
1027 /* mask off any necessary bits */
1028 if (!info
->fSkipArg
) {
1030 *(int16_t*)num
= (int16_t)(UINT16_MAX
& result
);
1031 else if (info
->fIsLongLong
)
1032 *(int64_t*)num
= result
;
1034 *(int32_t*)num
= (int32_t)(UINT32_MAX
& result
);
1037 /* we converted 1 arg */
1038 *argConverted
= !info
->fSkipArg
;
1039 return len
+ skipped
;
1043 u_scanf_octal_handler(UFILE
*input
,
1044 u_scanf_spec_info
*info
,
1047 int32_t *fmtConsumed
,
1048 int32_t *argConverted
)
1052 void *num
= (void*) (args
[0].ptrValue
);
1055 /* skip all ws in the input */
1056 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1058 /* fill the input's internal buffer */
1059 ufile_fill_uchar_buffer(input
);
1061 /* determine the size of the input's buffer */
1062 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1064 /* truncate to the width, if specified */
1065 if(info
->fWidth
!= -1)
1066 len
= ufmt_min(len
, info
->fWidth
);
1068 /* parse the number */
1069 result
= ufmt_uto64(input
->str
.fPos
, &len
, 8);
1071 /* update the input's position to reflect consumed data */
1072 input
->str
.fPos
+= len
;
1074 /* mask off any necessary bits */
1075 if (!info
->fSkipArg
) {
1077 *(int16_t*)num
= (int16_t)(UINT16_MAX
& result
);
1078 else if (info
->fIsLongLong
)
1079 *(int64_t*)num
= result
;
1081 *(int32_t*)num
= (int32_t)(UINT32_MAX
& result
);
1084 /* we converted 1 arg */
1085 *argConverted
= !info
->fSkipArg
;
1086 return len
+ skipped
;
1090 u_scanf_pointer_handler(UFILE
*input
,
1091 u_scanf_spec_info
*info
,
1094 int32_t *fmtConsumed
,
1095 int32_t *argConverted
)
1100 void **p
= (void**)(args
[0].ptrValue
);
1103 /* skip all ws in the input */
1104 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1106 /* fill the input's internal buffer */
1107 ufile_fill_uchar_buffer(input
);
1109 /* determine the size of the input's buffer */
1110 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1112 /* truncate to the width, if specified */
1113 if(info
->fWidth
!= -1) {
1114 len
= ufmt_min(len
, info
->fWidth
);
1117 /* Make sure that we don't consume too much */
1118 if (len
> (int32_t)(sizeof(void*)*2)) {
1119 len
= (int32_t)(sizeof(void*)*2);
1122 /* parse the pointer - assign to temporary value */
1123 result
= ufmt_utop(input
->str
.fPos
, &len
);
1125 if (!info
->fSkipArg
) {
1129 /* update the input's position to reflect consumed data */
1130 input
->str
.fPos
+= len
;
1132 /* we converted 1 arg */
1133 *argConverted
= !info
->fSkipArg
;
1134 return len
+ skipped
;
1138 u_scanf_scanset_handler(UFILE
*input
,
1139 u_scanf_spec_info
*info
,
1142 int32_t *fmtConsumed
,
1143 int32_t *argConverted
)
1146 UErrorCode status
= U_ZERO_ERROR
;
1147 int32_t chLeft
= INT32_MAX
;
1149 UChar
*alias
= (UChar
*) (args
[0].ptrValue
);
1150 UBool isNotEOF
= FALSE
;
1151 UBool readCharacter
= FALSE
;
1153 /* Create an empty set */
1154 scanset
= uset_open(0, -1);
1156 /* Back up one to get the [ */
1159 /* truncate to the width, if specified and alias the target */
1160 if(info
->fWidth
>= 0) {
1161 chLeft
= info
->fWidth
;
1164 /* parse the scanset from the fmt string */
1165 *fmtConsumed
= uset_applyPattern(scanset
, fmt
, -1, 0, &status
);
1167 /* verify that the parse was successful */
1168 if (U_SUCCESS(status
)) {
1171 /* grab characters one at a time and make sure they are in the scanset */
1173 if ((isNotEOF
= ufile_getch32(input
, &c
)) && uset_contains(scanset
, c
)) {
1174 readCharacter
= TRUE
;
1175 if (!info
->fSkipArg
) {
1177 UBool isError
= FALSE
;
1179 U16_APPEND(alias
, idx
, chLeft
, c
, isError
);
1185 chLeft
-= (1 + U_IS_SUPPLEMENTARY(c
));
1188 /* if the character's not in the scanset, break out */
1193 /* put the final character we read back on the input */
1194 if(isNotEOF
&& chLeft
> 0) {
1195 u_fungetc(c
, input
);
1199 uset_close(scanset
);
1201 /* if we didn't match at least 1 character, fail */
1204 /* otherwise, add the terminator */
1205 else if (!info
->fSkipArg
) {
1209 /* we converted 1 arg */
1210 *argConverted
= !info
->fSkipArg
;
1211 return (info
->fWidth
>= 0 ? info
->fWidth
: INT32_MAX
) - chLeft
;
1214 /* Use US-ASCII characters only for formatting. Most codepages have
1215 characters 20-7F from Unicode. Using any other codepage specific
1216 characters will make it very difficult to format the string on
1217 non-Unicode machines */
1218 static const u_scanf_info g_u_scanf_infos
[USCANF_NUM_FMT_HANDLERS
] = {
1220 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1221 UFMT_EMPTY
, UFMT_SIMPLE_PERCENT
,UFMT_EMPTY
, UFMT_EMPTY
,
1222 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1223 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1226 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1227 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1228 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1229 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1232 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_UCHAR
,
1233 UFMT_EMPTY
, UFMT_SCIENTIFIC
, UFMT_EMPTY
, UFMT_SCIDBL
,
1234 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1235 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_UCHAR
/*deprecated*/,
1237 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1239 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1242 UFMT_PERCENT
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_USTRING
,
1243 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1244 UFMT_EMPTY
, UFMT_USTRING
/*deprecated*/,UFMT_SPELLOUT
, UFMT_EMPTY
,
1246 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_SPELLOUT
, UFMT_EMPTY
,
1248 UFMT_HEX
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_SCANSET
,
1249 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1252 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_CHAR
,
1253 UFMT_INT
, UFMT_SCIENTIFIC
, UFMT_DOUBLE
, UFMT_SCIDBL
,
1254 UFMT_EMPTY
, UFMT_INT
, UFMT_EMPTY
, UFMT_EMPTY
,
1255 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_COUNT
, UFMT_OCTAL
,
1258 UFMT_POINTER
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_STRING
,
1259 UFMT_EMPTY
, UFMT_UINT
, UFMT_EMPTY
, UFMT_EMPTY
,
1260 UFMT_HEX
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1261 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1265 u_scanf_parse(UFILE
*f
,
1266 const UChar
*patternSpecification
,
1270 int32_t count
, converted
, argConsumed
, cpConsumed
;
1271 uint16_t handlerNum
;
1275 ufmt_type_info info
;
1276 u_scanf_handler handler
;
1278 /* alias the pattern */
1279 alias
= patternSpecification
;
1281 /* haven't converted anything yet */
1286 /* iterate through the pattern */
1289 /* match any characters up to the next '%' */
1290 while(*alias
!= UP_PERCENT
&& *alias
!= 0x0000 && u_fgetc(f
) == *alias
) {
1294 /* if we aren't at a '%', or if we're at end of string, break*/
1295 if(*alias
!= UP_PERCENT
|| *alias
== 0x0000)
1298 /* parse the specifier */
1299 count
= u_scanf_parse_spec(alias
, &spec
);
1301 /* update the pointer in pattern */
1304 handlerNum
= (uint16_t)(spec
.fInfo
.fSpec
- USCANF_BASE_FMT_HANDLERS
);
1305 if (handlerNum
< USCANF_NUM_FMT_HANDLERS
) {
1306 /* skip the argument, if necessary */
1307 /* query the info function for argument information */
1308 info
= g_u_scanf_infos
[ handlerNum
].info
;
1309 if (info
!= ufmt_count
&& u_feof(f
)) {
1312 else if(spec
.fInfo
.fSkipArg
) {
1313 args
.ptrValue
= NULL
;
1318 /* set the spec's width to the # of code points consumed so far */
1319 spec
.fInfo
.fWidth
= cpConsumed
;
1320 /* fall through to next case */
1329 args
.ptrValue
= va_arg(ap
, void*);
1333 /* else args is ignored */
1334 args
.ptrValue
= NULL
;
1339 /* call the handler function */
1340 handler
= g_u_scanf_infos
[ handlerNum
].handler
;
1343 /* reset count to 1 so that += for alias works. */
1346 cpConsumed
+= (*handler
)(f
, &spec
.fInfo
, &args
, alias
, &count
, &argConsumed
);
1348 /* if the handler encountered an error condition, break */
1349 if(argConsumed
< 0) {
1354 /* add to the # of items converted */
1355 converted
+= argConsumed
;
1357 /* update the pointer in pattern */
1360 /* else do nothing */
1362 /* else do nothing */
1364 /* just ignore unknown tags */
1367 /* return # of items converted */
1371 #endif /* #if !UCONFIG_NO_FORMATTING */