1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 1998-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
13 * Modification History:
15 * Date Name Description
16 * 12/02/98 stephen Creation.
17 * 03/13/99 stephen Modified for new C API.
18 *******************************************************************************
21 #include "unicode/utypes.h"
23 #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION
25 #include "unicode/uchar.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unum.h"
28 #include "unicode/udat.h"
29 #include "unicode/uset.h"
38 /* flag characters for u_scanf */
39 #define FLAG_ASTERISK 0x002A
40 #define FLAG_PAREN 0x0028
42 #define ISFLAG(s) (s) == FLAG_ASTERISK || \
45 /* special characters for u_scanf */
46 #define SPEC_DOLLARSIGN 0x0024
49 #define DIGIT_ZERO 0x0030
50 #define DIGIT_ONE 0x0031
51 #define DIGIT_TWO 0x0032
52 #define DIGIT_THREE 0x0033
53 #define DIGIT_FOUR 0x0034
54 #define DIGIT_FIVE 0x0035
55 #define DIGIT_SIX 0x0036
56 #define DIGIT_SEVEN 0x0037
57 #define DIGIT_EIGHT 0x0038
58 #define DIGIT_NINE 0x0039
60 #define ISDIGIT(s) (s) == DIGIT_ZERO || \
63 (s) == DIGIT_THREE || \
64 (s) == DIGIT_FOUR || \
65 (s) == DIGIT_FIVE || \
67 (s) == DIGIT_SEVEN || \
68 (s) == DIGIT_EIGHT || \
71 /* u_scanf modifiers */
73 #define MOD_LOWERL 0x006C
76 #define ISMOD(s) (s) == MOD_H || \
77 (s) == MOD_LOWERL || \
81 * Struct holding the parsed fields (width, specifier, pad character, flags) of a single u_scanf format specification.
83 typedef struct u_scanf_spec_info
{
84 int32_t fWidth
; /* Width */
86 UChar fSpec
; /* Format specification */
88 UChar fPadChar
; /* Padding character */
90 UBool fSkipArg
; /* TRUE if arg should be skipped */
91 UBool fIsLongDouble
; /* L flag */
92 UBool fIsShort
; /* h flag */
93 UBool fIsLong
; /* l flag */
94 UBool fIsLongLong
; /* ll flag */
95 UBool fIsString
; /* TRUE if this is a NULL-terminated string. */
100 * Struct encapsulating a single u_scanf format specification.
102 typedef struct u_scanf_spec
{
103 u_scanf_spec_info fInfo
; /* Information on this spec */
104 int32_t fArgPos
; /* Position of data in arg list */
108 * Parse a single u_scanf format specifier in Unicode.
109 * @param fmt A pointer to a '%' character in a u_scanf format specification.
110 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
112 * @return The number of characters contained in this specifier.
115 u_scanf_parse_spec (const UChar
*fmt
,
118 const UChar
*s
= fmt
;
120 u_scanf_spec_info
*info
= &(spec
->fInfo
);
122 /* initialize spec to default values */
126 info
->fSpec
= 0x0000;
127 info
->fPadChar
= 0x0020;
128 info
->fSkipArg
= FALSE
;
129 info
->fIsLongDouble
= FALSE
;
130 info
->fIsShort
= FALSE
;
131 info
->fIsLong
= FALSE
;
132 info
->fIsLongLong
= FALSE
;
133 info
->fIsString
= TRUE
;
136 /* skip over the initial '%' */
139 /* Check for positional argument */
142 /* Save the current position */
145 /* handle positional parameters */
147 spec
->fArgPos
= (int) (*s
++ - DIGIT_ZERO
);
151 spec
->fArgPos
+= (int) (*s
++ - DIGIT_ZERO
);
155 /* if there is no '$', don't read anything */
156 if(*s
!= SPEC_DOLLARSIGN
) {
165 /* Get any format flags */
171 info
->fSkipArg
= TRUE
;
174 /* pad character specified */
177 /* first four characters are hex values for pad char */
178 info
->fPadChar
= (UChar
)ufmt_digitvalue(*s
++);
179 info
->fPadChar
= (UChar
)((info
->fPadChar
* 16) + ufmt_digitvalue(*s
++));
180 info
->fPadChar
= (UChar
)((info
->fPadChar
* 16) + ufmt_digitvalue(*s
++));
181 info
->fPadChar
= (UChar
)((info
->fPadChar
* 16) + ufmt_digitvalue(*s
++));
183 /* final character is ignored */
192 info
->fWidth
= (int) (*s
++ - DIGIT_ZERO
);
196 info
->fWidth
+= (int) (*s
++ - DIGIT_ZERO
);
200 /* Get any modifiers */
206 info
->fIsShort
= TRUE
;
209 /* long or long long */
211 if(*s
== MOD_LOWERL
) {
212 info
->fIsLongLong
= TRUE
;
213 /* skip over the next 'l' */
217 info
->fIsLong
= TRUE
;
222 info
->fIsLongDouble
= TRUE
;
227 /* finally, get the specifier letter */
230 /* return # of characters in this specifier */
231 return (int32_t)(s
- fmt
);
234 #define UP_PERCENT 0x0025
237 /* ANSI style formatting */
238 /* Use US-ASCII characters only for formatting */
241 #define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}
243 #define UFMT_STRING {ufmt_string, u_scanf_string_handler}
245 #define UFMT_CHAR {ufmt_string, u_scanf_char_handler}
247 #define UFMT_INT {ufmt_int, u_scanf_integer_handler}
249 #define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler}
251 #define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler}
253 #define UFMT_HEX {ufmt_int, u_scanf_hex_handler}
255 #define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler}
257 #define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler}
259 #define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler}
261 #define UFMT_COUNT {ufmt_count, u_scanf_count_handler}
263 #define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler}
265 /* non-ANSI extensions */
266 /* Use US-ASCII characters only for formatting */
269 #define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler}
271 #define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler}
273 #define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler}
274 /* C K is old format */
275 #define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler}
276 /* S U is old format */
277 #define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler}
280 #define UFMT_EMPTY {ufmt_empty, NULL}
283 * A u_scanf handler function.
284 * A u_scanf handler is responsible for handling a single u_scanf
285 * format specification, for example 'd' or 's'.
286 * @param stream The UFILE from which to read input.
287 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
288 * information on the format specification.
289 * @param args A pointer to the argument data
290 * @param fmt A pointer to the first character in the format string
291 * following the spec.
292 * @param fmtConsumed On output, set to the number of characters consumed
293 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
294 * @param argConverted The number of arguments converted and assigned, or -1 if an
296 * @return The number of code points consumed during reading.
298 typedef int32_t (*u_scanf_handler
) (UFILE
*stream
,
299 u_scanf_spec_info
*info
,
302 int32_t *fmtConsumed
,
303 int32_t *argConverted
);
305 typedef struct u_scanf_info
{
307 u_scanf_handler handler
;
310 #define USCANF_NUM_FMT_HANDLERS 108
311 #define USCANF_SYMBOL_BUFFER_SIZE 8
313 /* We do not use handlers for 0-0x1f */
314 #define USCANF_BASE_FMT_HANDLERS 0x20
318 u_scanf_skip_leading_ws(UFILE
*input
,
325 /* skip all leading pad characters and whitespace in the input */
326 while( (isNotEOF
= ufile_getch(input
, &c
)) && (c
== pad
|| u_isWhitespace(c
)) )
331 /* put the final character back on the input */
338 /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
340 u_scanf_skip_leading_positive_sign(UFILE
*input
,
341 UNumberFormat
*format
,
347 UChar plusSymbol
[USCANF_SYMBOL_BUFFER_SIZE
];
349 UErrorCode localStatus
= U_ZERO_ERROR
;
351 if (U_SUCCESS(*status
)) {
352 symbolLen
= unum_getSymbol(format
,
353 UNUM_PLUS_SIGN_SYMBOL
,
355 UPRV_LENGTHOF(plusSymbol
),
358 if (U_SUCCESS(localStatus
)) {
359 /* consume the leading plus-sign symbol, matching it one UChar at a time */
360 while( (isNotEOF
= ufile_getch(input
, &c
)) && (count
< symbolLen
&& c
== plusSymbol
[count
]) )
365 /* put the final character back on the input */
376 u_scanf_simple_percent_handler(UFILE
*input
,
377 u_scanf_spec_info
*info
,
380 int32_t *fmtConsumed
,
381 int32_t *argConverted
)
388 /* make sure the next character in the input is a percent */
390 if(u_fgetc(input
) != 0x0025) {
397 u_scanf_count_handler(UFILE
*input
,
398 u_scanf_spec_info
*info
,
401 int32_t *fmtConsumed
,
402 int32_t *argConverted
)
408 /* in the special case of count, the u_scanf_spec_info's width */
409 /* will contain the # of code points consumed thus far */
410 if (!info
->fSkipArg
) {
412 *(int16_t*)(args
[0].ptrValue
) = (int16_t)(UINT16_MAX
& info
->fWidth
);
413 else if (info
->fIsLongLong
)
414 *(int64_t*)(args
[0].ptrValue
) = info
->fWidth
;
416 *(int32_t*)(args
[0].ptrValue
) = (int32_t)(UINT32_MAX
& info
->fWidth
);
420 /* we converted 0 args */
425 u_scanf_double_handler(UFILE
*input
,
426 u_scanf_spec_info
*info
,
429 int32_t *fmtConsumed
,
430 int32_t *argConverted
)
437 UNumberFormat
*format
;
438 int32_t parsePos
= 0;
440 UErrorCode status
= U_ZERO_ERROR
;
443 /* skip all ws in the input */
444 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
446 /* fill the input's internal buffer */
447 ufile_fill_uchar_buffer(input
);
449 /* determine the size of the input's buffer */
450 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
452 /* truncate to the width, if specified */
453 if(info
->fWidth
!= -1)
454 len
= ufmt_min(len
, info
->fWidth
);
456 /* get the formatter */
457 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_DECIMAL
);
463 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
464 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
466 /* parse the number */
467 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
469 if (!info
->fSkipArg
) {
471 *(double*)(args
[0].ptrValue
) = num
;
472 else if (info
->fIsLongDouble
)
473 *(long double*)(args
[0].ptrValue
) = num
;
475 *(float*)(args
[0].ptrValue
) = (float)num
;
478 /* mask off any necessary bits */
479 /* if(! info->fIsLong_double)
482 /* update the input's position to reflect consumed data */
483 input
->str
.fPos
+= parsePos
;
485 /* we converted 1 arg */
486 *argConverted
= !info
->fSkipArg
;
487 return parsePos
+ skipped
;
490 #define UPRINTF_SYMBOL_BUFFER_SIZE 8
493 u_scanf_scientific_handler(UFILE
*input
,
494 u_scanf_spec_info
*info
,
497 int32_t *fmtConsumed
,
498 int32_t *argConverted
)
505 UNumberFormat
*format
;
506 int32_t parsePos
= 0;
508 UErrorCode status
= U_ZERO_ERROR
;
509 UChar srcExpBuf
[UPRINTF_SYMBOL_BUFFER_SIZE
];
510 int32_t srcLen
, expLen
;
511 UChar expBuf
[UPRINTF_SYMBOL_BUFFER_SIZE
];
514 /* skip all ws in the input */
515 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
517 /* fill the input's internal buffer */
518 ufile_fill_uchar_buffer(input
);
520 /* determine the size of the input's buffer */
521 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
523 /* truncate to the width, if specified */
524 if(info
->fWidth
!= -1)
525 len
= ufmt_min(len
, info
->fWidth
);
527 /* get the formatter */
528 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_SCIENTIFIC
);
534 /* set the appropriate flags on the formatter */
536 srcLen
= unum_getSymbol(format
,
537 UNUM_EXPONENTIAL_SYMBOL
,
542 /* Upper/lower case the e */
543 if (info
->fSpec
== (UChar
)0x65 /* e */) {
544 expLen
= u_strToLower(expBuf
, (int32_t)sizeof(expBuf
),
546 input
->str
.fBundle
.fLocale
,
550 expLen
= u_strToUpper(expBuf
, (int32_t)sizeof(expBuf
),
552 input
->str
.fBundle
.fLocale
,
556 unum_setSymbol(format
,
557 UNUM_EXPONENTIAL_SYMBOL
,
565 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
566 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
568 /* parse the number */
569 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
571 if (!info
->fSkipArg
) {
573 *(double*)(args
[0].ptrValue
) = num
;
574 else if (info
->fIsLongDouble
)
575 *(long double*)(args
[0].ptrValue
) = num
;
577 *(float*)(args
[0].ptrValue
) = (float)num
;
580 /* mask off any necessary bits */
581 /* if(! info->fIsLong_double)
584 /* update the input's position to reflect consumed data */
585 input
->str
.fPos
+= parsePos
;
587 /* we converted 1 arg */
588 *argConverted
= !info
->fSkipArg
;
589 return parsePos
+ skipped
;
593 u_scanf_scidbl_handler(UFILE
*input
,
594 u_scanf_spec_info
*info
,
597 int32_t *fmtConsumed
,
598 int32_t *argConverted
)
605 UNumberFormat
*scientificFormat
, *genericFormat
;
606 /*int32_t scientificResult, genericResult;*/
607 double scientificResult
, genericResult
;
608 int32_t scientificParsePos
= 0, genericParsePos
= 0, parsePos
= 0;
610 UErrorCode scientificStatus
= U_ZERO_ERROR
;
611 UErrorCode genericStatus
= U_ZERO_ERROR
;
614 /* since we can't determine by scanning the characters whether */
615 /* a number was formatted in the 'f' or 'g' styles, parse the */
616 /* string with both formatters, and assume whichever one */
617 /* parsed the most is the correct formatter to use */
620 /* skip all ws in the input */
621 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
623 /* fill the input's internal buffer */
624 ufile_fill_uchar_buffer(input
);
626 /* determine the size of the input's buffer */
627 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
629 /* truncate to the width, if specified */
630 if(info
->fWidth
!= -1)
631 len
= ufmt_min(len
, info
->fWidth
);
633 /* get the formatters */
634 scientificFormat
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_SCIENTIFIC
);
635 genericFormat
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_DECIMAL
);
638 if(scientificFormat
== 0 || genericFormat
== 0)
641 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
642 skipped
+= u_scanf_skip_leading_positive_sign(input
, genericFormat
, &genericStatus
);
644 /* parse the number using each format*/
646 scientificResult
= unum_parseDouble(scientificFormat
, input
->str
.fPos
, len
,
647 &scientificParsePos
, &scientificStatus
);
649 genericResult
= unum_parseDouble(genericFormat
, input
->str
.fPos
, len
,
650 &genericParsePos
, &genericStatus
);
652 /* determine which parse made it farther */
653 if(scientificParsePos
> genericParsePos
) {
654 /* stash the result in num */
655 num
= scientificResult
;
656 /* update the input's position to reflect consumed data */
657 parsePos
+= scientificParsePos
;
660 /* stash the result in num */
662 /* update the input's position to reflect consumed data */
663 parsePos
+= genericParsePos
;
665 input
->str
.fPos
+= parsePos
;
667 if (!info
->fSkipArg
) {
669 *(double*)(args
[0].ptrValue
) = num
;
670 else if (info
->fIsLongDouble
)
671 *(long double*)(args
[0].ptrValue
) = num
;
673 *(float*)(args
[0].ptrValue
) = (float)num
;
676 /* mask off any necessary bits */
677 /* if(! info->fIsLong_double)
680 /* we converted 1 arg */
681 *argConverted
= !info
->fSkipArg
;
682 return parsePos
+ skipped
;
686 u_scanf_integer_handler(UFILE
*input
,
687 u_scanf_spec_info
*info
,
690 int32_t *fmtConsumed
,
691 int32_t *argConverted
)
697 void *num
= (void*) (args
[0].ptrValue
);
698 UNumberFormat
*format
;
699 int32_t parsePos
= 0;
701 UErrorCode status
= U_ZERO_ERROR
;
705 /* skip all ws in the input */
706 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
708 /* fill the input's internal buffer */
709 ufile_fill_uchar_buffer(input
);
711 /* determine the size of the input's buffer */
712 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
714 /* truncate to the width, if specified */
715 if(info
->fWidth
!= -1)
716 len
= ufmt_min(len
, info
->fWidth
);
718 /* get the formatter */
719 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_DECIMAL
);
725 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
726 skipped
+= u_scanf_skip_leading_positive_sign(input
, format
, &status
);
728 /* parse the number */
729 result
= unum_parseInt64(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
731 /* mask off any necessary bits */
732 if (!info
->fSkipArg
) {
734 *(int16_t*)num
= (int16_t)(UINT16_MAX
& result
);
735 else if (info
->fIsLongLong
)
736 *(int64_t*)num
= result
;
738 *(int32_t*)num
= (int32_t)(UINT32_MAX
& result
);
741 /* update the input's position to reflect consumed data */
742 input
->str
.fPos
+= parsePos
;
744 /* we converted 1 arg */
745 *argConverted
= !info
->fSkipArg
;
746 return parsePos
+ skipped
;
750 u_scanf_uinteger_handler(UFILE
*input
,
751 u_scanf_spec_info
*info
,
754 int32_t *fmtConsumed
,
755 int32_t *argConverted
)
757 /* TODO Fix this when Numberformat handles uint64_t */
758 return u_scanf_integer_handler(input
, info
, args
, fmt
, fmtConsumed
, argConverted
);
762 u_scanf_percent_handler(UFILE
*input
,
763 u_scanf_spec_info
*info
,
766 int32_t *fmtConsumed
,
767 int32_t *argConverted
)
774 UNumberFormat
*format
;
775 int32_t parsePos
= 0;
776 UErrorCode status
= U_ZERO_ERROR
;
779 /* skip all ws in the input */
780 u_scanf_skip_leading_ws(input
, info
->fPadChar
);
782 /* fill the input's internal buffer */
783 ufile_fill_uchar_buffer(input
);
785 /* determine the size of the input's buffer */
786 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
788 /* truncate to the width, if specified */
789 if(info
->fWidth
!= -1)
790 len
= ufmt_min(len
, info
->fWidth
);
792 /* get the formatter */
793 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_PERCENT
);
799 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
800 u_scanf_skip_leading_positive_sign(input
, format
, &status
);
802 /* parse the number */
803 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
805 if (!info
->fSkipArg
) {
806 *(double*)(args
[0].ptrValue
) = num
;
809 /* mask off any necessary bits */
810 /* if(! info->fIsLong_double)
813 /* update the input's position to reflect consumed data */
814 input
->str
.fPos
+= parsePos
;
816 /* we converted 1 arg */
817 *argConverted
= !info
->fSkipArg
;
822 u_scanf_string_handler(UFILE
*input
,
823 u_scanf_spec_info
*info
,
826 int32_t *fmtConsumed
,
827 int32_t *argConverted
)
834 char *arg
= (char*)(args
[0].ptrValue
);
837 UErrorCode status
= U_ZERO_ERROR
;
841 UBool isNotEOF
= FALSE
;
843 /* skip all ws in the input */
844 if (info
->fIsString
) {
845 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
848 /* get the string one character at a time, truncating to the width */
851 /* open the default converter */
852 conv
= u_getDefaultConverter(&status
);
854 if(U_FAILURE(status
))
857 while( (info
->fWidth
== -1 || count
< info
->fWidth
)
858 && (isNotEOF
= ufile_getch(input
, &c
))
859 && (!info
->fIsString
|| (c
!= info
->fPadChar
&& !u_isWhitespace(c
))))
862 if (!info
->fSkipArg
) {
863 /* put the character from the input onto the target */
865 /* Since we do this one character at a time, do it this way. */
866 if (info
->fWidth
> 0) {
867 limit
= alias
+ info
->fWidth
- count
;
870 limit
= alias
+ ucnv_getMaxCharSize(conv
);
873 /* convert the character to the default codepage */
874 ucnv_fromUnicode(conv
, &alias
, limit
, &source
, source
+ 1,
875 NULL
, TRUE
, &status
);
877 if(U_FAILURE(status
)) {
879 u_releaseDefaultConverter(conv
);
884 /* increment the count */
888 /* put the final character we read back on the input */
889 if (!info
->fSkipArg
) {
890 if ((info
->fWidth
== -1 || count
< info
->fWidth
) && isNotEOF
)
893 /* add the terminator */
894 if (info
->fIsString
) {
900 u_releaseDefaultConverter(conv
);
902 /* we converted 1 arg */
903 *argConverted
= !info
->fSkipArg
;
904 return count
+ skipped
;
908 u_scanf_char_handler(UFILE
*input
,
909 u_scanf_spec_info
*info
,
912 int32_t *fmtConsumed
,
913 int32_t *argConverted
)
915 if (info
->fWidth
< 0) {
918 info
->fIsString
= FALSE
;
919 return u_scanf_string_handler(input
, info
, args
, fmt
, fmtConsumed
, argConverted
);
923 u_scanf_ustring_handler(UFILE
*input
,
924 u_scanf_spec_info
*info
,
927 int32_t *fmtConsumed
,
928 int32_t *argConverted
)
933 UChar
*arg
= (UChar
*)(args
[0].ptrValue
);
938 UBool isNotEOF
= FALSE
;
940 /* skip all ws in the input */
941 if (info
->fIsString
) {
942 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
945 /* get the string one character at a time, truncating to the width */
948 while( (info
->fWidth
== -1 || count
< info
->fWidth
)
949 && (isNotEOF
= ufile_getch(input
, &c
))
950 && (!info
->fIsString
|| (c
!= info
->fPadChar
&& !u_isWhitespace(c
))))
953 /* put the character from the input onto the target */
954 if (!info
->fSkipArg
) {
958 /* increment the count */
962 /* put the final character we read back on the input */
963 if (!info
->fSkipArg
) {
964 if((info
->fWidth
== -1 || count
< info
->fWidth
) && isNotEOF
) {
968 /* add the terminator */
969 if (info
->fIsString
) {
974 /* we converted 1 arg */
975 *argConverted
= !info
->fSkipArg
;
976 return count
+ skipped
;
980 u_scanf_uchar_handler(UFILE
*input
,
981 u_scanf_spec_info
*info
,
984 int32_t *fmtConsumed
,
985 int32_t *argConverted
)
987 if (info
->fWidth
< 0) {
990 info
->fIsString
= FALSE
;
991 return u_scanf_ustring_handler(input
, info
, args
, fmt
, fmtConsumed
, argConverted
);
995 u_scanf_spellout_handler(UFILE
*input
,
996 u_scanf_spec_info
*info
,
999 int32_t *fmtConsumed
,
1000 int32_t *argConverted
)
1007 UNumberFormat
*format
;
1008 int32_t parsePos
= 0;
1010 UErrorCode status
= U_ZERO_ERROR
;
1013 /* skip all ws in the input */
1014 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1016 /* fill the input's internal buffer */
1017 ufile_fill_uchar_buffer(input
);
1019 /* determine the size of the input's buffer */
1020 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1022 /* truncate to the width, if specified */
1023 if(info
->fWidth
!= -1)
1024 len
= ufmt_min(len
, info
->fWidth
);
1026 /* get the formatter */
1027 format
= u_locbund_getNumberFormat(&input
->str
.fBundle
, UNUM_SPELLOUT
);
1033 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
1034 /* This is not applicable to RBNF. */
1035 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
1037 /* parse the number */
1038 num
= unum_parseDouble(format
, input
->str
.fPos
, len
, &parsePos
, &status
);
1040 if (!info
->fSkipArg
) {
1041 *(double*)(args
[0].ptrValue
) = num
;
1044 /* mask off any necessary bits */
1045 /* if(! info->fIsLong_double)
1048 /* update the input's position to reflect consumed data */
1049 input
->str
.fPos
+= parsePos
;
1051 /* we converted 1 arg */
1052 *argConverted
= !info
->fSkipArg
;
1053 return parsePos
+ skipped
;
1057 u_scanf_hex_handler(UFILE
*input
,
1058 u_scanf_spec_info
*info
,
1061 int32_t *fmtConsumed
,
1062 int32_t *argConverted
)
1069 void *num
= (void*) (args
[0].ptrValue
);
1072 /* skip all ws in the input */
1073 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1075 /* fill the input's internal buffer */
1076 ufile_fill_uchar_buffer(input
);
1078 /* determine the size of the input's buffer */
1079 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1081 /* truncate to the width, if specified */
1082 if(info
->fWidth
!= -1)
1083 len
= ufmt_min(len
, info
->fWidth
);
1085 /* check for alternate form */
1086 if( *(input
->str
.fPos
) == 0x0030 &&
1087 (*(input
->str
.fPos
+ 1) == 0x0078 || *(input
->str
.fPos
+ 1) == 0x0058) ) {
1089 /* skip the '0' and 'x' or 'X' if present */
1090 input
->str
.fPos
+= 2;
1094 /* parse the number */
1095 result
= ufmt_uto64(input
->str
.fPos
, &len
, 16);
1097 /* update the input's position to reflect consumed data */
1098 input
->str
.fPos
+= len
;
1100 /* mask off any necessary bits */
1101 if (!info
->fSkipArg
) {
1103 *(int16_t*)num
= (int16_t)(UINT16_MAX
& result
);
1104 else if (info
->fIsLongLong
)
1105 *(int64_t*)num
= result
;
1107 *(int32_t*)num
= (int32_t)(UINT32_MAX
& result
);
1110 /* we converted 1 arg */
1111 *argConverted
= !info
->fSkipArg
;
1112 return len
+ skipped
;
1116 u_scanf_octal_handler(UFILE
*input
,
1117 u_scanf_spec_info
*info
,
1120 int32_t *fmtConsumed
,
1121 int32_t *argConverted
)
1128 void *num
= (void*) (args
[0].ptrValue
);
1131 /* skip all ws in the input */
1132 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1134 /* fill the input's internal buffer */
1135 ufile_fill_uchar_buffer(input
);
1137 /* determine the size of the input's buffer */
1138 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1140 /* truncate to the width, if specified */
1141 if(info
->fWidth
!= -1)
1142 len
= ufmt_min(len
, info
->fWidth
);
1144 /* parse the number */
1145 result
= ufmt_uto64(input
->str
.fPos
, &len
, 8);
1147 /* update the input's position to reflect consumed data */
1148 input
->str
.fPos
+= len
;
1150 /* mask off any necessary bits */
1151 if (!info
->fSkipArg
) {
1153 *(int16_t*)num
= (int16_t)(UINT16_MAX
& result
);
1154 else if (info
->fIsLongLong
)
1155 *(int64_t*)num
= result
;
1157 *(int32_t*)num
= (int32_t)(UINT32_MAX
& result
);
1160 /* we converted 1 arg */
1161 *argConverted
= !info
->fSkipArg
;
1162 return len
+ skipped
;
1166 u_scanf_pointer_handler(UFILE
*input
,
1167 u_scanf_spec_info
*info
,
1170 int32_t *fmtConsumed
,
1171 int32_t *argConverted
)
1179 void **p
= (void**)(args
[0].ptrValue
);
1182 /* skip all ws in the input */
1183 skipped
= u_scanf_skip_leading_ws(input
, info
->fPadChar
);
1185 /* fill the input's internal buffer */
1186 ufile_fill_uchar_buffer(input
);
1188 /* determine the size of the input's buffer */
1189 len
= (int32_t)(input
->str
.fLimit
- input
->str
.fPos
);
1191 /* truncate to the width, if specified */
1192 if(info
->fWidth
!= -1) {
1193 len
= ufmt_min(len
, info
->fWidth
);
1196 /* Make sure that we don't consume too much */
1197 if (len
> (int32_t)(sizeof(void*)*2)) {
1198 len
= (int32_t)(sizeof(void*)*2);
1201 /* parse the pointer - assign to temporary value */
1202 result
= ufmt_utop(input
->str
.fPos
, &len
);
1204 if (!info
->fSkipArg
) {
1208 /* update the input's position to reflect consumed data */
1209 input
->str
.fPos
+= len
;
1211 /* we converted 1 arg */
1212 *argConverted
= !info
->fSkipArg
;
1213 return len
+ skipped
;
1217 u_scanf_scanset_handler(UFILE
*input
,
1218 u_scanf_spec_info
*info
,
1221 int32_t *fmtConsumed
,
1222 int32_t *argConverted
)
1225 UErrorCode status
= U_ZERO_ERROR
;
1226 int32_t chLeft
= INT32_MAX
;
1228 UChar
*alias
= (UChar
*) (args
[0].ptrValue
);
1229 UBool isNotEOF
= FALSE
;
1230 UBool readCharacter
= FALSE
;
1232 /* Create an empty set */
1233 scanset
= uset_open(0, -1);
1235 /* Back up one to get the [ */
1238 /* truncate to the width, if specified and alias the target */
1239 if(info
->fWidth
>= 0) {
1240 chLeft
= info
->fWidth
;
1243 /* parse the scanset from the fmt string */
1244 *fmtConsumed
= uset_applyPattern(scanset
, fmt
, -1, 0, &status
);
1246 /* verify that the parse was successful */
1247 if (U_SUCCESS(status
)) {
1250 /* grab characters one at a time and make sure they are in the scanset */
1252 if ((isNotEOF
= ufile_getch32(input
, &c
)) && uset_contains(scanset
, c
)) {
1253 readCharacter
= TRUE
;
1254 if (!info
->fSkipArg
) {
1256 UBool isError
= FALSE
;
1258 U16_APPEND(alias
, idx
, chLeft
, c
, isError
);
1264 chLeft
-= (1 + U_IS_SUPPLEMENTARY(c
));
1267 /* if the character's not in the scanset, break out */
1272 /* put the final character we read back on the input */
1273 if(isNotEOF
&& chLeft
> 0) {
1274 u_fungetc(c
, input
);
1278 uset_close(scanset
);
1280 /* if we didn't match at least 1 character, fail */
1283 /* otherwise, add the terminator */
1284 else if (!info
->fSkipArg
) {
1288 /* we converted 1 arg */
1289 *argConverted
= !info
->fSkipArg
;
1290 return (info
->fWidth
>= 0 ? info
->fWidth
: INT32_MAX
) - chLeft
;
1293 /* Use US-ASCII characters only for formatting. Most codepages have
1294 characters 20-7F from Unicode. Using any other codepage specific
1295 characters will make it very difficult to format the string on
1296 non-Unicode machines */
1297 static const u_scanf_info g_u_scanf_infos
[USCANF_NUM_FMT_HANDLERS
] = {
1299 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1300 UFMT_EMPTY
, UFMT_SIMPLE_PERCENT
,UFMT_EMPTY
, UFMT_EMPTY
,
1301 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1302 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1305 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1306 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1307 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1308 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1311 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_UCHAR
,
1312 UFMT_EMPTY
, UFMT_SCIENTIFIC
, UFMT_EMPTY
, UFMT_SCIDBL
,
1313 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1314 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_UCHAR
/*deprecated*/,
1316 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1318 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1321 UFMT_PERCENT
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_USTRING
,
1322 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1323 UFMT_EMPTY
, UFMT_USTRING
/*deprecated*/,UFMT_SPELLOUT
, UFMT_EMPTY
,
1325 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_SPELLOUT
, UFMT_EMPTY
,
1327 UFMT_HEX
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_SCANSET
,
1328 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1331 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_CHAR
,
1332 UFMT_INT
, UFMT_SCIENTIFIC
, UFMT_DOUBLE
, UFMT_SCIDBL
,
1333 UFMT_EMPTY
, UFMT_INT
, UFMT_EMPTY
, UFMT_EMPTY
,
1334 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_COUNT
, UFMT_OCTAL
,
1337 UFMT_POINTER
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_STRING
,
1338 UFMT_EMPTY
, UFMT_UINT
, UFMT_EMPTY
, UFMT_EMPTY
,
1339 UFMT_HEX
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1340 UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
, UFMT_EMPTY
,
1344 u_scanf_parse(UFILE
*f
,
1345 const UChar
*patternSpecification
,
1349 int32_t count
, converted
, argConsumed
, cpConsumed
;
1350 uint16_t handlerNum
;
1354 ufmt_type_info info
;
1355 u_scanf_handler handler
;
1357 /* alias the pattern */
1358 alias
= patternSpecification
;
1360 /* haven't converted anything yet */
1365 /* iterate through the pattern */
1368 /* match any characters up to the next '%' */
1369 while(*alias
!= UP_PERCENT
&& *alias
!= 0x0000 && u_fgetc(f
) == *alias
) {
1373 /* if we aren't at a '%', or if we're at end of string, break*/
1374 if(*alias
!= UP_PERCENT
|| *alias
== 0x0000)
1377 /* parse the specifier */
1378 count
= u_scanf_parse_spec(alias
, &spec
);
1380 /* update the pointer in pattern */
1383 handlerNum
= (uint16_t)(spec
.fInfo
.fSpec
- USCANF_BASE_FMT_HANDLERS
);
1384 if (handlerNum
< USCANF_NUM_FMT_HANDLERS
) {
1385 /* skip the argument, if necessary */
1386 /* query the info function for argument information */
1387 info
= g_u_scanf_infos
[ handlerNum
].info
;
1388 if (info
!= ufmt_count
&& u_feof(f
)) {
1391 else if(spec
.fInfo
.fSkipArg
) {
1392 args
.ptrValue
= NULL
;
1397 /* set the spec's width to the # of code points consumed so far */
1398 spec
.fInfo
.fWidth
= cpConsumed
;
1408 args
.ptrValue
= va_arg(ap
, void*);
1412 /* else args is ignored */
1413 args
.ptrValue
= NULL
;
1418 /* call the handler function */
1419 handler
= g_u_scanf_infos
[ handlerNum
].handler
;
1422 /* reset count to 1 so that += for alias works. */
1425 cpConsumed
+= (*handler
)(f
, &spec
.fInfo
, &args
, alias
, &count
, &argConsumed
);
1427 /* if the handler encountered an error condition, break */
1428 if(argConsumed
< 0) {
1433 /* add to the # of items converted */
1434 converted
+= argConsumed
;
1436 /* update the pointer in pattern */
1439 /* else do nothing */
1441 /* else do nothing */
1443 /* just ignore unknown tags */
1446 /* return # of items converted */
1450 #endif /* #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION */