]> git.saurik.com Git - apple/icu.git/blob - icuSources/io/uscanf_p.cpp
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / io / uscanf_p.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 1998-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File uscanf_p.cpp
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 12/02/98 stephen Creation.
17 * 03/13/99 stephen Modified for new C API.
18 *******************************************************************************
19 */
20
21 #include "unicode/utypes.h"
22
23 #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION
24
25 #include "unicode/uchar.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unum.h"
28 #include "unicode/udat.h"
29 #include "unicode/uset.h"
30 #include "uscanf.h"
31 #include "ufmt_cmn.h"
32 #include "ufile.h"
33 #include "locbund.h"
34
35 #include "cmemory.h"
36 #include "ustr_cnv.h"
37
/* flag characters for u_scanf */
#define FLAG_ASTERISK   0x002A
#define FLAG_PAREN      0x0028

/*
 * Non-zero if s is one of the u_scanf flag characters ('*' or '(').
 * The whole expansion is parenthesized so that it composes correctly with
 * surrounding operators (for example !ISFLAG(c) or ISFLAG(c) && x).
 * s is evaluated more than once, so it must not have side effects.
 */
#define ISFLAG(s)       ((s) == FLAG_ASTERISK || \
                         (s) == FLAG_PAREN)

/* special characters for u_scanf */
#define SPEC_DOLLARSIGN 0x0024
47
/* unicode digits (U+0030 '0' through U+0039 '9') */
#define DIGIT_ZERO  0x0030
#define DIGIT_ONE   0x0031
#define DIGIT_TWO   0x0032
#define DIGIT_THREE 0x0033
#define DIGIT_FOUR  0x0034
#define DIGIT_FIVE  0x0035
#define DIGIT_SIX   0x0036
#define DIGIT_SEVEN 0x0037
#define DIGIT_EIGHT 0x0038
#define DIGIT_NINE  0x0039

/*
 * Non-zero if s is one of the ten digits defined above.
 * The digit constants are contiguous, so a range test is equivalent to
 * comparing against each of them. The expansion is fully parenthesized so
 * that it composes correctly with surrounding operators (e.g. !ISDIGIT(c)).
 * s is evaluated twice, so it must not have side effects.
 */
#define ISDIGIT(s)  ((s) >= DIGIT_ZERO && (s) <= DIGIT_NINE)
70
/* u_scanf modifiers */
#define MOD_H       0x0068  /* 'h' */
#define MOD_LOWERL  0x006C  /* 'l' */
#define MOD_L       0x004C  /* 'L' */

/*
 * Non-zero if s is one of the u_scanf size modifiers ('h', 'l' or 'L').
 * The expansion is fully parenthesized so that it composes correctly with
 * surrounding operators (e.g. !ISMOD(c)).
 * s is evaluated more than once, so it must not have side effects.
 */
#define ISMOD(s)    ((s) == MOD_H || \
                     (s) == MOD_LOWERL || \
                     (s) == MOD_L)
79
80 /**
81 * Struct encapsulating a single uscanf format specification.
82 */
83 typedef struct u_scanf_spec_info {
84 int32_t fWidth; /* Width */
85
86 UChar fSpec; /* Format specification */
87
88 UChar fPadChar; /* Padding character */
89
90 UBool fSkipArg; /* TRUE if arg should be skipped */
91 UBool fIsLongDouble; /* L flag */
92 UBool fIsShort; /* h flag */
93 UBool fIsLong; /* l flag */
94 UBool fIsLongLong; /* ll flag */
95 UBool fIsString; /* TRUE if this is a NULL-terminated string. */
96 } u_scanf_spec_info;
97
98
99 /**
100 * Struct encapsulating a single u_scanf format specification.
101 */
102 typedef struct u_scanf_spec {
103 u_scanf_spec_info fInfo; /* Information on this spec */
104 int32_t fArgPos; /* Position of data in arg list */
105 } u_scanf_spec;
106
107 /**
108 * Parse a single u_scanf format specifier in Unicode.
109 * @param fmt A pointer to a '%' character in a u_scanf format specification.
110 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
111 * format specifier.
112 * @return The number of characters contained in this specifier.
113 */
114 static int32_t
115 u_scanf_parse_spec (const UChar *fmt,
116 u_scanf_spec *spec)
117 {
118 const UChar *s = fmt;
119 const UChar *backup;
120 u_scanf_spec_info *info = &(spec->fInfo);
121
122 /* initialize spec to default values */
123 spec->fArgPos = -1;
124
125 info->fWidth = -1;
126 info->fSpec = 0x0000;
127 info->fPadChar = 0x0020;
128 info->fSkipArg = FALSE;
129 info->fIsLongDouble = FALSE;
130 info->fIsShort = FALSE;
131 info->fIsLong = FALSE;
132 info->fIsLongLong = FALSE;
133 info->fIsString = TRUE;
134
135
136 /* skip over the initial '%' */
137 s++;
138
139 /* Check for positional argument */
140 if(ISDIGIT(*s)) {
141
142 /* Save the current position */
143 backup = s;
144
145 /* handle positional parameters */
146 if(ISDIGIT(*s)) {
147 spec->fArgPos = (int) (*s++ - DIGIT_ZERO);
148
149 while(ISDIGIT(*s)) {
150 spec->fArgPos *= 10;
151 spec->fArgPos += (int) (*s++ - DIGIT_ZERO);
152 }
153 }
154
155 /* if there is no '$', don't read anything */
156 if(*s != SPEC_DOLLARSIGN) {
157 spec->fArgPos = -1;
158 s = backup;
159 }
160 /* munge the '$' */
161 else
162 s++;
163 }
164
165 /* Get any format flags */
166 while(ISFLAG(*s)) {
167 switch(*s++) {
168
169 /* skip argument */
170 case FLAG_ASTERISK:
171 info->fSkipArg = TRUE;
172 break;
173
174 /* pad character specified */
175 case FLAG_PAREN:
176
177 /* first four characters are hex values for pad char */
178 info->fPadChar = (UChar)ufmt_digitvalue(*s++);
179 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
180 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
181 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
182
183 /* final character is ignored */
184 s++;
185
186 break;
187 }
188 }
189
190 /* Get the width */
191 if(ISDIGIT(*s)){
192 info->fWidth = (int) (*s++ - DIGIT_ZERO);
193
194 while(ISDIGIT(*s)) {
195 info->fWidth *= 10;
196 info->fWidth += (int) (*s++ - DIGIT_ZERO);
197 }
198 }
199
200 /* Get any modifiers */
201 if(ISMOD(*s)) {
202 switch(*s++) {
203
204 /* short */
205 case MOD_H:
206 info->fIsShort = TRUE;
207 break;
208
209 /* long or long long */
210 case MOD_LOWERL:
211 if(*s == MOD_LOWERL) {
212 info->fIsLongLong = TRUE;
213 /* skip over the next 'l' */
214 s++;
215 }
216 else
217 info->fIsLong = TRUE;
218 break;
219
220 /* long double */
221 case MOD_L:
222 info->fIsLongDouble = TRUE;
223 break;
224 }
225 }
226
227 /* finally, get the specifier letter */
228 info->fSpec = *s++;
229
230 /* return # of characters in this specifier */
231 return (int32_t)(s - fmt);
232 }
233
/* The '%' character that introduces a format specification */
#define UP_PERCENT 0x0025


/*
 * Each UFMT_* macro below expands to a brace initializer that pairs an
 * argument type tag with the handler function for one conversion.
 */

/* ANSI style formatting */
/* Use US-ASCII characters only for formatting */

#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}   /* %    */
#define UFMT_STRING         {ufmt_string, u_scanf_string_handler}                   /* s    */
#define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}                     /* c    */
#define UFMT_INT            {ufmt_int, u_scanf_integer_handler}                     /* d, i */
#define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}                    /* u    */
#define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}                       /* o    */
#define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}                         /* x, X */
#define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}                   /* f    */
#define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}               /* e, E */
#define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}                   /* g, G */
#define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}                     /* n    */
#define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}                  /* [    */

/* non-ANSI extensions */
/* Use US-ASCII characters only for formatting */

#define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}                 /* p    */
#define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}                 /* V    */
#define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}                  /* P    */
#define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}                     /* C; K is old format */
#define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}                 /* S; U is old format */


/* Entry with no handler */
#define UFMT_EMPTY          {ufmt_empty, NULL}
281
282 /**
283 * A u_scanf handler function.
284 * A u_scanf handler is responsible for handling a single u_scanf
285 * format specification, for example 'd' or 's'.
286 * @param stream The UFILE to which to write output.
287 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
288 * information on the format specification.
289 * @param args A pointer to the argument data
290 * @param fmt A pointer to the first character in the format string
291 * following the spec.
292 * @param fmtConsumed On output, set to the number of characters consumed
293 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
294 * @param argConverted The number of arguments converted and assigned, or -1 if an
295 * error occurred.
296 * @return The number of code points consumed during reading.
297 */
298 typedef int32_t (*u_scanf_handler) (UFILE *stream,
299 u_scanf_spec_info *info,
300 ufmt_args *args,
301 const UChar *fmt,
302 int32_t *fmtConsumed,
303 int32_t *argConverted);
304
305 typedef struct u_scanf_info {
306 ufmt_type_info info;
307 u_scanf_handler handler;
308 } u_scanf_info;
309
/* NOTE(review): presumably the number of entries in the handler table - confirm */
#define USCANF_NUM_FMT_HANDLERS     108

/* Capacity, in UChars, of the buffer that receives a number-format symbol */
#define USCANF_SYMBOL_BUFFER_SIZE   8

/* We do not use handlers for 0-0x1f */
#define USCANF_BASE_FMT_HANDLERS    0x20
315
316
317 static int32_t
318 u_scanf_skip_leading_ws(UFILE *input,
319 UChar pad)
320 {
321 UChar c;
322 int32_t count = 0;
323 UBool isNotEOF;
324
325 /* skip all leading ws in the input */
326 while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) )
327 {
328 count++;
329 }
330
331 /* put the final character back on the input */
332 if(isNotEOF)
333 u_fungetc(c, input);
334
335 return count;
336 }
337
338 /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
339 static int32_t
340 u_scanf_skip_leading_positive_sign(UFILE *input,
341 UNumberFormat *format,
342 UErrorCode *status)
343 {
344 UChar c;
345 int32_t count = 0;
346 UBool isNotEOF;
347 UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
348 int32_t symbolLen;
349 UErrorCode localStatus = U_ZERO_ERROR;
350
351 if (U_SUCCESS(*status)) {
352 symbolLen = unum_getSymbol(format,
353 UNUM_PLUS_SIGN_SYMBOL,
354 plusSymbol,
355 UPRV_LENGTHOF(plusSymbol),
356 &localStatus);
357
358 if (U_SUCCESS(localStatus)) {
359 /* skip all leading ws in the input */
360 while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) )
361 {
362 count++;
363 }
364
365 /* put the final character back on the input */
366 if(isNotEOF) {
367 u_fungetc(c, input);
368 }
369 }
370 }
371
372 return count;
373 }
374
375 static int32_t
376 u_scanf_simple_percent_handler(UFILE *input,
377 u_scanf_spec_info *info,
378 ufmt_args *args,
379 const UChar *fmt,
380 int32_t *fmtConsumed,
381 int32_t *argConverted)
382 {
383 (void)info;
384 (void)args;
385 (void)fmt;
386 (void)fmtConsumed;
387
388 /* make sure the next character in the input is a percent */
389 *argConverted = 0;
390 if(u_fgetc(input) != 0x0025) {
391 *argConverted = -1;
392 }
393 return 1;
394 }
395
396 static int32_t
397 u_scanf_count_handler(UFILE *input,
398 u_scanf_spec_info *info,
399 ufmt_args *args,
400 const UChar *fmt,
401 int32_t *fmtConsumed,
402 int32_t *argConverted)
403 {
404 (void)input;
405 (void)fmt;
406 (void)fmtConsumed;
407
408 /* in the special case of count, the u_scanf_spec_info's width */
409 /* will contain the # of items converted thus far */
410 if (!info->fSkipArg) {
411 if (info->fIsShort)
412 *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth);
413 else if (info->fIsLongLong)
414 *(int64_t*)(args[0].ptrValue) = info->fWidth;
415 else
416 *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth);
417 }
418 *argConverted = 0;
419
420 /* we converted 0 args */
421 return 0;
422 }
423
424 static int32_t
425 u_scanf_double_handler(UFILE *input,
426 u_scanf_spec_info *info,
427 ufmt_args *args,
428 const UChar *fmt,
429 int32_t *fmtConsumed,
430 int32_t *argConverted)
431 {
432 (void)fmt;
433 (void)fmtConsumed;
434
435 int32_t len;
436 double num;
437 UNumberFormat *format;
438 int32_t parsePos = 0;
439 int32_t skipped;
440 UErrorCode status = U_ZERO_ERROR;
441
442
443 /* skip all ws in the input */
444 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
445
446 /* fill the input's internal buffer */
447 ufile_fill_uchar_buffer(input);
448
449 /* determine the size of the input's buffer */
450 len = (int32_t)(input->str.fLimit - input->str.fPos);
451
452 /* truncate to the width, if specified */
453 if(info->fWidth != -1)
454 len = ufmt_min(len, info->fWidth);
455
456 /* get the formatter */
457 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
458
459 /* handle error */
460 if(format == 0)
461 return 0;
462
463 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
464 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
465
466 /* parse the number */
467 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
468
469 if (!info->fSkipArg) {
470 if (info->fIsLong)
471 *(double*)(args[0].ptrValue) = num;
472 else if (info->fIsLongDouble)
473 *(long double*)(args[0].ptrValue) = num;
474 else
475 *(float*)(args[0].ptrValue) = (float)num;
476 }
477
478 /* mask off any necessary bits */
479 /* if(! info->fIsLong_double)
480 num &= DBL_MAX;*/
481
482 /* update the input's position to reflect consumed data */
483 input->str.fPos += parsePos;
484
485 /* we converted 1 arg */
486 *argConverted = !info->fSkipArg;
487 return parsePos + skipped;
488 }
489
490 #define UPRINTF_SYMBOL_BUFFER_SIZE 8
491
492 static int32_t
493 u_scanf_scientific_handler(UFILE *input,
494 u_scanf_spec_info *info,
495 ufmt_args *args,
496 const UChar *fmt,
497 int32_t *fmtConsumed,
498 int32_t *argConverted)
499 {
500 (void)fmt;
501 (void)fmtConsumed;
502
503 int32_t len;
504 double num;
505 UNumberFormat *format;
506 int32_t parsePos = 0;
507 int32_t skipped;
508 UErrorCode status = U_ZERO_ERROR;
509 UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
510 int32_t srcLen, expLen;
511 UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
512
513
514 /* skip all ws in the input */
515 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
516
517 /* fill the input's internal buffer */
518 ufile_fill_uchar_buffer(input);
519
520 /* determine the size of the input's buffer */
521 len = (int32_t)(input->str.fLimit - input->str.fPos);
522
523 /* truncate to the width, if specified */
524 if(info->fWidth != -1)
525 len = ufmt_min(len, info->fWidth);
526
527 /* get the formatter */
528 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
529
530 /* handle error */
531 if(format == 0)
532 return 0;
533
534 /* set the appropriate flags on the formatter */
535
536 srcLen = unum_getSymbol(format,
537 UNUM_EXPONENTIAL_SYMBOL,
538 srcExpBuf,
539 sizeof(srcExpBuf),
540 &status);
541
542 /* Upper/lower case the e */
543 if (info->fSpec == (UChar)0x65 /* e */) {
544 expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf),
545 srcExpBuf, srcLen,
546 input->str.fBundle.fLocale,
547 &status);
548 }
549 else {
550 expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf),
551 srcExpBuf, srcLen,
552 input->str.fBundle.fLocale,
553 &status);
554 }
555
556 unum_setSymbol(format,
557 UNUM_EXPONENTIAL_SYMBOL,
558 expBuf,
559 expLen,
560 &status);
561
562
563
564
565 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
566 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
567
568 /* parse the number */
569 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
570
571 if (!info->fSkipArg) {
572 if (info->fIsLong)
573 *(double*)(args[0].ptrValue) = num;
574 else if (info->fIsLongDouble)
575 *(long double*)(args[0].ptrValue) = num;
576 else
577 *(float*)(args[0].ptrValue) = (float)num;
578 }
579
580 /* mask off any necessary bits */
581 /* if(! info->fIsLong_double)
582 num &= DBL_MAX;*/
583
584 /* update the input's position to reflect consumed data */
585 input->str.fPos += parsePos;
586
587 /* we converted 1 arg */
588 *argConverted = !info->fSkipArg;
589 return parsePos + skipped;
590 }
591
592 static int32_t
593 u_scanf_scidbl_handler(UFILE *input,
594 u_scanf_spec_info *info,
595 ufmt_args *args,
596 const UChar *fmt,
597 int32_t *fmtConsumed,
598 int32_t *argConverted)
599 {
600 (void)fmt;
601 (void)fmtConsumed;
602
603 int32_t len;
604 double num;
605 UNumberFormat *scientificFormat, *genericFormat;
606 /*int32_t scientificResult, genericResult;*/
607 double scientificResult, genericResult;
608 int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
609 int32_t skipped;
610 UErrorCode scientificStatus = U_ZERO_ERROR;
611 UErrorCode genericStatus = U_ZERO_ERROR;
612
613
614 /* since we can't determine by scanning the characters whether */
615 /* a number was formatted in the 'f' or 'g' styles, parse the */
616 /* string with both formatters, and assume whichever one */
617 /* parsed the most is the correct formatter to use */
618
619
620 /* skip all ws in the input */
621 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
622
623 /* fill the input's internal buffer */
624 ufile_fill_uchar_buffer(input);
625
626 /* determine the size of the input's buffer */
627 len = (int32_t)(input->str.fLimit - input->str.fPos);
628
629 /* truncate to the width, if specified */
630 if(info->fWidth != -1)
631 len = ufmt_min(len, info->fWidth);
632
633 /* get the formatters */
634 scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
635 genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
636
637 /* handle error */
638 if(scientificFormat == 0 || genericFormat == 0)
639 return 0;
640
641 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
642 skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);
643
644 /* parse the number using each format*/
645
646 scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
647 &scientificParsePos, &scientificStatus);
648
649 genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
650 &genericParsePos, &genericStatus);
651
652 /* determine which parse made it farther */
653 if(scientificParsePos > genericParsePos) {
654 /* stash the result in num */
655 num = scientificResult;
656 /* update the input's position to reflect consumed data */
657 parsePos += scientificParsePos;
658 }
659 else {
660 /* stash the result in num */
661 num = genericResult;
662 /* update the input's position to reflect consumed data */
663 parsePos += genericParsePos;
664 }
665 input->str.fPos += parsePos;
666
667 if (!info->fSkipArg) {
668 if (info->fIsLong)
669 *(double*)(args[0].ptrValue) = num;
670 else if (info->fIsLongDouble)
671 *(long double*)(args[0].ptrValue) = num;
672 else
673 *(float*)(args[0].ptrValue) = (float)num;
674 }
675
676 /* mask off any necessary bits */
677 /* if(! info->fIsLong_double)
678 num &= DBL_MAX;*/
679
680 /* we converted 1 arg */
681 *argConverted = !info->fSkipArg;
682 return parsePos + skipped;
683 }
684
685 static int32_t
686 u_scanf_integer_handler(UFILE *input,
687 u_scanf_spec_info *info,
688 ufmt_args *args,
689 const UChar *fmt,
690 int32_t *fmtConsumed,
691 int32_t *argConverted)
692 {
693 (void)fmt;
694 (void)fmtConsumed;
695
696 int32_t len;
697 void *num = (void*) (args[0].ptrValue);
698 UNumberFormat *format;
699 int32_t parsePos = 0;
700 int32_t skipped;
701 UErrorCode status = U_ZERO_ERROR;
702 int64_t result;
703
704
705 /* skip all ws in the input */
706 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
707
708 /* fill the input's internal buffer */
709 ufile_fill_uchar_buffer(input);
710
711 /* determine the size of the input's buffer */
712 len = (int32_t)(input->str.fLimit - input->str.fPos);
713
714 /* truncate to the width, if specified */
715 if(info->fWidth != -1)
716 len = ufmt_min(len, info->fWidth);
717
718 /* get the formatter */
719 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
720
721 /* handle error */
722 if(format == 0)
723 return 0;
724
725 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
726 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
727
728 /* parse the number */
729 result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status);
730
731 /* mask off any necessary bits */
732 if (!info->fSkipArg) {
733 if (info->fIsShort)
734 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
735 else if (info->fIsLongLong)
736 *(int64_t*)num = result;
737 else
738 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
739 }
740
741 /* update the input's position to reflect consumed data */
742 input->str.fPos += parsePos;
743
744 /* we converted 1 arg */
745 *argConverted = !info->fSkipArg;
746 return parsePos + skipped;
747 }
748
749 static int32_t
750 u_scanf_uinteger_handler(UFILE *input,
751 u_scanf_spec_info *info,
752 ufmt_args *args,
753 const UChar *fmt,
754 int32_t *fmtConsumed,
755 int32_t *argConverted)
756 {
757 /* TODO Fix this when Numberformat handles uint64_t */
758 return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
759 }
760
761 static int32_t
762 u_scanf_percent_handler(UFILE *input,
763 u_scanf_spec_info *info,
764 ufmt_args *args,
765 const UChar *fmt,
766 int32_t *fmtConsumed,
767 int32_t *argConverted)
768 {
769 (void)fmt;
770 (void)fmtConsumed;
771
772 int32_t len;
773 double num;
774 UNumberFormat *format;
775 int32_t parsePos = 0;
776 UErrorCode status = U_ZERO_ERROR;
777
778
779 /* skip all ws in the input */
780 u_scanf_skip_leading_ws(input, info->fPadChar);
781
782 /* fill the input's internal buffer */
783 ufile_fill_uchar_buffer(input);
784
785 /* determine the size of the input's buffer */
786 len = (int32_t)(input->str.fLimit - input->str.fPos);
787
788 /* truncate to the width, if specified */
789 if(info->fWidth != -1)
790 len = ufmt_min(len, info->fWidth);
791
792 /* get the formatter */
793 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
794
795 /* handle error */
796 if(format == 0)
797 return 0;
798
799 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
800 u_scanf_skip_leading_positive_sign(input, format, &status);
801
802 /* parse the number */
803 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
804
805 if (!info->fSkipArg) {
806 *(double*)(args[0].ptrValue) = num;
807 }
808
809 /* mask off any necessary bits */
810 /* if(! info->fIsLong_double)
811 num &= DBL_MAX;*/
812
813 /* update the input's position to reflect consumed data */
814 input->str.fPos += parsePos;
815
816 /* we converted 1 arg */
817 *argConverted = !info->fSkipArg;
818 return parsePos;
819 }
820
821 static int32_t
822 u_scanf_string_handler(UFILE *input,
823 u_scanf_spec_info *info,
824 ufmt_args *args,
825 const UChar *fmt,
826 int32_t *fmtConsumed,
827 int32_t *argConverted)
828 {
829 (void)fmt;
830 (void)fmtConsumed;
831
832 const UChar *source;
833 UConverter *conv;
834 char *arg = (char*)(args[0].ptrValue);
835 char *alias = arg;
836 char *limit;
837 UErrorCode status = U_ZERO_ERROR;
838 int32_t count;
839 int32_t skipped = 0;
840 UChar c;
841 UBool isNotEOF = FALSE;
842
843 /* skip all ws in the input */
844 if (info->fIsString) {
845 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
846 }
847
848 /* get the string one character at a time, truncating to the width */
849 count = 0;
850
851 /* open the default converter */
852 conv = u_getDefaultConverter(&status);
853
854 if(U_FAILURE(status))
855 return -1;
856
857 while( (info->fWidth == -1 || count < info->fWidth)
858 && (isNotEOF = ufile_getch(input, &c))
859 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
860 {
861
862 if (!info->fSkipArg) {
863 /* put the character from the input onto the target */
864 source = &c;
865 /* Since we do this one character at a time, do it this way. */
866 if (info->fWidth > 0) {
867 limit = alias + info->fWidth - count;
868 }
869 else {
870 limit = alias + ucnv_getMaxCharSize(conv);
871 }
872
873 /* convert the character to the default codepage */
874 ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
875 NULL, TRUE, &status);
876
877 if(U_FAILURE(status)) {
878 /* clean up */
879 u_releaseDefaultConverter(conv);
880 return -1;
881 }
882 }
883
884 /* increment the count */
885 ++count;
886 }
887
888 /* put the final character we read back on the input */
889 if (!info->fSkipArg) {
890 if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
891 u_fungetc(c, input);
892
893 /* add the terminator */
894 if (info->fIsString) {
895 *alias = 0x00;
896 }
897 }
898
899 /* clean up */
900 u_releaseDefaultConverter(conv);
901
902 /* we converted 1 arg */
903 *argConverted = !info->fSkipArg;
904 return count + skipped;
905 }
906
907 static int32_t
908 u_scanf_char_handler(UFILE *input,
909 u_scanf_spec_info *info,
910 ufmt_args *args,
911 const UChar *fmt,
912 int32_t *fmtConsumed,
913 int32_t *argConverted)
914 {
915 if (info->fWidth < 0) {
916 info->fWidth = 1;
917 }
918 info->fIsString = FALSE;
919 return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
920 }
921
922 static int32_t
923 u_scanf_ustring_handler(UFILE *input,
924 u_scanf_spec_info *info,
925 ufmt_args *args,
926 const UChar *fmt,
927 int32_t *fmtConsumed,
928 int32_t *argConverted)
929 {
930 (void)fmt;
931 (void)fmtConsumed;
932
933 UChar *arg = (UChar*)(args[0].ptrValue);
934 UChar *alias = arg;
935 int32_t count;
936 int32_t skipped = 0;
937 UChar c;
938 UBool isNotEOF = FALSE;
939
940 /* skip all ws in the input */
941 if (info->fIsString) {
942 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
943 }
944
945 /* get the string one character at a time, truncating to the width */
946 count = 0;
947
948 while( (info->fWidth == -1 || count < info->fWidth)
949 && (isNotEOF = ufile_getch(input, &c))
950 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
951 {
952
953 /* put the character from the input onto the target */
954 if (!info->fSkipArg) {
955 *alias++ = c;
956 }
957
958 /* increment the count */
959 ++count;
960 }
961
962 /* put the final character we read back on the input */
963 if (!info->fSkipArg) {
964 if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
965 u_fungetc(c, input);
966 }
967
968 /* add the terminator */
969 if (info->fIsString) {
970 *alias = 0x0000;
971 }
972 }
973
974 /* we converted 1 arg */
975 *argConverted = !info->fSkipArg;
976 return count + skipped;
977 }
978
979 static int32_t
980 u_scanf_uchar_handler(UFILE *input,
981 u_scanf_spec_info *info,
982 ufmt_args *args,
983 const UChar *fmt,
984 int32_t *fmtConsumed,
985 int32_t *argConverted)
986 {
987 if (info->fWidth < 0) {
988 info->fWidth = 1;
989 }
990 info->fIsString = FALSE;
991 return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
992 }
993
994 static int32_t
995 u_scanf_spellout_handler(UFILE *input,
996 u_scanf_spec_info *info,
997 ufmt_args *args,
998 const UChar *fmt,
999 int32_t *fmtConsumed,
1000 int32_t *argConverted)
1001 {
1002 (void)fmt;
1003 (void)fmtConsumed;
1004
1005 int32_t len;
1006 double num;
1007 UNumberFormat *format;
1008 int32_t parsePos = 0;
1009 int32_t skipped;
1010 UErrorCode status = U_ZERO_ERROR;
1011
1012
1013 /* skip all ws in the input */
1014 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1015
1016 /* fill the input's internal buffer */
1017 ufile_fill_uchar_buffer(input);
1018
1019 /* determine the size of the input's buffer */
1020 len = (int32_t)(input->str.fLimit - input->str.fPos);
1021
1022 /* truncate to the width, if specified */
1023 if(info->fWidth != -1)
1024 len = ufmt_min(len, info->fWidth);
1025
1026 /* get the formatter */
1027 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
1028
1029 /* handle error */
1030 if(format == 0)
1031 return 0;
1032
1033 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
1034 /* This is not applicable to RBNF. */
1035 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
1036
1037 /* parse the number */
1038 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
1039
1040 if (!info->fSkipArg) {
1041 *(double*)(args[0].ptrValue) = num;
1042 }
1043
1044 /* mask off any necessary bits */
1045 /* if(! info->fIsLong_double)
1046 num &= DBL_MAX;*/
1047
1048 /* update the input's position to reflect consumed data */
1049 input->str.fPos += parsePos;
1050
1051 /* we converted 1 arg */
1052 *argConverted = !info->fSkipArg;
1053 return parsePos + skipped;
1054 }
1055
1056 static int32_t
1057 u_scanf_hex_handler(UFILE *input,
1058 u_scanf_spec_info *info,
1059 ufmt_args *args,
1060 const UChar *fmt,
1061 int32_t *fmtConsumed,
1062 int32_t *argConverted)
1063 {
1064 (void)fmt;
1065 (void)fmtConsumed;
1066
1067 int32_t len;
1068 int32_t skipped;
1069 void *num = (void*) (args[0].ptrValue);
1070 int64_t result;
1071
1072 /* skip all ws in the input */
1073 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1074
1075 /* fill the input's internal buffer */
1076 ufile_fill_uchar_buffer(input);
1077
1078 /* determine the size of the input's buffer */
1079 len = (int32_t)(input->str.fLimit - input->str.fPos);
1080
1081 /* truncate to the width, if specified */
1082 if(info->fWidth != -1)
1083 len = ufmt_min(len, info->fWidth);
1084
1085 /* check for alternate form */
1086 if( *(input->str.fPos) == 0x0030 &&
1087 (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
1088
1089 /* skip the '0' and 'x' or 'X' if present */
1090 input->str.fPos += 2;
1091 len -= 2;
1092 }
1093
1094 /* parse the number */
1095 result = ufmt_uto64(input->str.fPos, &len, 16);
1096
1097 /* update the input's position to reflect consumed data */
1098 input->str.fPos += len;
1099
1100 /* mask off any necessary bits */
1101 if (!info->fSkipArg) {
1102 if (info->fIsShort)
1103 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1104 else if (info->fIsLongLong)
1105 *(int64_t*)num = result;
1106 else
1107 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1108 }
1109
1110 /* we converted 1 arg */
1111 *argConverted = !info->fSkipArg;
1112 return len + skipped;
1113 }
1114
1115 static int32_t
1116 u_scanf_octal_handler(UFILE *input,
1117 u_scanf_spec_info *info,
1118 ufmt_args *args,
1119 const UChar *fmt,
1120 int32_t *fmtConsumed,
1121 int32_t *argConverted)
1122 {
1123 (void)fmt;
1124 (void)fmtConsumed;
1125
1126 int32_t len;
1127 int32_t skipped;
1128 void *num = (void*) (args[0].ptrValue);
1129 int64_t result;
1130
1131 /* skip all ws in the input */
1132 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1133
1134 /* fill the input's internal buffer */
1135 ufile_fill_uchar_buffer(input);
1136
1137 /* determine the size of the input's buffer */
1138 len = (int32_t)(input->str.fLimit - input->str.fPos);
1139
1140 /* truncate to the width, if specified */
1141 if(info->fWidth != -1)
1142 len = ufmt_min(len, info->fWidth);
1143
1144 /* parse the number */
1145 result = ufmt_uto64(input->str.fPos, &len, 8);
1146
1147 /* update the input's position to reflect consumed data */
1148 input->str.fPos += len;
1149
1150 /* mask off any necessary bits */
1151 if (!info->fSkipArg) {
1152 if (info->fIsShort)
1153 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1154 else if (info->fIsLongLong)
1155 *(int64_t*)num = result;
1156 else
1157 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1158 }
1159
1160 /* we converted 1 arg */
1161 *argConverted = !info->fSkipArg;
1162 return len + skipped;
1163 }
1164
1165 static int32_t
1166 u_scanf_pointer_handler(UFILE *input,
1167 u_scanf_spec_info *info,
1168 ufmt_args *args,
1169 const UChar *fmt,
1170 int32_t *fmtConsumed,
1171 int32_t *argConverted)
1172 {
1173 (void)fmt;
1174 (void)fmtConsumed;
1175
1176 int32_t len;
1177 int32_t skipped;
1178 void *result;
1179 void **p = (void**)(args[0].ptrValue);
1180
1181
1182 /* skip all ws in the input */
1183 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1184
1185 /* fill the input's internal buffer */
1186 ufile_fill_uchar_buffer(input);
1187
1188 /* determine the size of the input's buffer */
1189 len = (int32_t)(input->str.fLimit - input->str.fPos);
1190
1191 /* truncate to the width, if specified */
1192 if(info->fWidth != -1) {
1193 len = ufmt_min(len, info->fWidth);
1194 }
1195
1196 /* Make sure that we don't consume too much */
1197 if (len > (int32_t)(sizeof(void*)*2)) {
1198 len = (int32_t)(sizeof(void*)*2);
1199 }
1200
1201 /* parse the pointer - assign to temporary value */
1202 result = ufmt_utop(input->str.fPos, &len);
1203
1204 if (!info->fSkipArg) {
1205 *p = result;
1206 }
1207
1208 /* update the input's position to reflect consumed data */
1209 input->str.fPos += len;
1210
1211 /* we converted 1 arg */
1212 *argConverted = !info->fSkipArg;
1213 return len + skipped;
1214 }
1215
1216 static int32_t
1217 u_scanf_scanset_handler(UFILE *input,
1218 u_scanf_spec_info *info,
1219 ufmt_args *args,
1220 const UChar *fmt,
1221 int32_t *fmtConsumed,
1222 int32_t *argConverted)
1223 {
1224 USet *scanset;
1225 UErrorCode status = U_ZERO_ERROR;
1226 int32_t chLeft = INT32_MAX;
1227 UChar32 c;
1228 UChar *alias = (UChar*) (args[0].ptrValue);
1229 UBool isNotEOF = FALSE;
1230 UBool readCharacter = FALSE;
1231
1232 /* Create an empty set */
1233 scanset = uset_open(0, -1);
1234
1235 /* Back up one to get the [ */
1236 fmt--;
1237
1238 /* truncate to the width, if specified and alias the target */
1239 if(info->fWidth >= 0) {
1240 chLeft = info->fWidth;
1241 }
1242
1243 /* parse the scanset from the fmt string */
1244 *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
1245
1246 /* verify that the parse was successful */
1247 if (U_SUCCESS(status)) {
1248 c=0;
1249
1250 /* grab characters one at a time and make sure they are in the scanset */
1251 while(chLeft > 0) {
1252 if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
1253 readCharacter = TRUE;
1254 if (!info->fSkipArg) {
1255 int32_t idx = 0;
1256 UBool isError = FALSE;
1257
1258 U16_APPEND(alias, idx, chLeft, c, isError);
1259 if (isError) {
1260 break;
1261 }
1262 alias += idx;
1263 }
1264 chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
1265 }
1266 else {
1267 /* if the character's not in the scanset, break out */
1268 break;
1269 }
1270 }
1271
1272 /* put the final character we read back on the input */
1273 if(isNotEOF && chLeft > 0) {
1274 u_fungetc(c, input);
1275 }
1276 }
1277
1278 uset_close(scanset);
1279
1280 /* if we didn't match at least 1 character, fail */
1281 if(!readCharacter)
1282 return -1;
1283 /* otherwise, add the terminator */
1284 else if (!info->fSkipArg) {
1285 *alias = 0x00;
1286 }
1287
1288 /* we converted 1 arg */
1289 *argConverted = !info->fSkipArg;
1290 return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
1291 }
1292
1293 /* Use US-ASCII characters only for formatting. Most codepages have
1294 characters 20-7F from Unicode. Using any other codepage specific
1295 characters will make it very difficult to format the string on
1296 non-Unicode machines */
1297 static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
1298 /* 0x20 */
1299 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1300 UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY,
1301 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1302 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1303
1304 /* 0x30 */
1305 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1306 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1307 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1308 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1309
1310 /* 0x40 */
1311 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR,
1312 UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL,
1313 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1314 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/,
1315 #else
1316 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1317 #endif
1318 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1319
1320 /* 0x50 */
1321 UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING,
1322 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1323 UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY,
1324 #else
1325 UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY,
1326 #endif
1327 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET,
1328 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1329
1330 /* 0x60 */
1331 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR,
1332 UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL,
1333 UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY,
1334 UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL,
1335
1336 /* 0x70 */
1337 UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING,
1338 UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY,
1339 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1340 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1341 };
1342
1343 U_CFUNC int32_t
1344 u_scanf_parse(UFILE *f,
1345 const UChar *patternSpecification,
1346 va_list ap)
1347 {
1348 const UChar *alias;
1349 int32_t count, converted, argConsumed, cpConsumed;
1350 uint16_t handlerNum;
1351
1352 ufmt_args args;
1353 u_scanf_spec spec;
1354 ufmt_type_info info;
1355 u_scanf_handler handler;
1356
1357 /* alias the pattern */
1358 alias = patternSpecification;
1359
1360 /* haven't converted anything yet */
1361 argConsumed = 0;
1362 converted = 0;
1363 cpConsumed = 0;
1364
1365 /* iterate through the pattern */
1366 for(;;) {
1367
1368 /* match any characters up to the next '%' */
1369 while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
1370 alias++;
1371 }
1372
1373 /* if we aren't at a '%', or if we're at end of string, break*/
1374 if(*alias != UP_PERCENT || *alias == 0x0000)
1375 break;
1376
1377 /* parse the specifier */
1378 count = u_scanf_parse_spec(alias, &spec);
1379
1380 /* update the pointer in pattern */
1381 alias += count;
1382
1383 handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
1384 if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
1385 /* skip the argument, if necessary */
1386 /* query the info function for argument information */
1387 info = g_u_scanf_infos[ handlerNum ].info;
1388 if (info != ufmt_count && u_feof(f)) {
1389 break;
1390 }
1391 else if(spec.fInfo.fSkipArg) {
1392 args.ptrValue = NULL;
1393 }
1394 else {
1395 switch(info) {
1396 case ufmt_count:
1397 /* set the spec's width to the # of items converted */
1398 spec.fInfo.fWidth = cpConsumed;
1399 U_FALLTHROUGH;
1400 case ufmt_char:
1401 case ufmt_uchar:
1402 case ufmt_int:
1403 case ufmt_string:
1404 case ufmt_ustring:
1405 case ufmt_pointer:
1406 case ufmt_float:
1407 case ufmt_double:
1408 args.ptrValue = va_arg(ap, void*);
1409 break;
1410
1411 default:
1412 /* else args is ignored */
1413 args.ptrValue = NULL;
1414 break;
1415 }
1416 }
1417
1418 /* call the handler function */
1419 handler = g_u_scanf_infos[ handlerNum ].handler;
1420 if(handler != 0) {
1421
1422 /* reset count to 1 so that += for alias works. */
1423 count = 1;
1424
1425 cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
1426
1427 /* if the handler encountered an error condition, break */
1428 if(argConsumed < 0) {
1429 converted = -1;
1430 break;
1431 }
1432
1433 /* add to the # of items converted */
1434 converted += argConsumed;
1435
1436 /* update the pointer in pattern */
1437 alias += count-1;
1438 }
1439 /* else do nothing */
1440 }
1441 /* else do nothing */
1442
1443 /* just ignore unknown tags */
1444 }
1445
1446 /* return # of items converted */
1447 return converted;
1448 }
1449
1450 #endif /* #if !UCONFIG_NO_FORMATTING */