]> git.saurik.com Git - apple/icu.git/blob - icuSources/io/uscanf_p.c
ICU-461.13.tar.gz
[apple/icu.git] / icuSources / io / uscanf_p.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File uscanf_p.c
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 12/02/98 stephen Creation.
15 * 03/13/99 stephen Modified for new C API.
16 *******************************************************************************
17 */
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_FORMATTING
22
23 #include "unicode/uchar.h"
24 #include "unicode/ustring.h"
25 #include "unicode/unum.h"
26 #include "unicode/udat.h"
27 #include "unicode/uset.h"
28 #include "uscanf.h"
29 #include "ufmt_cmn.h"
30 #include "ufile.h"
31 #include "locbund.h"
32
33 #include "cmemory.h"
34 #include "ustr_cnv.h"
35
/* flag characters for u_scanf */
#define FLAG_ASTERISK   0x002A  /* '*' */
#define FLAG_PAREN      0x0028  /* '(' */

/*
 * Nonzero when s is one of the u_scanf flag characters.
 * The whole expansion is parenthesized so the macro composes correctly with
 * neighbouring operators (e.g. !ISFLAG(c) or ISFLAG(c) && other).
 * s is evaluated more than once: do not pass an expression with side effects.
 */
#define ISFLAG(s)   ((s) == FLAG_ASTERISK || \
                     (s) == FLAG_PAREN)

/* special characters for u_scanf */
#define SPEC_DOLLARSIGN 0x0024  /* '$' */
45
/* unicode digits (U+0030 .. U+0039) */
#define DIGIT_ZERO  0x0030
#define DIGIT_ONE   0x0031
#define DIGIT_TWO   0x0032
#define DIGIT_THREE 0x0033
#define DIGIT_FOUR  0x0034
#define DIGIT_FIVE  0x0035
#define DIGIT_SIX   0x0036
#define DIGIT_SEVEN 0x0037
#define DIGIT_EIGHT 0x0038
#define DIGIT_NINE  0x0039

/*
 * Nonzero when s is one of the ten digit characters defined above.
 * The whole expansion is parenthesized so the macro composes correctly with
 * neighbouring operators (e.g. !ISDIGIT(c) or ISDIGIT(c) && other).
 * s is evaluated more than once: do not pass an expression with side effects.
 */
#define ISDIGIT(s)  ((s) == DIGIT_ZERO  || \
                     (s) == DIGIT_ONE   || \
                     (s) == DIGIT_TWO   || \
                     (s) == DIGIT_THREE || \
                     (s) == DIGIT_FOUR  || \
                     (s) == DIGIT_FIVE  || \
                     (s) == DIGIT_SIX   || \
                     (s) == DIGIT_SEVEN || \
                     (s) == DIGIT_EIGHT || \
                     (s) == DIGIT_NINE)
68
/* u_scanf modifiers */
#define MOD_H       0x0068  /* 'h' */
#define MOD_LOWERL  0x006C  /* 'l' */
#define MOD_L       0x004C  /* 'L' */

/*
 * Nonzero when s is one of the u_scanf size modifier characters.
 * The whole expansion is parenthesized so the macro composes correctly with
 * neighbouring operators (e.g. !ISMOD(c) or ISMOD(c) && other).
 * s is evaluated more than once: do not pass an expression with side effects.
 */
#define ISMOD(s)    ((s) == MOD_H      || \
                     (s) == MOD_LOWERL || \
                     (s) == MOD_L)
77
78 /**
79 * Struct encapsulating a single uscanf format specification.
80 */
81 typedef struct u_scanf_spec_info {
82 int32_t fWidth; /* Width */
83
84 UChar fSpec; /* Format specification */
85
86 UChar fPadChar; /* Padding character */
87
88 UBool fSkipArg; /* TRUE if arg should be skipped */
89 UBool fIsLongDouble; /* L flag */
90 UBool fIsShort; /* h flag */
91 UBool fIsLong; /* l flag */
92 UBool fIsLongLong; /* ll flag */
93 UBool fIsString; /* TRUE if this is a NULL-terminated string. */
94 } u_scanf_spec_info;
95
96
97 /**
98 * Struct encapsulating a single u_scanf format specification.
99 */
100 typedef struct u_scanf_spec {
101 u_scanf_spec_info fInfo; /* Information on this spec */
102 int32_t fArgPos; /* Position of data in arg list */
103 } u_scanf_spec;
104
105 /**
106 * Parse a single u_scanf format specifier in Unicode.
107 * @param fmt A pointer to a '%' character in a u_scanf format specification.
108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
109 * format specifier.
110 * @return The number of characters contained in this specifier.
111 */
112 static int32_t
113 u_scanf_parse_spec (const UChar *fmt,
114 u_scanf_spec *spec)
115 {
116 const UChar *s = fmt;
117 const UChar *backup;
118 u_scanf_spec_info *info = &(spec->fInfo);
119
120 /* initialize spec to default values */
121 spec->fArgPos = -1;
122
123 info->fWidth = -1;
124 info->fSpec = 0x0000;
125 info->fPadChar = 0x0020;
126 info->fSkipArg = FALSE;
127 info->fIsLongDouble = FALSE;
128 info->fIsShort = FALSE;
129 info->fIsLong = FALSE;
130 info->fIsLongLong = FALSE;
131 info->fIsString = TRUE;
132
133
134 /* skip over the initial '%' */
135 s++;
136
137 /* Check for positional argument */
138 if(ISDIGIT(*s)) {
139
140 /* Save the current position */
141 backup = s;
142
143 /* handle positional parameters */
144 if(ISDIGIT(*s)) {
145 spec->fArgPos = (int) (*s++ - DIGIT_ZERO);
146
147 while(ISDIGIT(*s)) {
148 spec->fArgPos *= 10;
149 spec->fArgPos += (int) (*s++ - DIGIT_ZERO);
150 }
151 }
152
153 /* if there is no '$', don't read anything */
154 if(*s != SPEC_DOLLARSIGN) {
155 spec->fArgPos = -1;
156 s = backup;
157 }
158 /* munge the '$' */
159 else
160 s++;
161 }
162
163 /* Get any format flags */
164 while(ISFLAG(*s)) {
165 switch(*s++) {
166
167 /* skip argument */
168 case FLAG_ASTERISK:
169 info->fSkipArg = TRUE;
170 break;
171
172 /* pad character specified */
173 case FLAG_PAREN:
174
175 /* first four characters are hex values for pad char */
176 info->fPadChar = (UChar)ufmt_digitvalue(*s++);
177 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
178 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
179 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
180
181 /* final character is ignored */
182 s++;
183
184 break;
185 }
186 }
187
188 /* Get the width */
189 if(ISDIGIT(*s)){
190 info->fWidth = (int) (*s++ - DIGIT_ZERO);
191
192 while(ISDIGIT(*s)) {
193 info->fWidth *= 10;
194 info->fWidth += (int) (*s++ - DIGIT_ZERO);
195 }
196 }
197
198 /* Get any modifiers */
199 if(ISMOD(*s)) {
200 switch(*s++) {
201
202 /* short */
203 case MOD_H:
204 info->fIsShort = TRUE;
205 break;
206
207 /* long or long long */
208 case MOD_LOWERL:
209 if(*s == MOD_LOWERL) {
210 info->fIsLongLong = TRUE;
211 /* skip over the next 'l' */
212 s++;
213 }
214 else
215 info->fIsLong = TRUE;
216 break;
217
218 /* long double */
219 case MOD_L:
220 info->fIsLongDouble = TRUE;
221 break;
222 }
223 }
224
225 /* finally, get the specifier letter */
226 info->fSpec = *s++;
227
228 /* return # of characters in this specifier */
229 return (int32_t)(s - fmt);
230 }
231
/* The percent sign that introduces a format specification. */
#define UP_PERCENT 0x0025


/*
 * Each UFMT_* macro below expands to one brace-enclosed
 * { argument type, handler function } pair.
 */

/* ANSI style formatting */
/* Use US-ASCII characters only for formatting */

#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}  /* %    */
#define UFMT_STRING         {ufmt_string, u_scanf_string_handler}                  /* s    */
#define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}                    /* c    */
#define UFMT_INT            {ufmt_int, u_scanf_integer_handler}                    /* d, i */
#define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}                   /* u    */
#define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}                      /* o    */
#define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}                        /* x, X */
#define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}                  /* f    */
#define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}              /* e, E */
#define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}                  /* g, G */
#define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}                    /* n    */
#define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}                 /* [    */

/* non-ANSI extensions */
/* Use US-ASCII characters only for formatting */

#define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}                /* p */
#define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}                /* V */
#define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}                 /* P */
#define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}                    /* C ('K' is the old format) */
#define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}                /* S ('U' is the old format) */


/* Placeholder pair that carries no handler. */
#define UFMT_EMPTY          {ufmt_empty, NULL}
279
280 /**
281 * A u_scanf handler function.
282 * A u_scanf handler is responsible for handling a single u_scanf
283 * format specification, for example 'd' or 's'.
284 * @param stream The UFILE to which to write output.
285 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
286 * information on the format specification.
287 * @param args A pointer to the argument data
288 * @param fmt A pointer to the first character in the format string
289 * following the spec.
290 * @param fmtConsumed On output, set to the number of characters consumed
291 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
292 * @param argConverted The number of arguments converted and assigned, or -1 if an
293 * error occurred.
294 * @return The number of code points consumed during reading.
295 */
296 typedef int32_t (*u_scanf_handler) (UFILE *stream,
297 u_scanf_spec_info *info,
298 ufmt_args *args,
299 const UChar *fmt,
300 int32_t *fmtConsumed,
301 int32_t *argConverted);
302
303 typedef struct u_scanf_info {
304 ufmt_type_info info;
305 u_scanf_handler handler;
306 } u_scanf_info;
307
/* Number of format handler slots. */
#define USCANF_NUM_FMT_HANDLERS     108
/* Capacity, in UChars, of the local buffers that receive number format symbols. */
#define USCANF_SYMBOL_BUFFER_SIZE   8

/* We do not use handlers for 0-0x1f */
#define USCANF_BASE_FMT_HANDLERS    0x20
313
314
315 static int32_t
316 u_scanf_skip_leading_ws(UFILE *input,
317 UChar pad)
318 {
319 UChar c;
320 int32_t count = 0;
321 UBool isNotEOF;
322
323 /* skip all leading ws in the input */
324 while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) )
325 {
326 count++;
327 }
328
329 /* put the final character back on the input */
330 if(isNotEOF)
331 u_fungetc(c, input);
332
333 return count;
334 }
335
336 /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
337 static int32_t
338 u_scanf_skip_leading_positive_sign(UFILE *input,
339 UNumberFormat *format,
340 UErrorCode *status)
341 {
342 UChar c;
343 int32_t count = 0;
344 UBool isNotEOF;
345 UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
346 int32_t symbolLen;
347 UErrorCode localStatus = U_ZERO_ERROR;
348
349 if (U_SUCCESS(*status)) {
350 symbolLen = unum_getSymbol(format,
351 UNUM_PLUS_SIGN_SYMBOL,
352 plusSymbol,
353 sizeof(plusSymbol)/sizeof(*plusSymbol),
354 &localStatus);
355
356 if (U_SUCCESS(localStatus)) {
357 /* skip all leading ws in the input */
358 while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) )
359 {
360 count++;
361 }
362
363 /* put the final character back on the input */
364 if(isNotEOF) {
365 u_fungetc(c, input);
366 }
367 }
368 }
369
370 return count;
371 }
372
373 static int32_t
374 u_scanf_simple_percent_handler(UFILE *input,
375 u_scanf_spec_info *info,
376 ufmt_args *args,
377 const UChar *fmt,
378 int32_t *fmtConsumed,
379 int32_t *argConverted)
380 {
381 /* make sure the next character in the input is a percent */
382 *argConverted = 0;
383 if(u_fgetc(input) != 0x0025) {
384 *argConverted = -1;
385 }
386 return 1;
387 }
388
389 static int32_t
390 u_scanf_count_handler(UFILE *input,
391 u_scanf_spec_info *info,
392 ufmt_args *args,
393 const UChar *fmt,
394 int32_t *fmtConsumed,
395 int32_t *argConverted)
396 {
397 /* in the special case of count, the u_scanf_spec_info's width */
398 /* will contain the # of items converted thus far */
399 if (!info->fSkipArg) {
400 if (info->fIsShort)
401 *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth);
402 else if (info->fIsLongLong)
403 *(int64_t*)(args[0].ptrValue) = info->fWidth;
404 else
405 *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth);
406 }
407 *argConverted = 0;
408
409 /* we converted 0 args */
410 return 0;
411 }
412
413 static int32_t
414 u_scanf_double_handler(UFILE *input,
415 u_scanf_spec_info *info,
416 ufmt_args *args,
417 const UChar *fmt,
418 int32_t *fmtConsumed,
419 int32_t *argConverted)
420 {
421 int32_t len;
422 double num;
423 UNumberFormat *format;
424 int32_t parsePos = 0;
425 int32_t skipped;
426 UErrorCode status = U_ZERO_ERROR;
427
428
429 /* skip all ws in the input */
430 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
431
432 /* fill the input's internal buffer */
433 ufile_fill_uchar_buffer(input);
434
435 /* determine the size of the input's buffer */
436 len = (int32_t)(input->str.fLimit - input->str.fPos);
437
438 /* truncate to the width, if specified */
439 if(info->fWidth != -1)
440 len = ufmt_min(len, info->fWidth);
441
442 /* get the formatter */
443 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
444
445 /* handle error */
446 if(format == 0)
447 return 0;
448
449 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
450 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
451
452 /* parse the number */
453 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
454
455 if (!info->fSkipArg) {
456 if (info->fIsLong)
457 *(double*)(args[0].ptrValue) = num;
458 else if (info->fIsLongDouble)
459 *(long double*)(args[0].ptrValue) = num;
460 else
461 *(float*)(args[0].ptrValue) = (float)num;
462 }
463
464 /* mask off any necessary bits */
465 /* if(! info->fIsLong_double)
466 num &= DBL_MAX;*/
467
468 /* update the input's position to reflect consumed data */
469 input->str.fPos += parsePos;
470
471 /* we converted 1 arg */
472 *argConverted = !info->fSkipArg;
473 return parsePos + skipped;
474 }
475
476 #define UPRINTF_SYMBOL_BUFFER_SIZE 8
477
478 static int32_t
479 u_scanf_scientific_handler(UFILE *input,
480 u_scanf_spec_info *info,
481 ufmt_args *args,
482 const UChar *fmt,
483 int32_t *fmtConsumed,
484 int32_t *argConverted)
485 {
486 int32_t len;
487 double num;
488 UNumberFormat *format;
489 int32_t parsePos = 0;
490 int32_t skipped;
491 UErrorCode status = U_ZERO_ERROR;
492 UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
493 int32_t srcLen, expLen;
494 UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
495
496
497 /* skip all ws in the input */
498 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
499
500 /* fill the input's internal buffer */
501 ufile_fill_uchar_buffer(input);
502
503 /* determine the size of the input's buffer */
504 len = (int32_t)(input->str.fLimit - input->str.fPos);
505
506 /* truncate to the width, if specified */
507 if(info->fWidth != -1)
508 len = ufmt_min(len, info->fWidth);
509
510 /* get the formatter */
511 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
512
513 /* handle error */
514 if(format == 0)
515 return 0;
516
517 /* set the appropriate flags on the formatter */
518
519 srcLen = unum_getSymbol(format,
520 UNUM_EXPONENTIAL_SYMBOL,
521 srcExpBuf,
522 sizeof(srcExpBuf),
523 &status);
524
525 /* Upper/lower case the e */
526 if (info->fSpec == (UChar)0x65 /* e */) {
527 expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf),
528 srcExpBuf, srcLen,
529 input->str.fBundle.fLocale,
530 &status);
531 }
532 else {
533 expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf),
534 srcExpBuf, srcLen,
535 input->str.fBundle.fLocale,
536 &status);
537 }
538
539 unum_setSymbol(format,
540 UNUM_EXPONENTIAL_SYMBOL,
541 expBuf,
542 expLen,
543 &status);
544
545
546
547
548 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
549 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
550
551 /* parse the number */
552 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
553
554 if (!info->fSkipArg) {
555 if (info->fIsLong)
556 *(double*)(args[0].ptrValue) = num;
557 else if (info->fIsLongDouble)
558 *(long double*)(args[0].ptrValue) = num;
559 else
560 *(float*)(args[0].ptrValue) = (float)num;
561 }
562
563 /* mask off any necessary bits */
564 /* if(! info->fIsLong_double)
565 num &= DBL_MAX;*/
566
567 /* update the input's position to reflect consumed data */
568 input->str.fPos += parsePos;
569
570 /* we converted 1 arg */
571 *argConverted = !info->fSkipArg;
572 return parsePos + skipped;
573 }
574
575 static int32_t
576 u_scanf_scidbl_handler(UFILE *input,
577 u_scanf_spec_info *info,
578 ufmt_args *args,
579 const UChar *fmt,
580 int32_t *fmtConsumed,
581 int32_t *argConverted)
582 {
583 int32_t len;
584 double num;
585 UNumberFormat *scientificFormat, *genericFormat;
586 /*int32_t scientificResult, genericResult;*/
587 double scientificResult, genericResult;
588 int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
589 int32_t skipped;
590 UErrorCode scientificStatus = U_ZERO_ERROR;
591 UErrorCode genericStatus = U_ZERO_ERROR;
592
593
594 /* since we can't determine by scanning the characters whether */
595 /* a number was formatted in the 'f' or 'g' styles, parse the */
596 /* string with both formatters, and assume whichever one */
597 /* parsed the most is the correct formatter to use */
598
599
600 /* skip all ws in the input */
601 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
602
603 /* fill the input's internal buffer */
604 ufile_fill_uchar_buffer(input);
605
606 /* determine the size of the input's buffer */
607 len = (int32_t)(input->str.fLimit - input->str.fPos);
608
609 /* truncate to the width, if specified */
610 if(info->fWidth != -1)
611 len = ufmt_min(len, info->fWidth);
612
613 /* get the formatters */
614 scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
615 genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
616
617 /* handle error */
618 if(scientificFormat == 0 || genericFormat == 0)
619 return 0;
620
621 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
622 skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);
623
624 /* parse the number using each format*/
625
626 scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
627 &scientificParsePos, &scientificStatus);
628
629 genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
630 &genericParsePos, &genericStatus);
631
632 /* determine which parse made it farther */
633 if(scientificParsePos > genericParsePos) {
634 /* stash the result in num */
635 num = scientificResult;
636 /* update the input's position to reflect consumed data */
637 parsePos += scientificParsePos;
638 }
639 else {
640 /* stash the result in num */
641 num = genericResult;
642 /* update the input's position to reflect consumed data */
643 parsePos += genericParsePos;
644 }
645 input->str.fPos += parsePos;
646
647 if (!info->fSkipArg) {
648 if (info->fIsLong)
649 *(double*)(args[0].ptrValue) = num;
650 else if (info->fIsLongDouble)
651 *(long double*)(args[0].ptrValue) = num;
652 else
653 *(float*)(args[0].ptrValue) = (float)num;
654 }
655
656 /* mask off any necessary bits */
657 /* if(! info->fIsLong_double)
658 num &= DBL_MAX;*/
659
660 /* we converted 1 arg */
661 *argConverted = !info->fSkipArg;
662 return parsePos + skipped;
663 }
664
665 static int32_t
666 u_scanf_integer_handler(UFILE *input,
667 u_scanf_spec_info *info,
668 ufmt_args *args,
669 const UChar *fmt,
670 int32_t *fmtConsumed,
671 int32_t *argConverted)
672 {
673 int32_t len;
674 void *num = (void*) (args[0].ptrValue);
675 UNumberFormat *format;
676 int32_t parsePos = 0;
677 int32_t skipped;
678 UErrorCode status = U_ZERO_ERROR;
679 int64_t result;
680
681
682 /* skip all ws in the input */
683 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
684
685 /* fill the input's internal buffer */
686 ufile_fill_uchar_buffer(input);
687
688 /* determine the size of the input's buffer */
689 len = (int32_t)(input->str.fLimit - input->str.fPos);
690
691 /* truncate to the width, if specified */
692 if(info->fWidth != -1)
693 len = ufmt_min(len, info->fWidth);
694
695 /* get the formatter */
696 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
697
698 /* handle error */
699 if(format == 0)
700 return 0;
701
702 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
703 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
704
705 /* parse the number */
706 result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status);
707
708 /* mask off any necessary bits */
709 if (!info->fSkipArg) {
710 if (info->fIsShort)
711 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
712 else if (info->fIsLongLong)
713 *(int64_t*)num = result;
714 else
715 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
716 }
717
718 /* update the input's position to reflect consumed data */
719 input->str.fPos += parsePos;
720
721 /* we converted 1 arg */
722 *argConverted = !info->fSkipArg;
723 return parsePos + skipped;
724 }
725
726 static int32_t
727 u_scanf_uinteger_handler(UFILE *input,
728 u_scanf_spec_info *info,
729 ufmt_args *args,
730 const UChar *fmt,
731 int32_t *fmtConsumed,
732 int32_t *argConverted)
733 {
734 /* TODO Fix this when Numberformat handles uint64_t */
735 return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
736 }
737
738 static int32_t
739 u_scanf_percent_handler(UFILE *input,
740 u_scanf_spec_info *info,
741 ufmt_args *args,
742 const UChar *fmt,
743 int32_t *fmtConsumed,
744 int32_t *argConverted)
745 {
746 int32_t len;
747 double num;
748 UNumberFormat *format;
749 int32_t parsePos = 0;
750 int32_t skipped;
751 UErrorCode status = U_ZERO_ERROR;
752
753
754 /* skip all ws in the input */
755 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
756
757 /* fill the input's internal buffer */
758 ufile_fill_uchar_buffer(input);
759
760 /* determine the size of the input's buffer */
761 len = (int32_t)(input->str.fLimit - input->str.fPos);
762
763 /* truncate to the width, if specified */
764 if(info->fWidth != -1)
765 len = ufmt_min(len, info->fWidth);
766
767 /* get the formatter */
768 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
769
770 /* handle error */
771 if(format == 0)
772 return 0;
773
774 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
775 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
776
777 /* parse the number */
778 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
779
780 if (!info->fSkipArg) {
781 *(double*)(args[0].ptrValue) = num;
782 }
783
784 /* mask off any necessary bits */
785 /* if(! info->fIsLong_double)
786 num &= DBL_MAX;*/
787
788 /* update the input's position to reflect consumed data */
789 input->str.fPos += parsePos;
790
791 /* we converted 1 arg */
792 *argConverted = !info->fSkipArg;
793 return parsePos;
794 }
795
796 static int32_t
797 u_scanf_string_handler(UFILE *input,
798 u_scanf_spec_info *info,
799 ufmt_args *args,
800 const UChar *fmt,
801 int32_t *fmtConsumed,
802 int32_t *argConverted)
803 {
804 const UChar *source;
805 UConverter *conv;
806 char *arg = (char*)(args[0].ptrValue);
807 char *alias = arg;
808 char *limit;
809 UErrorCode status = U_ZERO_ERROR;
810 int32_t count;
811 int32_t skipped = 0;
812 UChar c;
813 UBool isNotEOF = FALSE;
814
815 /* skip all ws in the input */
816 if (info->fIsString) {
817 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
818 }
819
820 /* get the string one character at a time, truncating to the width */
821 count = 0;
822
823 /* open the default converter */
824 conv = u_getDefaultConverter(&status);
825
826 if(U_FAILURE(status))
827 return -1;
828
829 while( (info->fWidth == -1 || count < info->fWidth)
830 && (isNotEOF = ufile_getch(input, &c))
831 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
832 {
833
834 if (!info->fSkipArg) {
835 /* put the character from the input onto the target */
836 source = &c;
837 /* Since we do this one character at a time, do it this way. */
838 if (info->fWidth > 0) {
839 limit = alias + info->fWidth - count;
840 }
841 else {
842 limit = alias + ucnv_getMaxCharSize(conv);
843 }
844
845 /* convert the character to the default codepage */
846 ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
847 NULL, TRUE, &status);
848
849 if(U_FAILURE(status)) {
850 /* clean up */
851 u_releaseDefaultConverter(conv);
852 return -1;
853 }
854 }
855
856 /* increment the count */
857 ++count;
858 }
859
860 /* put the final character we read back on the input */
861 if (!info->fSkipArg) {
862 if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
863 u_fungetc(c, input);
864
865 /* add the terminator */
866 if (info->fIsString) {
867 *alias = 0x00;
868 }
869 }
870
871 /* clean up */
872 u_releaseDefaultConverter(conv);
873
874 /* we converted 1 arg */
875 *argConverted = !info->fSkipArg;
876 return count + skipped;
877 }
878
879 static int32_t
880 u_scanf_char_handler(UFILE *input,
881 u_scanf_spec_info *info,
882 ufmt_args *args,
883 const UChar *fmt,
884 int32_t *fmtConsumed,
885 int32_t *argConverted)
886 {
887 if (info->fWidth < 0) {
888 info->fWidth = 1;
889 }
890 info->fIsString = FALSE;
891 return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
892 }
893
894 static int32_t
895 u_scanf_ustring_handler(UFILE *input,
896 u_scanf_spec_info *info,
897 ufmt_args *args,
898 const UChar *fmt,
899 int32_t *fmtConsumed,
900 int32_t *argConverted)
901 {
902 UChar *arg = (UChar*)(args[0].ptrValue);
903 UChar *alias = arg;
904 int32_t count;
905 int32_t skipped = 0;
906 UChar c;
907 UBool isNotEOF = FALSE;
908
909 /* skip all ws in the input */
910 if (info->fIsString) {
911 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
912 }
913
914 /* get the string one character at a time, truncating to the width */
915 count = 0;
916
917 while( (info->fWidth == -1 || count < info->fWidth)
918 && (isNotEOF = ufile_getch(input, &c))
919 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
920 {
921
922 /* put the character from the input onto the target */
923 if (!info->fSkipArg) {
924 *alias++ = c;
925 }
926
927 /* increment the count */
928 ++count;
929 }
930
931 /* put the final character we read back on the input */
932 if (!info->fSkipArg) {
933 if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
934 u_fungetc(c, input);
935 }
936
937 /* add the terminator */
938 if (info->fIsString) {
939 *alias = 0x0000;
940 }
941 }
942
943 /* we converted 1 arg */
944 *argConverted = !info->fSkipArg;
945 return count + skipped;
946 }
947
948 static int32_t
949 u_scanf_uchar_handler(UFILE *input,
950 u_scanf_spec_info *info,
951 ufmt_args *args,
952 const UChar *fmt,
953 int32_t *fmtConsumed,
954 int32_t *argConverted)
955 {
956 if (info->fWidth < 0) {
957 info->fWidth = 1;
958 }
959 info->fIsString = FALSE;
960 return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
961 }
962
963 static int32_t
964 u_scanf_spellout_handler(UFILE *input,
965 u_scanf_spec_info *info,
966 ufmt_args *args,
967 const UChar *fmt,
968 int32_t *fmtConsumed,
969 int32_t *argConverted)
970 {
971 int32_t len;
972 double num;
973 UNumberFormat *format;
974 int32_t parsePos = 0;
975 int32_t skipped;
976 UErrorCode status = U_ZERO_ERROR;
977
978
979 /* skip all ws in the input */
980 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
981
982 /* fill the input's internal buffer */
983 ufile_fill_uchar_buffer(input);
984
985 /* determine the size of the input's buffer */
986 len = (int32_t)(input->str.fLimit - input->str.fPos);
987
988 /* truncate to the width, if specified */
989 if(info->fWidth != -1)
990 len = ufmt_min(len, info->fWidth);
991
992 /* get the formatter */
993 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
994
995 /* handle error */
996 if(format == 0)
997 return 0;
998
999 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
1000 /* This is not applicable to RBNF. */
1001 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
1002
1003 /* parse the number */
1004 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
1005
1006 if (!info->fSkipArg) {
1007 *(double*)(args[0].ptrValue) = num;
1008 }
1009
1010 /* mask off any necessary bits */
1011 /* if(! info->fIsLong_double)
1012 num &= DBL_MAX;*/
1013
1014 /* update the input's position to reflect consumed data */
1015 input->str.fPos += parsePos;
1016
1017 /* we converted 1 arg */
1018 *argConverted = !info->fSkipArg;
1019 return parsePos + skipped;
1020 }
1021
1022 static int32_t
1023 u_scanf_hex_handler(UFILE *input,
1024 u_scanf_spec_info *info,
1025 ufmt_args *args,
1026 const UChar *fmt,
1027 int32_t *fmtConsumed,
1028 int32_t *argConverted)
1029 {
1030 int32_t len;
1031 int32_t skipped;
1032 void *num = (void*) (args[0].ptrValue);
1033 int64_t result;
1034
1035 /* skip all ws in the input */
1036 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1037
1038 /* fill the input's internal buffer */
1039 ufile_fill_uchar_buffer(input);
1040
1041 /* determine the size of the input's buffer */
1042 len = (int32_t)(input->str.fLimit - input->str.fPos);
1043
1044 /* truncate to the width, if specified */
1045 if(info->fWidth != -1)
1046 len = ufmt_min(len, info->fWidth);
1047
1048 /* check for alternate form */
1049 if( *(input->str.fPos) == 0x0030 &&
1050 (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
1051
1052 /* skip the '0' and 'x' or 'X' if present */
1053 input->str.fPos += 2;
1054 len -= 2;
1055 }
1056
1057 /* parse the number */
1058 result = ufmt_uto64(input->str.fPos, &len, 16);
1059
1060 /* update the input's position to reflect consumed data */
1061 input->str.fPos += len;
1062
1063 /* mask off any necessary bits */
1064 if (!info->fSkipArg) {
1065 if (info->fIsShort)
1066 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1067 else if (info->fIsLongLong)
1068 *(int64_t*)num = result;
1069 else
1070 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1071 }
1072
1073 /* we converted 1 arg */
1074 *argConverted = !info->fSkipArg;
1075 return len + skipped;
1076 }
1077
1078 static int32_t
1079 u_scanf_octal_handler(UFILE *input,
1080 u_scanf_spec_info *info,
1081 ufmt_args *args,
1082 const UChar *fmt,
1083 int32_t *fmtConsumed,
1084 int32_t *argConverted)
1085 {
1086 int32_t len;
1087 int32_t skipped;
1088 void *num = (void*) (args[0].ptrValue);
1089 int64_t result;
1090
1091 /* skip all ws in the input */
1092 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1093
1094 /* fill the input's internal buffer */
1095 ufile_fill_uchar_buffer(input);
1096
1097 /* determine the size of the input's buffer */
1098 len = (int32_t)(input->str.fLimit - input->str.fPos);
1099
1100 /* truncate to the width, if specified */
1101 if(info->fWidth != -1)
1102 len = ufmt_min(len, info->fWidth);
1103
1104 /* parse the number */
1105 result = ufmt_uto64(input->str.fPos, &len, 8);
1106
1107 /* update the input's position to reflect consumed data */
1108 input->str.fPos += len;
1109
1110 /* mask off any necessary bits */
1111 if (!info->fSkipArg) {
1112 if (info->fIsShort)
1113 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1114 else if (info->fIsLongLong)
1115 *(int64_t*)num = result;
1116 else
1117 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1118 }
1119
1120 /* we converted 1 arg */
1121 *argConverted = !info->fSkipArg;
1122 return len + skipped;
1123 }
1124
1125 static int32_t
1126 u_scanf_pointer_handler(UFILE *input,
1127 u_scanf_spec_info *info,
1128 ufmt_args *args,
1129 const UChar *fmt,
1130 int32_t *fmtConsumed,
1131 int32_t *argConverted)
1132 {
1133 int32_t len;
1134 int32_t skipped;
1135 void *result;
1136 void **p = (void**)(args[0].ptrValue);
1137
1138
1139 /* skip all ws in the input */
1140 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1141
1142 /* fill the input's internal buffer */
1143 ufile_fill_uchar_buffer(input);
1144
1145 /* determine the size of the input's buffer */
1146 len = (int32_t)(input->str.fLimit - input->str.fPos);
1147
1148 /* truncate to the width, if specified */
1149 if(info->fWidth != -1) {
1150 len = ufmt_min(len, info->fWidth);
1151 }
1152
1153 /* Make sure that we don't consume too much */
1154 if (len > (int32_t)(sizeof(void*)*2)) {
1155 len = (int32_t)(sizeof(void*)*2);
1156 }
1157
1158 /* parse the pointer - assign to temporary value */
1159 result = ufmt_utop(input->str.fPos, &len);
1160
1161 if (!info->fSkipArg) {
1162 *p = result;
1163 }
1164
1165 /* update the input's position to reflect consumed data */
1166 input->str.fPos += len;
1167
1168 /* we converted 1 arg */
1169 *argConverted = !info->fSkipArg;
1170 return len + skipped;
1171 }
1172
1173 static int32_t
1174 u_scanf_scanset_handler(UFILE *input,
1175 u_scanf_spec_info *info,
1176 ufmt_args *args,
1177 const UChar *fmt,
1178 int32_t *fmtConsumed,
1179 int32_t *argConverted)
1180 {
1181 USet *scanset;
1182 UErrorCode status = U_ZERO_ERROR;
1183 int32_t chLeft = INT32_MAX;
1184 UChar32 c;
1185 UChar *alias = (UChar*) (args[0].ptrValue);
1186 UBool isNotEOF = FALSE;
1187 UBool readCharacter = FALSE;
1188
1189 /* Create an empty set */
1190 scanset = uset_open(0, -1);
1191
1192 /* Back up one to get the [ */
1193 fmt--;
1194
1195 /* truncate to the width, if specified and alias the target */
1196 if(info->fWidth >= 0) {
1197 chLeft = info->fWidth;
1198 }
1199
1200 /* parse the scanset from the fmt string */
1201 *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
1202
1203 /* verify that the parse was successful */
1204 if (U_SUCCESS(status)) {
1205 c=0;
1206
1207 /* grab characters one at a time and make sure they are in the scanset */
1208 while(chLeft > 0) {
1209 if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
1210 readCharacter = TRUE;
1211 if (!info->fSkipArg) {
1212 int32_t idx = 0;
1213 UBool isError = FALSE;
1214
1215 U16_APPEND(alias, idx, chLeft, c, isError);
1216 if (isError) {
1217 break;
1218 }
1219 alias += idx;
1220 }
1221 chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
1222 }
1223 else {
1224 /* if the character's not in the scanset, break out */
1225 break;
1226 }
1227 }
1228
1229 /* put the final character we read back on the input */
1230 if(isNotEOF && chLeft > 0) {
1231 u_fungetc(c, input);
1232 }
1233 }
1234
1235 uset_close(scanset);
1236
1237 /* if we didn't match at least 1 character, fail */
1238 if(!readCharacter)
1239 return -1;
1240 /* otherwise, add the terminator */
1241 else if (!info->fSkipArg) {
1242 *alias = 0x00;
1243 }
1244
1245 /* we converted 1 arg */
1246 *argConverted = !info->fSkipArg;
1247 return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
1248 }
1249
1250 /* Use US-ASCII characters only for formatting. Most codepages have
1251 characters 20-7F from Unicode. Using any other codepage specific
1252 characters will make it very difficult to format the string on
1253 non-Unicode machines */
1254 static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
1255 /* 0x20 */
1256 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1257 UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY,
1258 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1259 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1260
1261 /* 0x30 */
1262 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1263 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1264 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1265 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1266
1267 /* 0x40 */
1268 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR,
1269 UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL,
1270 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1271 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/,
1272 #else
1273 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1274 #endif
1275 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1276
1277 /* 0x50 */
1278 UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING,
1279 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1280 UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY,
1281 #else
1282 UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY,
1283 #endif
1284 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET,
1285 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1286
1287 /* 0x60 */
1288 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR,
1289 UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL,
1290 UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY,
1291 UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL,
1292
1293 /* 0x70 */
1294 UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING,
1295 UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY,
1296 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1297 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1298 };
1299
1300 U_CFUNC int32_t
1301 u_scanf_parse(UFILE *f,
1302 const UChar *patternSpecification,
1303 va_list ap)
1304 {
1305 const UChar *alias;
1306 int32_t count, converted, argConsumed, cpConsumed;
1307 uint16_t handlerNum;
1308
1309 ufmt_args args;
1310 u_scanf_spec spec;
1311 ufmt_type_info info;
1312 u_scanf_handler handler;
1313
1314 /* alias the pattern */
1315 alias = patternSpecification;
1316
1317 /* haven't converted anything yet */
1318 argConsumed = 0;
1319 converted = 0;
1320 cpConsumed = 0;
1321
1322 /* iterate through the pattern */
1323 for(;;) {
1324
1325 /* match any characters up to the next '%' */
1326 while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
1327 alias++;
1328 }
1329
1330 /* if we aren't at a '%', or if we're at end of string, break*/
1331 if(*alias != UP_PERCENT || *alias == 0x0000)
1332 break;
1333
1334 /* parse the specifier */
1335 count = u_scanf_parse_spec(alias, &spec);
1336
1337 /* update the pointer in pattern */
1338 alias += count;
1339
1340 handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
1341 if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
1342 /* skip the argument, if necessary */
1343 /* query the info function for argument information */
1344 info = g_u_scanf_infos[ handlerNum ].info;
1345 if (info != ufmt_count && u_feof(f)) {
1346 break;
1347 }
1348 else if(spec.fInfo.fSkipArg) {
1349 args.ptrValue = NULL;
1350 }
1351 else {
1352 switch(info) {
1353 case ufmt_count:
1354 /* set the spec's width to the # of items converted */
1355 spec.fInfo.fWidth = cpConsumed;
1356 /* fall through to next case */
1357 case ufmt_char:
1358 case ufmt_uchar:
1359 case ufmt_int:
1360 case ufmt_string:
1361 case ufmt_ustring:
1362 case ufmt_pointer:
1363 case ufmt_float:
1364 case ufmt_double:
1365 args.ptrValue = va_arg(ap, void*);
1366 break;
1367
1368 default:
1369 /* else args is ignored */
1370 args.ptrValue = NULL;
1371 break;
1372 }
1373 }
1374
1375 /* call the handler function */
1376 handler = g_u_scanf_infos[ handlerNum ].handler;
1377 if(handler != 0) {
1378
1379 /* reset count to 1 so that += for alias works. */
1380 count = 1;
1381
1382 cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
1383
1384 /* if the handler encountered an error condition, break */
1385 if(argConsumed < 0) {
1386 converted = -1;
1387 break;
1388 }
1389
1390 /* add to the # of items converted */
1391 converted += argConsumed;
1392
1393 /* update the pointer in pattern */
1394 alias += count-1;
1395 }
1396 /* else do nothing */
1397 }
1398 /* else do nothing */
1399
1400 /* just ignore unknown tags */
1401 }
1402
1403 /* return # of items converted */
1404 return converted;
1405 }
1406
1407 #endif /* #if !UCONFIG_NO_FORMATTING */