]> git.saurik.com Git - apple/icu.git/blob - icuSources/io/uscanf_p.c
ICU-6.2.8.tar.gz
[apple/icu.git] / icuSources / io / uscanf_p.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File uscanf_p.c
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 12/02/98 stephen Creation.
15 * 03/13/99 stephen Modified for new C API.
16 *******************************************************************************
17 */
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_FORMATTING
22
23 #include "unicode/uchar.h"
24 #include "unicode/ustring.h"
25 #include "unicode/unum.h"
26 #include "unicode/udat.h"
27 #include "unicode/uset.h"
28 #include "uscanf.h"
29 #include "ufmt_cmn.h"
30 #include "ufile.h"
31 #include "locbund.h"
32
33 #include "cmemory.h"
34 #include "ustr_cnv.h"
35
/* flag characters for u_scanf */
#define FLAG_ASTERISK   0x002A  /* '*' : parse the field but do not store it */
#define FLAG_PAREN      0x0028  /* '(' : introduces a hex pad-character escape */

/* Non-zero if s is one of the u_scanf flag characters.  The expansion is
   fully parenthesized so that it composes correctly with !, && and ||. */
#define ISFLAG(s)   ((s) == FLAG_ASTERISK || \
                     (s) == FLAG_PAREN)

/* special characters for u_scanf */
#define SPEC_DOLLARSIGN 0x0024  /* '$' : ends a positional argument index */
45
/* unicode digits (U+0030 .. U+0039, contiguous) */
#define DIGIT_ZERO  0x0030
#define DIGIT_ONE   0x0031
#define DIGIT_TWO   0x0032
#define DIGIT_THREE 0x0033
#define DIGIT_FOUR  0x0034
#define DIGIT_FIVE  0x0035
#define DIGIT_SIX   0x0036
#define DIGIT_SEVEN 0x0037
#define DIGIT_EIGHT 0x0038
#define DIGIT_NINE  0x0039

/* Non-zero if s is one of the ten digits above.  Because the code points are
   contiguous a range test is equivalent to comparing against each one.  The
   expansion is fully parenthesized so it can be negated or combined safely. */
#define ISDIGIT(s)  ((s) >= DIGIT_ZERO && (s) <= DIGIT_NINE)
68
/* u_scanf modifiers */
#define MOD_H       0x0068  /* 'h' : short */
#define MOD_LOWERL  0x006C  /* 'l' : long, or long long when doubled */
#define MOD_L       0x004C  /* 'L' : long double */

/* Non-zero if s is one of the size modifiers above.  The expansion is fully
   parenthesized so that it composes correctly with !, && and ||. */
#define ISMOD(s)    ((s) == MOD_H || \
                     (s) == MOD_LOWERL || \
                     (s) == MOD_L)
77
78 /**
79 * Struct encapsulating a single uscanf format specification.
80 */
81 typedef struct u_scanf_spec_info {
82 int32_t fWidth; /* Width */
83
84 UChar fSpec; /* Format specification */
85
86 UChar fPadChar; /* Padding character */
87
88 UBool fSkipArg; /* TRUE if arg should be skipped */
89 UBool fIsLongDouble; /* L flag */
90 UBool fIsShort; /* h flag */
91 UBool fIsLong; /* l flag */
92 UBool fIsLongLong; /* ll flag */
93 UBool fIsString; /* TRUE if this is a NULL-terminated string. */
94 } u_scanf_spec_info;
95
96
97 /**
98 * Struct encapsulating a single u_scanf format specification.
99 */
100 typedef struct u_scanf_spec {
101 u_scanf_spec_info fInfo; /* Information on this spec */
102 int32_t fArgPos; /* Position of data in arg list */
103 } u_scanf_spec;
104
105 /**
106 * Parse a single u_scanf format specifier in Unicode.
107 * @param fmt A pointer to a '%' character in a u_scanf format specification.
108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
109 * format specifier.
110 * @return The number of characters contained in this specifier.
111 */
112 static int32_t
113 u_scanf_parse_spec (const UChar *fmt,
114 u_scanf_spec *spec)
115 {
116 const UChar *s = fmt;
117 const UChar *backup;
118 u_scanf_spec_info *info = &(spec->fInfo);
119
120 /* initialize spec to default values */
121 spec->fArgPos = -1;
122
123 info->fWidth = -1;
124 info->fSpec = 0x0000;
125 info->fPadChar = 0x0020;
126 info->fSkipArg = FALSE;
127 info->fIsLongDouble = FALSE;
128 info->fIsShort = FALSE;
129 info->fIsLong = FALSE;
130 info->fIsLongLong = FALSE;
131 info->fIsString = TRUE;
132
133
134 /* skip over the initial '%' */
135 s++;
136
137 /* Check for positional argument */
138 if(ISDIGIT(*s)) {
139
140 /* Save the current position */
141 backup = s;
142
143 /* handle positional parameters */
144 if(ISDIGIT(*s)) {
145 spec->fArgPos = (int) (*s++ - DIGIT_ZERO);
146
147 while(ISDIGIT(*s)) {
148 spec->fArgPos *= 10;
149 spec->fArgPos += (int) (*s++ - DIGIT_ZERO);
150 }
151 }
152
153 /* if there is no '$', don't read anything */
154 if(*s != SPEC_DOLLARSIGN) {
155 spec->fArgPos = -1;
156 s = backup;
157 }
158 /* munge the '$' */
159 else
160 s++;
161 }
162
163 /* Get any format flags */
164 while(ISFLAG(*s)) {
165 switch(*s++) {
166
167 /* skip argument */
168 case FLAG_ASTERISK:
169 info->fSkipArg = TRUE;
170 break;
171
172 /* pad character specified */
173 case FLAG_PAREN:
174
175 /* first four characters are hex values for pad char */
176 info->fPadChar = (UChar)ufmt_digitvalue(*s++);
177 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
178 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
179 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
180
181 /* final character is ignored */
182 s++;
183
184 break;
185 }
186 }
187
188 /* Get the width */
189 if(ISDIGIT(*s)){
190 info->fWidth = (int) (*s++ - DIGIT_ZERO);
191
192 while(ISDIGIT(*s)) {
193 info->fWidth *= 10;
194 info->fWidth += (int) (*s++ - DIGIT_ZERO);
195 }
196 }
197
198 /* Get any modifiers */
199 if(ISMOD(*s)) {
200 switch(*s++) {
201
202 /* short */
203 case MOD_H:
204 info->fIsShort = TRUE;
205 break;
206
207 /* long or long long */
208 case MOD_LOWERL:
209 if(*s == MOD_LOWERL) {
210 info->fIsLongLong = TRUE;
211 /* skip over the next 'l' */
212 s++;
213 }
214 else
215 info->fIsLong = TRUE;
216 break;
217
218 /* long double */
219 case MOD_L:
220 info->fIsLongDouble = TRUE;
221 break;
222 }
223 }
224
225 /* finally, get the specifier letter */
226 info->fSpec = *s++;
227
228 /* return # of characters in this specifier */
229 return (int32_t)(s - fmt);
230 }
231
#define UP_PERCENT 0x0025


/* Each UFMT_* macro below expands to one {argument type, handler} */
/* initializer for the handler table.                               */

/* ANSI style formatting */
/* Use US-ASCII characters only for formatting */

#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}   /* %    */
#define UFMT_STRING         {ufmt_string, u_scanf_string_handler}                   /* s    */
#define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}                     /* c    */
#define UFMT_INT            {ufmt_int, u_scanf_integer_handler}                     /* d, i */
#define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}                    /* u    */
#define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}                       /* o    */
#define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}                         /* x, X */
#define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}                   /* f    */
#define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}               /* e, E */
#define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}                   /* g, G */
#define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}                     /* n    */
#define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}                  /* [    */

/* non-ANSI extensions */
/* Use US-ASCII characters only for formatting */

#define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}                 /* p    */
#define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}                 /* V    */
#define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}                  /* P    */
#define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}                     /* C; K is old format */
#define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}                 /* S; U is old format */


/* Entry for a character that has no conversion: no handler is called */
#define UFMT_EMPTY          {ufmt_empty, NULL}
279
280 /**
281 * A u_scanf handler function.
282 * A u_scanf handler is responsible for handling a single u_scanf
283 * format specification, for example 'd' or 's'.
284 * @param stream The UFILE to which to write output.
285 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
286 * information on the format specification.
287 * @param args A pointer to the argument data
288 * @param fmt A pointer to the first character in the format string
289 * following the spec.
290 * @param fmtConsumed On output, set to the number of characters consumed
291 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
292 * @param argConverted The number of arguments converted and assigned, or -1 if an
293 * error occurred.
294 * @return The number of code points consumed during reading.
295 */
296 typedef int32_t (*u_scanf_handler) (UFILE *stream,
297 u_scanf_spec_info *info,
298 ufmt_args *args,
299 const UChar *fmt,
300 int32_t *fmtConsumed,
301 int32_t *argConverted);
302
303 typedef struct u_scanf_info {
304 ufmt_type_info info;
305 u_scanf_handler handler;
306 } u_scanf_info;
307
/* Declared number of entries in the handler table */
#define USCANF_NUM_FMT_HANDLERS     108

/* We do not use handlers for 0-0x1f, so table index 0 is character 0x20 */
#define USCANF_BASE_FMT_HANDLERS    0x20
312
313
314 static int32_t
315 u_scanf_skip_leading_ws(UFILE *input,
316 UChar pad)
317 {
318 UChar c;
319 int32_t count = 0;
320 UBool isNotEOF;
321
322 /* skip all leading ws in the input */
323 while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) )
324 {
325 count++;
326 }
327
328 /* put the final character back on the input */
329 if(isNotEOF)
330 u_fungetc(c, input);
331
332 return count;
333 }
334
335 static int32_t
336 u_scanf_simple_percent_handler(UFILE *input,
337 u_scanf_spec_info *info,
338 ufmt_args *args,
339 const UChar *fmt,
340 int32_t *fmtConsumed,
341 int32_t *argConverted)
342 {
343 /* make sure the next character in the input is a percent */
344 *argConverted = 0;
345 if(u_fgetc(input) != 0x0025) {
346 *argConverted = -1;
347 }
348 return 1;
349 }
350
351 static int32_t
352 u_scanf_count_handler(UFILE *input,
353 u_scanf_spec_info *info,
354 ufmt_args *args,
355 const UChar *fmt,
356 int32_t *fmtConsumed,
357 int32_t *argConverted)
358 {
359 /* in the special case of count, the u_scanf_spec_info's width */
360 /* will contain the # of items converted thus far */
361 if (!info->fSkipArg) {
362 if (info->fIsShort)
363 *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth);
364 else if (info->fIsLongLong)
365 *(int64_t*)(args[0].ptrValue) = info->fWidth;
366 else
367 *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth);
368 }
369
370 /* we converted 0 args */
371 return 0;
372 }
373
374 static int32_t
375 u_scanf_double_handler(UFILE *input,
376 u_scanf_spec_info *info,
377 ufmt_args *args,
378 const UChar *fmt,
379 int32_t *fmtConsumed,
380 int32_t *argConverted)
381 {
382 int32_t len;
383 double num;
384 UNumberFormat *format;
385 int32_t parsePos = 0;
386 UErrorCode status = U_ZERO_ERROR;
387
388
389 /* skip all ws in the input */
390 u_scanf_skip_leading_ws(input, info->fPadChar);
391
392 /* fill the input's internal buffer */
393 ufile_fill_uchar_buffer(input);
394
395 /* determine the size of the input's buffer */
396 len = input->str.fLimit - input->str.fPos;
397
398 /* truncate to the width, if specified */
399 if(info->fWidth != -1)
400 len = ufmt_min(len, info->fWidth);
401
402 /* get the formatter */
403 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
404
405 /* handle error */
406 if(format == 0)
407 return 0;
408
409 /* parse the number */
410 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
411
412 if (!info->fSkipArg) {
413 *(double*)(args[0].ptrValue) = num;
414 }
415
416 /* mask off any necessary bits */
417 /* if(! info->fIsLong_double)
418 num &= DBL_MAX;*/
419
420 /* update the input's position to reflect consumed data */
421 input->str.fPos += parsePos;
422
423 /* we converted 1 arg */
424 *argConverted = !info->fSkipArg;
425 return parsePos;
426 }
427
428 static int32_t
429 u_scanf_scientific_handler(UFILE *input,
430 u_scanf_spec_info *info,
431 ufmt_args *args,
432 const UChar *fmt,
433 int32_t *fmtConsumed,
434 int32_t *argConverted)
435 {
436 int32_t len;
437 double num;
438 UNumberFormat *format;
439 int32_t parsePos = 0;
440 UErrorCode status = U_ZERO_ERROR;
441
442
443 /* skip all ws in the input */
444 u_scanf_skip_leading_ws(input, info->fPadChar);
445
446 /* fill the input's internal buffer */
447 ufile_fill_uchar_buffer(input);
448
449 /* determine the size of the input's buffer */
450 len = input->str.fLimit - input->str.fPos;
451
452 /* truncate to the width, if specified */
453 if(info->fWidth != -1)
454 len = ufmt_min(len, info->fWidth);
455
456 /* get the formatter */
457 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
458
459 /* handle error */
460 if(format == 0)
461 return 0;
462
463 /* parse the number */
464 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
465
466 if (!info->fSkipArg) {
467 *(double*)(args[0].ptrValue) = num;
468 }
469
470 /* mask off any necessary bits */
471 /* if(! info->fIsLong_double)
472 num &= DBL_MAX;*/
473
474 /* update the input's position to reflect consumed data */
475 input->str.fPos += parsePos;
476
477 /* we converted 1 arg */
478 *argConverted = !info->fSkipArg;
479 return parsePos;
480 }
481
482 static int32_t
483 u_scanf_scidbl_handler(UFILE *input,
484 u_scanf_spec_info *info,
485 ufmt_args *args,
486 const UChar *fmt,
487 int32_t *fmtConsumed,
488 int32_t *argConverted)
489 {
490 int32_t len;
491 double num;
492 UNumberFormat *scientificFormat, *genericFormat;
493 /*int32_t scientificResult, genericResult;*/
494 double scientificResult, genericResult;
495 int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
496 UErrorCode scientificStatus = U_ZERO_ERROR;
497 UErrorCode genericStatus = U_ZERO_ERROR;
498
499
500 /* since we can't determine by scanning the characters whether */
501 /* a number was formatted in the 'f' or 'g' styles, parse the */
502 /* string with both formatters, and assume whichever one */
503 /* parsed the most is the correct formatter to use */
504
505
506 /* skip all ws in the input */
507 u_scanf_skip_leading_ws(input, info->fPadChar);
508
509 /* fill the input's internal buffer */
510 ufile_fill_uchar_buffer(input);
511
512 /* determine the size of the input's buffer */
513 len = input->str.fLimit - input->str.fPos;
514
515 /* truncate to the width, if specified */
516 if(info->fWidth != -1)
517 len = ufmt_min(len, info->fWidth);
518
519 /* get the formatters */
520 scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
521 genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
522
523 /* handle error */
524 if(scientificFormat == 0 || genericFormat == 0)
525 return 0;
526
527 /* parse the number using each format*/
528
529 scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
530 &scientificParsePos, &scientificStatus);
531
532 genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
533 &genericParsePos, &genericStatus);
534
535 /* determine which parse made it farther */
536 if(scientificParsePos > genericParsePos) {
537 /* stash the result in num */
538 num = scientificResult;
539 /* update the input's position to reflect consumed data */
540 parsePos += scientificParsePos;
541 }
542 else {
543 /* stash the result in num */
544 num = genericResult;
545 /* update the input's position to reflect consumed data */
546 parsePos += genericParsePos;
547 }
548 input->str.fPos += parsePos;
549
550 if (!info->fSkipArg) {
551 *(double*)(args[0].ptrValue) = num;
552 }
553
554 /* mask off any necessary bits */
555 /* if(! info->fIsLong_double)
556 num &= DBL_MAX;*/
557
558 /* we converted 1 arg */
559 *argConverted = !info->fSkipArg;
560 return parsePos;
561 }
562
563 static int32_t
564 u_scanf_integer_handler(UFILE *input,
565 u_scanf_spec_info *info,
566 ufmt_args *args,
567 const UChar *fmt,
568 int32_t *fmtConsumed,
569 int32_t *argConverted)
570 {
571 int32_t len;
572 void *num = (void*) (args[0].ptrValue);
573 UNumberFormat *format;
574 int32_t parsePos = 0;
575 UErrorCode status = U_ZERO_ERROR;
576 int64_t result;
577
578
579 /* skip all ws in the input */
580 u_scanf_skip_leading_ws(input, info->fPadChar);
581
582 /* fill the input's internal buffer */
583 ufile_fill_uchar_buffer(input);
584
585 /* determine the size of the input's buffer */
586 len = input->str.fLimit - input->str.fPos;
587
588 /* truncate to the width, if specified */
589 if(info->fWidth != -1)
590 len = ufmt_min(len, info->fWidth);
591
592 /* get the formatter */
593 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
594
595 /* handle error */
596 if(format == 0)
597 return 0;
598
599 /* parse the number */
600 result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status);
601
602 /* mask off any necessary bits */
603 if (!info->fSkipArg) {
604 if (info->fIsShort)
605 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
606 else if (info->fIsLongLong)
607 *(int64_t*)num = result;
608 else
609 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
610 }
611
612 /* update the input's position to reflect consumed data */
613 input->str.fPos += parsePos;
614
615 /* we converted 1 arg */
616 *argConverted = !info->fSkipArg;
617 return parsePos;
618 }
619
620 static int32_t
621 u_scanf_uinteger_handler(UFILE *input,
622 u_scanf_spec_info *info,
623 ufmt_args *args,
624 const UChar *fmt,
625 int32_t *fmtConsumed,
626 int32_t *argConverted)
627 {
628 /* TODO Fix this when Numberformat handles uint64_t */
629 return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
630 }
631
632 static int32_t
633 u_scanf_percent_handler(UFILE *input,
634 u_scanf_spec_info *info,
635 ufmt_args *args,
636 const UChar *fmt,
637 int32_t *fmtConsumed,
638 int32_t *argConverted)
639 {
640 int32_t len;
641 double num;
642 UNumberFormat *format;
643 int32_t parsePos = 0;
644 UErrorCode status = U_ZERO_ERROR;
645
646
647 /* skip all ws in the input */
648 u_scanf_skip_leading_ws(input, info->fPadChar);
649
650 /* fill the input's internal buffer */
651 ufile_fill_uchar_buffer(input);
652
653 /* determine the size of the input's buffer */
654 len = input->str.fLimit - input->str.fPos;
655
656 /* truncate to the width, if specified */
657 if(info->fWidth != -1)
658 len = ufmt_min(len, info->fWidth);
659
660 /* get the formatter */
661 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
662
663 /* handle error */
664 if(format == 0)
665 return 0;
666
667 /* parse the number */
668 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
669
670 if (!info->fSkipArg) {
671 *(double*)(args[0].ptrValue) = num;
672 }
673
674 /* mask off any necessary bits */
675 /* if(! info->fIsLong_double)
676 num &= DBL_MAX;*/
677
678 /* update the input's position to reflect consumed data */
679 input->str.fPos += parsePos;
680
681 /* we converted 1 arg */
682 *argConverted = !info->fSkipArg;
683 return parsePos;
684 }
685
686 static int32_t
687 u_scanf_string_handler(UFILE *input,
688 u_scanf_spec_info *info,
689 ufmt_args *args,
690 const UChar *fmt,
691 int32_t *fmtConsumed,
692 int32_t *argConverted)
693 {
694 const UChar *source;
695 UConverter *conv;
696 char *arg = (char*)(args[0].ptrValue);
697 char *alias = arg;
698 char *limit;
699 UErrorCode status = U_ZERO_ERROR;
700 int32_t count;
701 UChar c;
702 UBool isNotEOF = FALSE;
703
704 /* skip all ws in the input */
705 if (info->fIsString) {
706 u_scanf_skip_leading_ws(input, info->fPadChar);
707 }
708
709 /* get the string one character at a time, truncating to the width */
710 count = 0;
711
712 /* open the default converter */
713 conv = u_getDefaultConverter(&status);
714
715 if(U_FAILURE(status))
716 return -1;
717
718 while( (info->fWidth == -1 || count < info->fWidth)
719 && (isNotEOF = ufile_getch(input, &c))
720 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
721 {
722
723 if (!info->fSkipArg) {
724 /* put the character from the input onto the target */
725 source = &c;
726 /* Since we do this one character at a time, do it this way. */
727 if (info->fWidth > 0) {
728 limit = alias + info->fWidth - count;
729 }
730 else {
731 limit = alias + ucnv_getMaxCharSize(conv);
732 }
733
734 /* convert the character to the default codepage */
735 ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
736 NULL, TRUE, &status);
737
738 if(U_FAILURE(status)) {
739 /* clean up */
740 u_releaseDefaultConverter(conv);
741 return -1;
742 }
743 }
744
745 /* increment the count */
746 ++count;
747 }
748
749 /* put the final character we read back on the input */
750 if (!info->fSkipArg) {
751 if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
752 u_fungetc(c, input);
753
754 /* add the terminator */
755 if (info->fIsString) {
756 *alias = 0x00;
757 }
758 }
759
760 /* clean up */
761 u_releaseDefaultConverter(conv);
762
763 /* we converted 1 arg */
764 *argConverted = !info->fSkipArg;
765 return count;
766 }
767
768 static int32_t
769 u_scanf_char_handler(UFILE *input,
770 u_scanf_spec_info *info,
771 ufmt_args *args,
772 const UChar *fmt,
773 int32_t *fmtConsumed,
774 int32_t *argConverted)
775 {
776 if (info->fWidth < 0) {
777 info->fWidth = 1;
778 }
779 info->fIsString = FALSE;
780 return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
781 }
782
783 static int32_t
784 u_scanf_ustring_handler(UFILE *input,
785 u_scanf_spec_info *info,
786 ufmt_args *args,
787 const UChar *fmt,
788 int32_t *fmtConsumed,
789 int32_t *argConverted)
790 {
791 UChar *arg = (UChar*)(args[0].ptrValue);
792 UChar *alias = arg;
793 int32_t count;
794 UChar c;
795 UBool isNotEOF = FALSE;
796
797 /* skip all ws in the input */
798 if (info->fIsString) {
799 u_scanf_skip_leading_ws(input, info->fPadChar);
800 }
801
802 /* get the string one character at a time, truncating to the width */
803 count = 0;
804
805 while( (info->fWidth == -1 || count < info->fWidth)
806 && (isNotEOF = ufile_getch(input, &c))
807 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
808 {
809
810 /* put the character from the input onto the target */
811 if (!info->fSkipArg) {
812 *alias++ = c;
813 }
814
815 /* increment the count */
816 ++count;
817 }
818
819 /* put the final character we read back on the input */
820 if (!info->fSkipArg) {
821 if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
822 u_fungetc(c, input);
823 }
824
825 /* add the terminator */
826 if (info->fIsString) {
827 *alias = 0x0000;
828 }
829 }
830
831 /* we converted 1 arg */
832 *argConverted = !info->fSkipArg;
833 return count;
834 }
835
836 static int32_t
837 u_scanf_uchar_handler(UFILE *input,
838 u_scanf_spec_info *info,
839 ufmt_args *args,
840 const UChar *fmt,
841 int32_t *fmtConsumed,
842 int32_t *argConverted)
843 {
844 if (info->fWidth < 0) {
845 info->fWidth = 1;
846 }
847 info->fIsString = FALSE;
848 return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
849 }
850
851 static int32_t
852 u_scanf_spellout_handler(UFILE *input,
853 u_scanf_spec_info *info,
854 ufmt_args *args,
855 const UChar *fmt,
856 int32_t *fmtConsumed,
857 int32_t *argConverted)
858 {
859 int32_t len;
860 double num;
861 UNumberFormat *format;
862 int32_t parsePos = 0;
863 UErrorCode status = U_ZERO_ERROR;
864
865
866 /* skip all ws in the input */
867 u_scanf_skip_leading_ws(input, info->fPadChar);
868
869 /* fill the input's internal buffer */
870 ufile_fill_uchar_buffer(input);
871
872 /* determine the size of the input's buffer */
873 len = input->str.fLimit - input->str.fPos;
874
875 /* truncate to the width, if specified */
876 if(info->fWidth != -1)
877 len = ufmt_min(len, info->fWidth);
878
879 /* get the formatter */
880 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
881
882 /* handle error */
883 if(format == 0)
884 return 0;
885
886 /* parse the number */
887 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
888
889 if (!info->fSkipArg) {
890 *(double*)(args[0].ptrValue) = num;
891 }
892
893 /* mask off any necessary bits */
894 /* if(! info->fIsLong_double)
895 num &= DBL_MAX;*/
896
897 /* update the input's position to reflect consumed data */
898 input->str.fPos += parsePos;
899
900 /* we converted 1 arg */
901 *argConverted = !info->fSkipArg;
902 return parsePos;
903 }
904
905 static int32_t
906 u_scanf_hex_handler(UFILE *input,
907 u_scanf_spec_info *info,
908 ufmt_args *args,
909 const UChar *fmt,
910 int32_t *fmtConsumed,
911 int32_t *argConverted)
912 {
913 int32_t len;
914 void *num = (void*) (args[0].ptrValue);
915 int64_t result;
916
917 /* skip all ws in the input */
918 u_scanf_skip_leading_ws(input, info->fPadChar);
919
920 /* fill the input's internal buffer */
921 ufile_fill_uchar_buffer(input);
922
923 /* determine the size of the input's buffer */
924 len = input->str.fLimit - input->str.fPos;
925
926 /* truncate to the width, if specified */
927 if(info->fWidth != -1)
928 len = ufmt_min(len, info->fWidth);
929
930 /* check for alternate form */
931 if( *(input->str.fPos) == 0x0030 &&
932 (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
933
934 /* skip the '0' and 'x' or 'X' if present */
935 input->str.fPos += 2;
936 len -= 2;
937 }
938
939 /* parse the number */
940 result = ufmt_uto64(input->str.fPos, &len, 16);
941
942 /* update the input's position to reflect consumed data */
943 input->str.fPos += len;
944
945 /* mask off any necessary bits */
946 if (!info->fSkipArg) {
947 if (info->fIsShort)
948 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
949 else if (info->fIsLongLong)
950 *(int64_t*)num = result;
951 else
952 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
953 }
954
955 /* we converted 1 arg */
956 *argConverted = !info->fSkipArg;
957 return len;
958 }
959
960 static int32_t
961 u_scanf_octal_handler(UFILE *input,
962 u_scanf_spec_info *info,
963 ufmt_args *args,
964 const UChar *fmt,
965 int32_t *fmtConsumed,
966 int32_t *argConverted)
967 {
968 int32_t len;
969 void *num = (void*) (args[0].ptrValue);
970 int64_t result;
971
972 /* skip all ws in the input */
973 u_scanf_skip_leading_ws(input, info->fPadChar);
974
975 /* fill the input's internal buffer */
976 ufile_fill_uchar_buffer(input);
977
978 /* determine the size of the input's buffer */
979 len = input->str.fLimit - input->str.fPos;
980
981 /* truncate to the width, if specified */
982 if(info->fWidth != -1)
983 len = ufmt_min(len, info->fWidth);
984
985 /* parse the number */
986 result = ufmt_uto64(input->str.fPos, &len, 8);
987
988 /* update the input's position to reflect consumed data */
989 input->str.fPos += len;
990
991 /* mask off any necessary bits */
992 if (!info->fSkipArg) {
993 if (info->fIsShort)
994 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
995 else if (info->fIsLongLong)
996 *(int64_t*)num = result;
997 else
998 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
999 }
1000
1001 /* we converted 1 arg */
1002 *argConverted = !info->fSkipArg;
1003 return len;
1004 }
1005
1006 static int32_t
1007 u_scanf_pointer_handler(UFILE *input,
1008 u_scanf_spec_info *info,
1009 ufmt_args *args,
1010 const UChar *fmt,
1011 int32_t *fmtConsumed,
1012 int32_t *argConverted)
1013 {
1014 int32_t len;
1015 void *result;
1016 void **p = (void**)(args[0].ptrValue);
1017
1018
1019 /* skip all ws in the input */
1020 u_scanf_skip_leading_ws(input, info->fPadChar);
1021
1022 /* fill the input's internal buffer */
1023 ufile_fill_uchar_buffer(input);
1024
1025 /* determine the size of the input's buffer */
1026 len = input->str.fLimit - input->str.fPos;
1027
1028 /* truncate to the width, if specified */
1029 if(info->fWidth != -1) {
1030 len = ufmt_min(len, info->fWidth);
1031 }
1032
1033 #ifdef OS400
1034 /* TODO: Fix this code so that it will work on all platforms */
1035 {
1036 int64_t result[2];
1037 int32_t lenOrig = len;
1038
1039 /* Make sure that we don't consume too much */
1040 if (len > (int32_t)(sizeof(int64_t)*2)) {
1041 len = (int32_t)(sizeof(int64_t)*2);
1042 }
1043
1044 /* parse the pointer - set first half of big endian pointer */
1045 result[0] = (int64_t)ufmt_utop(input->str.fPos, &len);
1046
1047 /* update the input's position to reflect consumed data */
1048 input->str.fPos += len;
1049 len = lenOrig - len;
1050
1051 /* Make sure that we don't consume too much */
1052 if (len > (int32_t)(sizeof(int64_t)*2)) {
1053 len = (int32_t)(sizeof(int64_t)*2);
1054 }
1055
1056 /* parse the pointer - set second half of big endian pointer */
1057 result[1] = (int64_t)ufmt_utop(input->str.fPos, &len);
1058
1059 if (!info->fSkipArg) {
1060 p = *((void **)result);
1061 }
1062 }
1063 #else
1064 /* Make sure that we don't consume too much */
1065 if (len > (int32_t)(sizeof(void*)*2)) {
1066 len = (int32_t)(sizeof(void*)*2);
1067 }
1068
1069 /* parse the pointer - assign to temporary value */
1070 result = ufmt_utop(input->str.fPos, &len);
1071
1072 if (!info->fSkipArg) {
1073 *p = result;
1074 }
1075
1076 #endif
1077
1078 /* update the input's position to reflect consumed data */
1079 input->str.fPos += len;
1080
1081 /* we converted 1 arg */
1082 *argConverted = !info->fSkipArg;
1083 return len;
1084 }
1085
1086 static int32_t
1087 u_scanf_scanset_handler(UFILE *input,
1088 u_scanf_spec_info *info,
1089 ufmt_args *args,
1090 const UChar *fmt,
1091 int32_t *fmtConsumed,
1092 int32_t *argConverted)
1093 {
1094 USet *scanset;
1095 UErrorCode status = U_ZERO_ERROR;
1096 int32_t chLeft = INT32_MAX;
1097 UChar32 c;
1098 UChar *alias = (UChar*) (args[0].ptrValue);
1099 UBool isNotEOF = FALSE;
1100 UBool readCharacter = FALSE;
1101
1102 /* Create an empty set */
1103 scanset = uset_open(0, -1);
1104
1105 /* Back up one to get the [ */
1106 fmt--;
1107
1108 /* truncate to the width, if specified and alias the target */
1109 if(info->fWidth >= 0) {
1110 chLeft = info->fWidth;
1111 }
1112
1113 /* parse the scanset from the fmt string */
1114 *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
1115
1116 /* verify that the parse was successful */
1117 if (U_SUCCESS(status)) {
1118 c=0;
1119
1120 /* grab characters one at a time and make sure they are in the scanset */
1121 while(chLeft > 0) {
1122 if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
1123 readCharacter = TRUE;
1124 if (!info->fSkipArg) {
1125 int32_t idx = 0;
1126 UBool isError = FALSE;
1127
1128 U16_APPEND(alias, idx, chLeft, c, isError);
1129 if (isError) {
1130 break;
1131 }
1132 alias += idx;
1133 }
1134 chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
1135 }
1136 else {
1137 /* if the character's not in the scanset, break out */
1138 break;
1139 }
1140 }
1141
1142 /* put the final character we read back on the input */
1143 if(isNotEOF && chLeft > 0) {
1144 u_fungetc(c, input);
1145 }
1146 }
1147
1148 uset_close(scanset);
1149
1150 /* if we didn't match at least 1 character, fail */
1151 if(!readCharacter)
1152 return -1;
1153 /* otherwise, add the terminator */
1154 else if (!info->fSkipArg) {
1155 *alias = 0x00;
1156 }
1157
1158 /* we converted 1 arg */
1159 *argConverted = !info->fSkipArg;
1160 return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
1161 }
1162
1163 /* Use US-ASCII characters only for formatting. Most codepages have
1164 characters 20-7F from Unicode. Using any other codepage specific
1165 characters will make it very difficult to format the string on
1166 non-Unicode machines */
1167 static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
1168 /* 0x20 */
1169 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1170 UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY,
1171 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1172 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1173
1174 /* 0x30 */
1175 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1176 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1177 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1178 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1179
1180 /* 0x40 */
1181 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR,
1182 UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL,
1183 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1184 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/,
1185 #else
1186 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1187 #endif
1188 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1189
1190 /* 0x50 */
1191 UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING,
1192 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1193 UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY,
1194 #else
1195 UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY,
1196 #endif
1197 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET,
1198 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1199
1200 /* 0x60 */
1201 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR,
1202 UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL,
1203 UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY,
1204 UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL,
1205
1206 /* 0x70 */
1207 UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING,
1208 UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY,
1209 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1210 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1211 };
1212
1213 U_CFUNC int32_t
1214 u_scanf_parse(UFILE *f,
1215 const UChar *patternSpecification,
1216 va_list ap)
1217 {
1218 const UChar *alias;
1219 int32_t count, converted, argConsumed, cpConsumed;
1220 uint16_t handlerNum;
1221
1222 ufmt_args args;
1223 u_scanf_spec spec;
1224 ufmt_type_info info;
1225 u_scanf_handler handler;
1226
1227 /* alias the pattern */
1228 alias = patternSpecification;
1229
1230 /* haven't converted anything yet */
1231 argConsumed = 0;
1232 converted = 0;
1233 cpConsumed = 0;
1234
1235 /* iterate through the pattern */
1236 for(;;) {
1237
1238 /* match any characters up to the next '%' */
1239 while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
1240 alias++;
1241 }
1242
1243 /* if we aren't at a '%', or if we're at end of string, break*/
1244 if(*alias != UP_PERCENT || *alias == 0x0000)
1245 break;
1246
1247 /* parse the specifier */
1248 count = u_scanf_parse_spec(alias, &spec);
1249
1250 /* update the pointer in pattern */
1251 alias += count;
1252
1253 handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
1254 if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
1255 /* skip the argument, if necessary */
1256 /* query the info function for argument information */
1257 info = g_u_scanf_infos[ handlerNum ].info;
1258 if (info != ufmt_count && u_feof(f)) {
1259 break;
1260 }
1261 else if(spec.fInfo.fSkipArg) {
1262 args.ptrValue = NULL;
1263 }
1264 else {
1265 switch(info) {
1266 case ufmt_count:
1267 /* set the spec's width to the # of items converted */
1268 spec.fInfo.fWidth = cpConsumed;
1269 /* fall through to next case */
1270 case ufmt_char:
1271 case ufmt_uchar:
1272 case ufmt_int:
1273 case ufmt_string:
1274 case ufmt_ustring:
1275 case ufmt_pointer:
1276 case ufmt_float:
1277 case ufmt_double:
1278 args.ptrValue = va_arg(ap, void*);
1279 break;
1280
1281 default:
1282 /* else args is ignored */
1283 args.ptrValue = NULL;
1284 break;
1285 }
1286 }
1287
1288 /* call the handler function */
1289 handler = g_u_scanf_infos[ handlerNum ].handler;
1290 if(handler != 0) {
1291
1292 /* reset count to 1 so that += for alias works. */
1293 count = 1;
1294
1295 cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
1296
1297 /* if the handler encountered an error condition, break */
1298 if(argConsumed < 0) {
1299 converted = -1;
1300 break;
1301 }
1302
1303 /* add to the # of items converted */
1304 converted += argConsumed;
1305
1306 /* update the pointer in pattern */
1307 alias += count-1;
1308 }
1309 /* else do nothing */
1310 }
1311 /* else do nothing */
1312
1313 /* just ignore unknown tags */
1314 }
1315
1316 /* return # of items converted */
1317 return converted;
1318 }
1319
1320 #endif /* #if !UCONFIG_NO_FORMATTING */