]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/io/uscanf_p.c
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / io / uscanf_p.c
... / ...
CommitLineData
1/*
2*******************************************************************************
3*
4* Copyright (C) 1998-2006, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8*
9* File uscanf_p.c
10*
11* Modification History:
12*
13* Date Name Description
14* 12/02/98 stephen Creation.
15* 03/13/99 stephen Modified for new C API.
16*******************************************************************************
17*/
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_FORMATTING
22
23#include "unicode/uchar.h"
24#include "unicode/ustring.h"
25#include "unicode/unum.h"
26#include "unicode/udat.h"
27#include "unicode/uset.h"
28#include "uscanf.h"
29#include "ufmt_cmn.h"
30#include "ufile.h"
31#include "locbund.h"
32
33#include "cmemory.h"
34#include "ustr_cnv.h"
35
/* flag characters for u_scanf */
#define FLAG_ASTERISK   0x002A  /* '*' : parse the field but do not store it */
#define FLAG_PAREN      0x0028  /* '(' : introduces a pad character */

/*
 * Non-zero when s is one of the u_scanf flag characters.
 * The expansion is wrapped in parentheses so that it behaves as a single
 * operand (e.g. under '!' or next to '&&').  s is evaluated more than once.
 */
#define ISFLAG(s)   ((s) == FLAG_ASTERISK || \
                     (s) == FLAG_PAREN)

/* special characters for u_scanf */
#define SPEC_DOLLARSIGN 0x0024  /* '$' : ends a positional argument number */
45
/* unicode digits */
#define DIGIT_ZERO  0x0030
#define DIGIT_ONE   0x0031
#define DIGIT_TWO   0x0032
#define DIGIT_THREE 0x0033
#define DIGIT_FOUR  0x0034
#define DIGIT_FIVE  0x0035
#define DIGIT_SIX   0x0036
#define DIGIT_SEVEN 0x0037
#define DIGIT_EIGHT 0x0038
#define DIGIT_NINE  0x0039

/*
 * Non-zero when s is one of DIGIT_ZERO..DIGIT_NINE.  The ten constants are
 * contiguous, so a range test is equivalent to comparing against each one.
 * The expansion is wrapped in parentheses so that it behaves as a single
 * operand (e.g. under '!' or next to '&&').  s is evaluated twice.
 */
#define ISDIGIT(s)  ((s) >= DIGIT_ZERO && (s) <= DIGIT_NINE)
68
/* u_scanf modifiers */
#define MOD_H       0x0068  /* 'h' */
#define MOD_LOWERL  0x006C  /* 'l' */
#define MOD_L       0x004C  /* 'L' */

/*
 * Non-zero when s is one of the size modifier characters.
 * The expansion is wrapped in parentheses so that it behaves as a single
 * operand (e.g. under '!' or next to '&&').  s is evaluated more than once.
 */
#define ISMOD(s)    ((s) == MOD_H || \
                     (s) == MOD_LOWERL || \
                     (s) == MOD_L)
77
78/**
79 * Struct encapsulating a single uscanf format specification.
80 */
81typedef struct u_scanf_spec_info {
82 int32_t fWidth; /* Width */
83
84 UChar fSpec; /* Format specification */
85
86 UChar fPadChar; /* Padding character */
87
88 UBool fSkipArg; /* TRUE if arg should be skipped */
89 UBool fIsLongDouble; /* L flag */
90 UBool fIsShort; /* h flag */
91 UBool fIsLong; /* l flag */
92 UBool fIsLongLong; /* ll flag */
93 UBool fIsString; /* TRUE if this is a NULL-terminated string. */
94} u_scanf_spec_info;
95
96
97/**
98 * Struct encapsulating a single u_scanf format specification.
99 */
100typedef struct u_scanf_spec {
101 u_scanf_spec_info fInfo; /* Information on this spec */
102 int32_t fArgPos; /* Position of data in arg list */
103} u_scanf_spec;
104
105/**
106 * Parse a single u_scanf format specifier in Unicode.
107 * @param fmt A pointer to a '%' character in a u_scanf format specification.
108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
109 * format specifier.
110 * @return The number of characters contained in this specifier.
111 */
112static int32_t
113u_scanf_parse_spec (const UChar *fmt,
114 u_scanf_spec *spec)
115{
116 const UChar *s = fmt;
117 const UChar *backup;
118 u_scanf_spec_info *info = &(spec->fInfo);
119
120 /* initialize spec to default values */
121 spec->fArgPos = -1;
122
123 info->fWidth = -1;
124 info->fSpec = 0x0000;
125 info->fPadChar = 0x0020;
126 info->fSkipArg = FALSE;
127 info->fIsLongDouble = FALSE;
128 info->fIsShort = FALSE;
129 info->fIsLong = FALSE;
130 info->fIsLongLong = FALSE;
131 info->fIsString = TRUE;
132
133
134 /* skip over the initial '%' */
135 s++;
136
137 /* Check for positional argument */
138 if(ISDIGIT(*s)) {
139
140 /* Save the current position */
141 backup = s;
142
143 /* handle positional parameters */
144 if(ISDIGIT(*s)) {
145 spec->fArgPos = (int) (*s++ - DIGIT_ZERO);
146
147 while(ISDIGIT(*s)) {
148 spec->fArgPos *= 10;
149 spec->fArgPos += (int) (*s++ - DIGIT_ZERO);
150 }
151 }
152
153 /* if there is no '$', don't read anything */
154 if(*s != SPEC_DOLLARSIGN) {
155 spec->fArgPos = -1;
156 s = backup;
157 }
158 /* munge the '$' */
159 else
160 s++;
161 }
162
163 /* Get any format flags */
164 while(ISFLAG(*s)) {
165 switch(*s++) {
166
167 /* skip argument */
168 case FLAG_ASTERISK:
169 info->fSkipArg = TRUE;
170 break;
171
172 /* pad character specified */
173 case FLAG_PAREN:
174
175 /* first four characters are hex values for pad char */
176 info->fPadChar = (UChar)ufmt_digitvalue(*s++);
177 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
178 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
179 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
180
181 /* final character is ignored */
182 s++;
183
184 break;
185 }
186 }
187
188 /* Get the width */
189 if(ISDIGIT(*s)){
190 info->fWidth = (int) (*s++ - DIGIT_ZERO);
191
192 while(ISDIGIT(*s)) {
193 info->fWidth *= 10;
194 info->fWidth += (int) (*s++ - DIGIT_ZERO);
195 }
196 }
197
198 /* Get any modifiers */
199 if(ISMOD(*s)) {
200 switch(*s++) {
201
202 /* short */
203 case MOD_H:
204 info->fIsShort = TRUE;
205 break;
206
207 /* long or long long */
208 case MOD_LOWERL:
209 if(*s == MOD_LOWERL) {
210 info->fIsLongLong = TRUE;
211 /* skip over the next 'l' */
212 s++;
213 }
214 else
215 info->fIsLong = TRUE;
216 break;
217
218 /* long double */
219 case MOD_L:
220 info->fIsLongDouble = TRUE;
221 break;
222 }
223 }
224
225 /* finally, get the specifier letter */
226 info->fSpec = *s++;
227
228 /* return # of characters in this specifier */
229 return (int32_t)(s - fmt);
230}
231
#define UP_PERCENT 0x0025


/*
 * Initializers for handler table entries.  Each expands to a pair of
 * { argument type, handler function }.
 * Use US-ASCII characters only for formatting.
 */

/* ---- ANSI style formatting ---- */

#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}   /* %    */
#define UFMT_STRING         {ufmt_string, u_scanf_string_handler}                   /* s    */
#define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}                     /* c    */
#define UFMT_INT            {ufmt_int, u_scanf_integer_handler}                     /* d, i */
#define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}                    /* u    */
#define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}                       /* o    */
#define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}                         /* x, X */
#define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}                   /* f    */
#define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}               /* e, E */
#define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}                   /* g, G */
#define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}                     /* n    */
#define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}                  /* [    */

/* ---- non-ANSI extensions ---- */

#define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}                 /* p    */
#define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}                 /* V    */
#define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}                  /* P    */
#define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}                     /* C; K is old format */
#define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}                 /* S; U is old format */


/* entry for a character that has no handler */
#define UFMT_EMPTY          {ufmt_empty, NULL}
279
280/**
281 * A u_scanf handler function.
282 * A u_scanf handler is responsible for handling a single u_scanf
283 * format specification, for example 'd' or 's'.
284 * @param stream The UFILE to which to write output.
285 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
286 * information on the format specification.
287 * @param args A pointer to the argument data
288 * @param fmt A pointer to the first character in the format string
289 * following the spec.
290 * @param fmtConsumed On output, set to the number of characters consumed
291 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
292 * @param argConverted The number of arguments converted and assigned, or -1 if an
293 * error occurred.
294 * @return The number of code points consumed during reading.
295 */
296typedef int32_t (*u_scanf_handler) (UFILE *stream,
297 u_scanf_spec_info *info,
298 ufmt_args *args,
299 const UChar *fmt,
300 int32_t *fmtConsumed,
301 int32_t *argConverted);
302
303typedef struct u_scanf_info {
304 ufmt_type_info info;
305 u_scanf_handler handler;
306} u_scanf_info;
307
/* number of entries in the handler table */
#define USCANF_NUM_FMT_HANDLERS     108
/* capacity, in UChars, of the buffer used to fetch a number symbol */
#define USCANF_SYMBOL_BUFFER_SIZE   8

/* We do not use handlers for 0-0x1f */
#define USCANF_BASE_FMT_HANDLERS    0x20
313
314
315static int32_t
316u_scanf_skip_leading_ws(UFILE *input,
317 UChar pad)
318{
319 UChar c;
320 int32_t count = 0;
321 UBool isNotEOF;
322
323 /* skip all leading ws in the input */
324 while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) )
325 {
326 count++;
327 }
328
329 /* put the final character back on the input */
330 if(isNotEOF)
331 u_fungetc(c, input);
332
333 return count;
334}
335
336/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
337static int32_t
338u_scanf_skip_leading_positive_sign(UFILE *input,
339 UNumberFormat *format,
340 UErrorCode *status)
341{
342 UChar c;
343 int32_t count = 0;
344 UBool isNotEOF;
345 UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
346 int32_t symbolLen;
347 UErrorCode localStatus = U_ZERO_ERROR;
348
349 if (U_SUCCESS(*status)) {
350 symbolLen = unum_getSymbol(format,
351 UNUM_PLUS_SIGN_SYMBOL,
352 plusSymbol,
353 sizeof(plusSymbol)/sizeof(*plusSymbol),
354 &localStatus);
355
356 if (U_SUCCESS(localStatus)) {
357 /* skip all leading ws in the input */
358 while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) )
359 {
360 count++;
361 }
362
363 /* put the final character back on the input */
364 if(isNotEOF) {
365 u_fungetc(c, input);
366 }
367 }
368 }
369
370 return count;
371}
372
373static int32_t
374u_scanf_simple_percent_handler(UFILE *input,
375 u_scanf_spec_info *info,
376 ufmt_args *args,
377 const UChar *fmt,
378 int32_t *fmtConsumed,
379 int32_t *argConverted)
380{
381 /* make sure the next character in the input is a percent */
382 *argConverted = 0;
383 if(u_fgetc(input) != 0x0025) {
384 *argConverted = -1;
385 }
386 return 1;
387}
388
389static int32_t
390u_scanf_count_handler(UFILE *input,
391 u_scanf_spec_info *info,
392 ufmt_args *args,
393 const UChar *fmt,
394 int32_t *fmtConsumed,
395 int32_t *argConverted)
396{
397 /* in the special case of count, the u_scanf_spec_info's width */
398 /* will contain the # of items converted thus far */
399 if (!info->fSkipArg) {
400 if (info->fIsShort)
401 *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth);
402 else if (info->fIsLongLong)
403 *(int64_t*)(args[0].ptrValue) = info->fWidth;
404 else
405 *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth);
406 }
407 *argConverted = 0;
408
409 /* we converted 0 args */
410 return 0;
411}
412
413static int32_t
414u_scanf_double_handler(UFILE *input,
415 u_scanf_spec_info *info,
416 ufmt_args *args,
417 const UChar *fmt,
418 int32_t *fmtConsumed,
419 int32_t *argConverted)
420{
421 int32_t len;
422 double num;
423 UNumberFormat *format;
424 int32_t parsePos = 0;
425 int32_t skipped;
426 UErrorCode status = U_ZERO_ERROR;
427
428
429 /* skip all ws in the input */
430 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
431
432 /* fill the input's internal buffer */
433 ufile_fill_uchar_buffer(input);
434
435 /* determine the size of the input's buffer */
436 len = (int32_t)(input->str.fLimit - input->str.fPos);
437
438 /* truncate to the width, if specified */
439 if(info->fWidth != -1)
440 len = ufmt_min(len, info->fWidth);
441
442 /* get the formatter */
443 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
444
445 /* handle error */
446 if(format == 0)
447 return 0;
448
449 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
450 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
451
452 /* parse the number */
453 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
454
455 if (!info->fSkipArg) {
456 if (info->fIsLong)
457 *(double*)(args[0].ptrValue) = num;
458 else if (info->fIsLongDouble)
459 *(long double*)(args[0].ptrValue) = num;
460 else
461 *(float*)(args[0].ptrValue) = (float)num;
462 }
463
464 /* mask off any necessary bits */
465 /* if(! info->fIsLong_double)
466 num &= DBL_MAX;*/
467
468 /* update the input's position to reflect consumed data */
469 input->str.fPos += parsePos;
470
471 /* we converted 1 arg */
472 *argConverted = !info->fSkipArg;
473 return parsePos + skipped;
474}
475
476static int32_t
477u_scanf_scientific_handler(UFILE *input,
478 u_scanf_spec_info *info,
479 ufmt_args *args,
480 const UChar *fmt,
481 int32_t *fmtConsumed,
482 int32_t *argConverted)
483{
484 int32_t len;
485 double num;
486 UNumberFormat *format;
487 int32_t parsePos = 0;
488 int32_t skipped;
489 UErrorCode status = U_ZERO_ERROR;
490
491
492 /* skip all ws in the input */
493 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
494
495 /* fill the input's internal buffer */
496 ufile_fill_uchar_buffer(input);
497
498 /* determine the size of the input's buffer */
499 len = (int32_t)(input->str.fLimit - input->str.fPos);
500
501 /* truncate to the width, if specified */
502 if(info->fWidth != -1)
503 len = ufmt_min(len, info->fWidth);
504
505 /* get the formatter */
506 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
507
508 /* handle error */
509 if(format == 0)
510 return 0;
511
512 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
513 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
514
515 /* parse the number */
516 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
517
518 if (!info->fSkipArg) {
519 if (info->fIsLong)
520 *(double*)(args[0].ptrValue) = num;
521 else if (info->fIsLongDouble)
522 *(long double*)(args[0].ptrValue) = num;
523 else
524 *(float*)(args[0].ptrValue) = (float)num;
525 }
526
527 /* mask off any necessary bits */
528 /* if(! info->fIsLong_double)
529 num &= DBL_MAX;*/
530
531 /* update the input's position to reflect consumed data */
532 input->str.fPos += parsePos;
533
534 /* we converted 1 arg */
535 *argConverted = !info->fSkipArg;
536 return parsePos + skipped;
537}
538
539static int32_t
540u_scanf_scidbl_handler(UFILE *input,
541 u_scanf_spec_info *info,
542 ufmt_args *args,
543 const UChar *fmt,
544 int32_t *fmtConsumed,
545 int32_t *argConverted)
546{
547 int32_t len;
548 double num;
549 UNumberFormat *scientificFormat, *genericFormat;
550 /*int32_t scientificResult, genericResult;*/
551 double scientificResult, genericResult;
552 int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
553 int32_t skipped;
554 UErrorCode scientificStatus = U_ZERO_ERROR;
555 UErrorCode genericStatus = U_ZERO_ERROR;
556
557
558 /* since we can't determine by scanning the characters whether */
559 /* a number was formatted in the 'f' or 'g' styles, parse the */
560 /* string with both formatters, and assume whichever one */
561 /* parsed the most is the correct formatter to use */
562
563
564 /* skip all ws in the input */
565 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
566
567 /* fill the input's internal buffer */
568 ufile_fill_uchar_buffer(input);
569
570 /* determine the size of the input's buffer */
571 len = (int32_t)(input->str.fLimit - input->str.fPos);
572
573 /* truncate to the width, if specified */
574 if(info->fWidth != -1)
575 len = ufmt_min(len, info->fWidth);
576
577 /* get the formatters */
578 scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
579 genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
580
581 /* handle error */
582 if(scientificFormat == 0 || genericFormat == 0)
583 return 0;
584
585 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
586 skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);
587
588 /* parse the number using each format*/
589
590 scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
591 &scientificParsePos, &scientificStatus);
592
593 genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
594 &genericParsePos, &genericStatus);
595
596 /* determine which parse made it farther */
597 if(scientificParsePos > genericParsePos) {
598 /* stash the result in num */
599 num = scientificResult;
600 /* update the input's position to reflect consumed data */
601 parsePos += scientificParsePos;
602 }
603 else {
604 /* stash the result in num */
605 num = genericResult;
606 /* update the input's position to reflect consumed data */
607 parsePos += genericParsePos;
608 }
609 input->str.fPos += parsePos;
610
611 if (!info->fSkipArg) {
612 if (info->fIsLong)
613 *(double*)(args[0].ptrValue) = num;
614 else if (info->fIsLongDouble)
615 *(long double*)(args[0].ptrValue) = num;
616 else
617 *(float*)(args[0].ptrValue) = (float)num;
618 }
619
620 /* mask off any necessary bits */
621 /* if(! info->fIsLong_double)
622 num &= DBL_MAX;*/
623
624 /* we converted 1 arg */
625 *argConverted = !info->fSkipArg;
626 return parsePos + skipped;
627}
628
629static int32_t
630u_scanf_integer_handler(UFILE *input,
631 u_scanf_spec_info *info,
632 ufmt_args *args,
633 const UChar *fmt,
634 int32_t *fmtConsumed,
635 int32_t *argConverted)
636{
637 int32_t len;
638 void *num = (void*) (args[0].ptrValue);
639 UNumberFormat *format;
640 int32_t parsePos = 0;
641 int32_t skipped;
642 UErrorCode status = U_ZERO_ERROR;
643 int64_t result;
644
645
646 /* skip all ws in the input */
647 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
648
649 /* fill the input's internal buffer */
650 ufile_fill_uchar_buffer(input);
651
652 /* determine the size of the input's buffer */
653 len = (int32_t)(input->str.fLimit - input->str.fPos);
654
655 /* truncate to the width, if specified */
656 if(info->fWidth != -1)
657 len = ufmt_min(len, info->fWidth);
658
659 /* get the formatter */
660 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
661
662 /* handle error */
663 if(format == 0)
664 return 0;
665
666 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
667 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
668
669 /* parse the number */
670 result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status);
671
672 /* mask off any necessary bits */
673 if (!info->fSkipArg) {
674 if (info->fIsShort)
675 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
676 else if (info->fIsLongLong)
677 *(int64_t*)num = result;
678 else
679 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
680 }
681
682 /* update the input's position to reflect consumed data */
683 input->str.fPos += parsePos;
684
685 /* we converted 1 arg */
686 *argConverted = !info->fSkipArg;
687 return parsePos + skipped;
688}
689
690static int32_t
691u_scanf_uinteger_handler(UFILE *input,
692 u_scanf_spec_info *info,
693 ufmt_args *args,
694 const UChar *fmt,
695 int32_t *fmtConsumed,
696 int32_t *argConverted)
697{
698 /* TODO Fix this when Numberformat handles uint64_t */
699 return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
700}
701
702static int32_t
703u_scanf_percent_handler(UFILE *input,
704 u_scanf_spec_info *info,
705 ufmt_args *args,
706 const UChar *fmt,
707 int32_t *fmtConsumed,
708 int32_t *argConverted)
709{
710 int32_t len;
711 double num;
712 UNumberFormat *format;
713 int32_t parsePos = 0;
714 int32_t skipped;
715 UErrorCode status = U_ZERO_ERROR;
716
717
718 /* skip all ws in the input */
719 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
720
721 /* fill the input's internal buffer */
722 ufile_fill_uchar_buffer(input);
723
724 /* determine the size of the input's buffer */
725 len = (int32_t)(input->str.fLimit - input->str.fPos);
726
727 /* truncate to the width, if specified */
728 if(info->fWidth != -1)
729 len = ufmt_min(len, info->fWidth);
730
731 /* get the formatter */
732 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
733
734 /* handle error */
735 if(format == 0)
736 return 0;
737
738 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
739 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
740
741 /* parse the number */
742 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
743
744 if (!info->fSkipArg) {
745 *(double*)(args[0].ptrValue) = num;
746 }
747
748 /* mask off any necessary bits */
749 /* if(! info->fIsLong_double)
750 num &= DBL_MAX;*/
751
752 /* update the input's position to reflect consumed data */
753 input->str.fPos += parsePos;
754
755 /* we converted 1 arg */
756 *argConverted = !info->fSkipArg;
757 return parsePos;
758}
759
760static int32_t
761u_scanf_string_handler(UFILE *input,
762 u_scanf_spec_info *info,
763 ufmt_args *args,
764 const UChar *fmt,
765 int32_t *fmtConsumed,
766 int32_t *argConverted)
767{
768 const UChar *source;
769 UConverter *conv;
770 char *arg = (char*)(args[0].ptrValue);
771 char *alias = arg;
772 char *limit;
773 UErrorCode status = U_ZERO_ERROR;
774 int32_t count;
775 int32_t skipped = 0;
776 UChar c;
777 UBool isNotEOF = FALSE;
778
779 /* skip all ws in the input */
780 if (info->fIsString) {
781 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
782 }
783
784 /* get the string one character at a time, truncating to the width */
785 count = 0;
786
787 /* open the default converter */
788 conv = u_getDefaultConverter(&status);
789
790 if(U_FAILURE(status))
791 return -1;
792
793 while( (info->fWidth == -1 || count < info->fWidth)
794 && (isNotEOF = ufile_getch(input, &c))
795 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
796 {
797
798 if (!info->fSkipArg) {
799 /* put the character from the input onto the target */
800 source = &c;
801 /* Since we do this one character at a time, do it this way. */
802 if (info->fWidth > 0) {
803 limit = alias + info->fWidth - count;
804 }
805 else {
806 limit = alias + ucnv_getMaxCharSize(conv);
807 }
808
809 /* convert the character to the default codepage */
810 ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
811 NULL, TRUE, &status);
812
813 if(U_FAILURE(status)) {
814 /* clean up */
815 u_releaseDefaultConverter(conv);
816 return -1;
817 }
818 }
819
820 /* increment the count */
821 ++count;
822 }
823
824 /* put the final character we read back on the input */
825 if (!info->fSkipArg) {
826 if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
827 u_fungetc(c, input);
828
829 /* add the terminator */
830 if (info->fIsString) {
831 *alias = 0x00;
832 }
833 }
834
835 /* clean up */
836 u_releaseDefaultConverter(conv);
837
838 /* we converted 1 arg */
839 *argConverted = !info->fSkipArg;
840 return count + skipped;
841}
842
843static int32_t
844u_scanf_char_handler(UFILE *input,
845 u_scanf_spec_info *info,
846 ufmt_args *args,
847 const UChar *fmt,
848 int32_t *fmtConsumed,
849 int32_t *argConverted)
850{
851 if (info->fWidth < 0) {
852 info->fWidth = 1;
853 }
854 info->fIsString = FALSE;
855 return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
856}
857
858static int32_t
859u_scanf_ustring_handler(UFILE *input,
860 u_scanf_spec_info *info,
861 ufmt_args *args,
862 const UChar *fmt,
863 int32_t *fmtConsumed,
864 int32_t *argConverted)
865{
866 UChar *arg = (UChar*)(args[0].ptrValue);
867 UChar *alias = arg;
868 int32_t count;
869 int32_t skipped = 0;
870 UChar c;
871 UBool isNotEOF = FALSE;
872
873 /* skip all ws in the input */
874 if (info->fIsString) {
875 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
876 }
877
878 /* get the string one character at a time, truncating to the width */
879 count = 0;
880
881 while( (info->fWidth == -1 || count < info->fWidth)
882 && (isNotEOF = ufile_getch(input, &c))
883 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
884 {
885
886 /* put the character from the input onto the target */
887 if (!info->fSkipArg) {
888 *alias++ = c;
889 }
890
891 /* increment the count */
892 ++count;
893 }
894
895 /* put the final character we read back on the input */
896 if (!info->fSkipArg) {
897 if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
898 u_fungetc(c, input);
899 }
900
901 /* add the terminator */
902 if (info->fIsString) {
903 *alias = 0x0000;
904 }
905 }
906
907 /* we converted 1 arg */
908 *argConverted = !info->fSkipArg;
909 return count + skipped;
910}
911
912static int32_t
913u_scanf_uchar_handler(UFILE *input,
914 u_scanf_spec_info *info,
915 ufmt_args *args,
916 const UChar *fmt,
917 int32_t *fmtConsumed,
918 int32_t *argConverted)
919{
920 if (info->fWidth < 0) {
921 info->fWidth = 1;
922 }
923 info->fIsString = FALSE;
924 return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
925}
926
927static int32_t
928u_scanf_spellout_handler(UFILE *input,
929 u_scanf_spec_info *info,
930 ufmt_args *args,
931 const UChar *fmt,
932 int32_t *fmtConsumed,
933 int32_t *argConverted)
934{
935 int32_t len;
936 double num;
937 UNumberFormat *format;
938 int32_t parsePos = 0;
939 int32_t skipped;
940 UErrorCode status = U_ZERO_ERROR;
941
942
943 /* skip all ws in the input */
944 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
945
946 /* fill the input's internal buffer */
947 ufile_fill_uchar_buffer(input);
948
949 /* determine the size of the input's buffer */
950 len = (int32_t)(input->str.fLimit - input->str.fPos);
951
952 /* truncate to the width, if specified */
953 if(info->fWidth != -1)
954 len = ufmt_min(len, info->fWidth);
955
956 /* get the formatter */
957 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
958
959 /* handle error */
960 if(format == 0)
961 return 0;
962
963 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
964 /* This is not applicable to RBNF. */
965 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
966
967 /* parse the number */
968 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
969
970 if (!info->fSkipArg) {
971 *(double*)(args[0].ptrValue) = num;
972 }
973
974 /* mask off any necessary bits */
975 /* if(! info->fIsLong_double)
976 num &= DBL_MAX;*/
977
978 /* update the input's position to reflect consumed data */
979 input->str.fPos += parsePos;
980
981 /* we converted 1 arg */
982 *argConverted = !info->fSkipArg;
983 return parsePos + skipped;
984}
985
986static int32_t
987u_scanf_hex_handler(UFILE *input,
988 u_scanf_spec_info *info,
989 ufmt_args *args,
990 const UChar *fmt,
991 int32_t *fmtConsumed,
992 int32_t *argConverted)
993{
994 int32_t len;
995 int32_t skipped;
996 void *num = (void*) (args[0].ptrValue);
997 int64_t result;
998
999 /* skip all ws in the input */
1000 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1001
1002 /* fill the input's internal buffer */
1003 ufile_fill_uchar_buffer(input);
1004
1005 /* determine the size of the input's buffer */
1006 len = (int32_t)(input->str.fLimit - input->str.fPos);
1007
1008 /* truncate to the width, if specified */
1009 if(info->fWidth != -1)
1010 len = ufmt_min(len, info->fWidth);
1011
1012 /* check for alternate form */
1013 if( *(input->str.fPos) == 0x0030 &&
1014 (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
1015
1016 /* skip the '0' and 'x' or 'X' if present */
1017 input->str.fPos += 2;
1018 len -= 2;
1019 }
1020
1021 /* parse the number */
1022 result = ufmt_uto64(input->str.fPos, &len, 16);
1023
1024 /* update the input's position to reflect consumed data */
1025 input->str.fPos += len;
1026
1027 /* mask off any necessary bits */
1028 if (!info->fSkipArg) {
1029 if (info->fIsShort)
1030 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1031 else if (info->fIsLongLong)
1032 *(int64_t*)num = result;
1033 else
1034 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1035 }
1036
1037 /* we converted 1 arg */
1038 *argConverted = !info->fSkipArg;
1039 return len + skipped;
1040}
1041
1042static int32_t
1043u_scanf_octal_handler(UFILE *input,
1044 u_scanf_spec_info *info,
1045 ufmt_args *args,
1046 const UChar *fmt,
1047 int32_t *fmtConsumed,
1048 int32_t *argConverted)
1049{
1050 int32_t len;
1051 int32_t skipped;
1052 void *num = (void*) (args[0].ptrValue);
1053 int64_t result;
1054
1055 /* skip all ws in the input */
1056 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1057
1058 /* fill the input's internal buffer */
1059 ufile_fill_uchar_buffer(input);
1060
1061 /* determine the size of the input's buffer */
1062 len = (int32_t)(input->str.fLimit - input->str.fPos);
1063
1064 /* truncate to the width, if specified */
1065 if(info->fWidth != -1)
1066 len = ufmt_min(len, info->fWidth);
1067
1068 /* parse the number */
1069 result = ufmt_uto64(input->str.fPos, &len, 8);
1070
1071 /* update the input's position to reflect consumed data */
1072 input->str.fPos += len;
1073
1074 /* mask off any necessary bits */
1075 if (!info->fSkipArg) {
1076 if (info->fIsShort)
1077 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1078 else if (info->fIsLongLong)
1079 *(int64_t*)num = result;
1080 else
1081 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1082 }
1083
1084 /* we converted 1 arg */
1085 *argConverted = !info->fSkipArg;
1086 return len + skipped;
1087}
1088
1089static int32_t
1090u_scanf_pointer_handler(UFILE *input,
1091 u_scanf_spec_info *info,
1092 ufmt_args *args,
1093 const UChar *fmt,
1094 int32_t *fmtConsumed,
1095 int32_t *argConverted)
1096{
1097 int32_t len;
1098 int32_t skipped;
1099 void *result;
1100 void **p = (void**)(args[0].ptrValue);
1101
1102
1103 /* skip all ws in the input */
1104 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1105
1106 /* fill the input's internal buffer */
1107 ufile_fill_uchar_buffer(input);
1108
1109 /* determine the size of the input's buffer */
1110 len = (int32_t)(input->str.fLimit - input->str.fPos);
1111
1112 /* truncate to the width, if specified */
1113 if(info->fWidth != -1) {
1114 len = ufmt_min(len, info->fWidth);
1115 }
1116
1117 /* Make sure that we don't consume too much */
1118 if (len > (int32_t)(sizeof(void*)*2)) {
1119 len = (int32_t)(sizeof(void*)*2);
1120 }
1121
1122 /* parse the pointer - assign to temporary value */
1123 result = ufmt_utop(input->str.fPos, &len);
1124
1125 if (!info->fSkipArg) {
1126 *p = result;
1127 }
1128
1129 /* update the input's position to reflect consumed data */
1130 input->str.fPos += len;
1131
1132 /* we converted 1 arg */
1133 *argConverted = !info->fSkipArg;
1134 return len + skipped;
1135}
1136
1137static int32_t
1138u_scanf_scanset_handler(UFILE *input,
1139 u_scanf_spec_info *info,
1140 ufmt_args *args,
1141 const UChar *fmt,
1142 int32_t *fmtConsumed,
1143 int32_t *argConverted)
1144{
1145 USet *scanset;
1146 UErrorCode status = U_ZERO_ERROR;
1147 int32_t chLeft = INT32_MAX;
1148 UChar32 c;
1149 UChar *alias = (UChar*) (args[0].ptrValue);
1150 UBool isNotEOF = FALSE;
1151 UBool readCharacter = FALSE;
1152
1153 /* Create an empty set */
1154 scanset = uset_open(0, -1);
1155
1156 /* Back up one to get the [ */
1157 fmt--;
1158
1159 /* truncate to the width, if specified and alias the target */
1160 if(info->fWidth >= 0) {
1161 chLeft = info->fWidth;
1162 }
1163
1164 /* parse the scanset from the fmt string */
1165 *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
1166
1167 /* verify that the parse was successful */
1168 if (U_SUCCESS(status)) {
1169 c=0;
1170
1171 /* grab characters one at a time and make sure they are in the scanset */
1172 while(chLeft > 0) {
1173 if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
1174 readCharacter = TRUE;
1175 if (!info->fSkipArg) {
1176 int32_t idx = 0;
1177 UBool isError = FALSE;
1178
1179 U16_APPEND(alias, idx, chLeft, c, isError);
1180 if (isError) {
1181 break;
1182 }
1183 alias += idx;
1184 }
1185 chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
1186 }
1187 else {
1188 /* if the character's not in the scanset, break out */
1189 break;
1190 }
1191 }
1192
1193 /* put the final character we read back on the input */
1194 if(isNotEOF && chLeft > 0) {
1195 u_fungetc(c, input);
1196 }
1197 }
1198
1199 uset_close(scanset);
1200
1201 /* if we didn't match at least 1 character, fail */
1202 if(!readCharacter)
1203 return -1;
1204 /* otherwise, add the terminator */
1205 else if (!info->fSkipArg) {
1206 *alias = 0x00;
1207 }
1208
1209 /* we converted 1 arg */
1210 *argConverted = !info->fSkipArg;
1211 return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
1212}
1213
1214/* Use US-ASCII characters only for formatting. Most codepages have
1215 characters 20-7F from Unicode. Using any other codepage specific
1216 characters will make it very difficult to format the string on
1217 non-Unicode machines */
1218static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
1219/* 0x20 */
1220 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1221 UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY,
1222 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1223 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1224
1225/* 0x30 */
1226 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1227 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1228 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1229 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1230
1231/* 0x40 */
1232 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR,
1233 UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL,
1234#ifdef U_USE_OBSOLETE_IO_FORMATTING
1235 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/,
1236#else
1237 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1238#endif
1239 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1240
1241/* 0x50 */
1242 UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING,
1243#ifdef U_USE_OBSOLETE_IO_FORMATTING
1244 UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY,
1245#else
1246 UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY,
1247#endif
1248 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET,
1249 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1250
1251/* 0x60 */
1252 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR,
1253 UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL,
1254 UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY,
1255 UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL,
1256
1257/* 0x70 */
1258 UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING,
1259 UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY,
1260 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1261 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1262};
1263
1264U_CFUNC int32_t
1265u_scanf_parse(UFILE *f,
1266 const UChar *patternSpecification,
1267 va_list ap)
1268{
1269 const UChar *alias;
1270 int32_t count, converted, argConsumed, cpConsumed;
1271 uint16_t handlerNum;
1272
1273 ufmt_args args;
1274 u_scanf_spec spec;
1275 ufmt_type_info info;
1276 u_scanf_handler handler;
1277
1278 /* alias the pattern */
1279 alias = patternSpecification;
1280
1281 /* haven't converted anything yet */
1282 argConsumed = 0;
1283 converted = 0;
1284 cpConsumed = 0;
1285
1286 /* iterate through the pattern */
1287 for(;;) {
1288
1289 /* match any characters up to the next '%' */
1290 while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
1291 alias++;
1292 }
1293
1294 /* if we aren't at a '%', or if we're at end of string, break*/
1295 if(*alias != UP_PERCENT || *alias == 0x0000)
1296 break;
1297
1298 /* parse the specifier */
1299 count = u_scanf_parse_spec(alias, &spec);
1300
1301 /* update the pointer in pattern */
1302 alias += count;
1303
1304 handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
1305 if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
1306 /* skip the argument, if necessary */
1307 /* query the info function for argument information */
1308 info = g_u_scanf_infos[ handlerNum ].info;
1309 if (info != ufmt_count && u_feof(f)) {
1310 break;
1311 }
1312 else if(spec.fInfo.fSkipArg) {
1313 args.ptrValue = NULL;
1314 }
1315 else {
1316 switch(info) {
1317 case ufmt_count:
1318 /* set the spec's width to the # of items converted */
1319 spec.fInfo.fWidth = cpConsumed;
1320 /* fall through to next case */
1321 case ufmt_char:
1322 case ufmt_uchar:
1323 case ufmt_int:
1324 case ufmt_string:
1325 case ufmt_ustring:
1326 case ufmt_pointer:
1327 case ufmt_float:
1328 case ufmt_double:
1329 args.ptrValue = va_arg(ap, void*);
1330 break;
1331
1332 default:
1333 /* else args is ignored */
1334 args.ptrValue = NULL;
1335 break;
1336 }
1337 }
1338
1339 /* call the handler function */
1340 handler = g_u_scanf_infos[ handlerNum ].handler;
1341 if(handler != 0) {
1342
1343 /* reset count to 1 so that += for alias works. */
1344 count = 1;
1345
1346 cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
1347
1348 /* if the handler encountered an error condition, break */
1349 if(argConsumed < 0) {
1350 converted = -1;
1351 break;
1352 }
1353
1354 /* add to the # of items converted */
1355 converted += argConsumed;
1356
1357 /* update the pointer in pattern */
1358 alias += count-1;
1359 }
1360 /* else do nothing */
1361 }
1362 /* else do nothing */
1363
1364 /* just ignore unknown tags */
1365 }
1366
1367 /* return # of items converted */
1368 return converted;
1369}
1370
1371#endif /* #if !UCONFIG_NO_FORMATTING */