1 /* $OpenBSD: vfwscanf.c,v 1.4 2014/03/19 05:17:01 guenther Exp $ */
2 /*-
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Chris Torek.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 #include <inttypes.h>
35 #include <limits.h>
36 #include <locale.h>
37 #include <stdarg.h>
38 #include <stddef.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <wctype.h>
43 #include "local.h"
44
/* Size, in wide characters, of the scratch buffer used for numeric text. */
#define BUF 513

/*
 * Per-directive flag bits.  The first group records length modifiers and
 * other properties taken from the format directive itself.
 */
#define LONG 0x00001       /* 'l' modifier */
#define LONGDBL 0x00002    /* 'L' modifier: long double */
#define SHORT 0x00004      /* 'h' modifier: short */
#define SHORTSHORT 0x00008 /* 'hh' modifier: char-sized integer */
#define LLONG 0x00010      /* 'll' modifier (or deprecated 'q'): long long */
#define POINTER 0x00020    /* 'p' conversion: void *, scanned as hex */
#define SIZEINT 0x00040    /* 'z' modifier: (signed) size_t */
#define MAXINT 0x00080     /* 'j' modifier: intmax_t */
#define PTRINT 0x00100     /* 't' modifier: ptrdiff_t */
#define NOSKIP 0x00200     /* '[' or 'c': leading blanks are not skipped */
#define SUPPRESS 0x00400   /* '*': parse but do not assign */
#define UNSIGNED 0x00800   /* one of the o, u, p, x, X conversions */

/*
 * Scanner state bits, meaningful only while a number is being collected.
 * Floating point uses SIGNOK, HAVESIGN, NDIGITS, DPTOK and EXPOK;
 * integers use SIGNOK, HAVESIGN, NDIGITS, PFXOK and NZDIGITS.
 * The float-only and integer-only bits deliberately share values.
 */
#define SIGNOK 0x01000   /* a leading '+' or '-' may still appear */
#define HAVESIGN 0x02000 /* a sign has been seen */
#define NDIGITS 0x04000  /* no digit has been seen yet */

#define DPTOK 0x08000 /* (float) a decimal point may still appear */
#define EXPOK 0x10000 /* (float) an exponent (e+3, etc) may still appear */

#define PFXOK 0x08000    /* (integer) a 0x prefix may still appear */
#define NZDIGITS 0x10000 /* (integer) no zero digit has been seen yet */

/*
 * Conversion classes selected by the directive's conversion character.
 */
#define CT_CHAR 0   /* %c */
#define CT_CCL 1    /* %[...] */
#define CT_STRING 2 /* %s */
#define CT_INT 3    /* integers, parsed as by strtoimax or strtoumax */
#define CT_FLOAT 4  /* floating point, parsed as by strtod */
86
// Tests whether `wc` belongs to the scanset that starts at `ccl`, i.e. the text
// following the '[' of a %[ directive. The set is interpreted on every call
// rather than expanded into a table (as __sccl does for narrow characters),
// because a table covering every wchar_t value would be too expensive.
//
// The set ends at the first unescaped ']' or at the end of the string.
// A leading '^' negates the set. A ']' or '-' in first position (after any
// '^') is an ordinary member. "a-b" denotes a range only when a <= b and the
// '-' is not the last character of the set; otherwise '-' is an ordinary member.
static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) {
  const wchar_t* cursor = ccl;

  // A leading '^' inverts the answer for every outcome below.
  const bool negated = (*cursor == '^');
  if (negated) ++cursor;
  const bool on_match = !negated;

  // In first position, ']' and '-' are plain members of the set.
  if (*cursor == ']' || *cursor == '-') {
    if (wc == *cursor) return on_match;
    ++cursor;
  }

  for (; *cursor != '\0' && *cursor != ']'; ++cursor) {
    // A '-' can introduce a range only if something other than the end of
    // the set follows it.
    const bool range_candidate = *cursor == '-' && cursor[1] != '\0' && cursor[1] != ']';

    // Only an ascending (or single-element) range counts; for a descending
    // pair the '-' is matched literally by the check further down.
    if (range_candidate && cursor[-1] <= cursor[1]) {
      if (cursor[-1] <= wc && wc <= cursor[1]) return on_match;
      ++cursor;  // Together with the loop increment this skips '-' and the upper bound.
      continue;
    }

    if (wc == *cursor) return on_match;
  }

  // Nothing in the set matched.
  return negated;
}
124
125 #pragma GCC diagnostic push
126 #pragma GCC diagnostic ignored "-Wframe-larger-than="
127
128 /*
129 * vfwscanf
130 */
__vfwscanf(FILE * __restrict fp,const wchar_t * __restrict fmt,__va_list ap)131 int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) {
132 wint_t c; /* character from format, or conversion */
133 size_t width; /* field width, or 0 */
134 wchar_t* p; /* points into all kinds of strings */
135 int n; /* handy integer */
136 int flags; /* flags as defined above */
137 wchar_t* p0; /* saves original value of p when necessary */
138 int nassigned; /* number of fields assigned */
139 int nconversions; /* number of conversions */
140 int nread; /* number of characters consumed from fp */
141 int base; /* base argument to strtoimax/strtouimax */
142 wchar_t buf[BUF]; /* buffer for numeric conversions */
143 const wchar_t* ccl;
144 wint_t wi; /* handy wint_t */
145 char* mbp; /* multibyte string pointer for %c %s %[ */
146 size_t nconv; /* number of bytes in mb. conversion */
147 char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
148 mbstate_t mbs;
149
150 /* `basefix' is used to avoid `if' tests in the integer scanner */
151 static short basefix[17] = { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
152
153 _SET_ORIENTATION(fp, 1);
154
155 nassigned = 0;
156 nconversions = 0;
157 nread = 0;
158 base = 0; /* XXX just to keep gcc happy */
159 for (;;) {
160 c = *fmt++;
161 if (c == 0) {
162 return (nassigned);
163 }
164 if (iswspace(c)) {
165 while ((c = __fgetwc_unlock(fp)) != WEOF && iswspace(c))
166 ;
167 if (c != WEOF) __ungetwc(c, fp);
168 continue;
169 }
170 if (c != '%') goto literal;
171 width = 0;
172 flags = 0;
173 /*
174 * switch on the format. continue if done;
175 * break once format type is derived.
176 */
177 again:
178 c = *fmt++;
179 switch (c) {
180 case '%':
181 literal:
182 if ((wi = __fgetwc_unlock(fp)) == WEOF) goto input_failure;
183 if (wi != c) {
184 __ungetwc(wi, fp);
185 goto match_failure;
186 }
187 nread++;
188 continue;
189
190 case '*':
191 flags |= SUPPRESS;
192 goto again;
193 case 'j':
194 flags |= MAXINT;
195 goto again;
196 case 'L':
197 flags |= LONGDBL;
198 goto again;
199 case 'h':
200 if (*fmt == 'h') {
201 fmt++;
202 flags |= SHORTSHORT;
203 } else {
204 flags |= SHORT;
205 }
206 goto again;
207 case 'l':
208 if (*fmt == 'l') {
209 fmt++;
210 flags |= LLONG;
211 } else {
212 flags |= LONG;
213 }
214 goto again;
215 case 'q':
216 flags |= LLONG; /* deprecated */
217 goto again;
218 case 't':
219 flags |= PTRINT;
220 goto again;
221 case 'z':
222 flags |= SIZEINT;
223 goto again;
224
225 case '0':
226 case '1':
227 case '2':
228 case '3':
229 case '4':
230 case '5':
231 case '6':
232 case '7':
233 case '8':
234 case '9':
235 width = width * 10 + c - '0';
236 goto again;
237
238 /*
239 * Conversions.
240 * Those marked `compat' are for 4.[123]BSD compatibility.
241 */
242 case 'D': /* compat */
243 flags |= LONG;
244 /* FALLTHROUGH */
245 case 'd':
246 c = CT_INT;
247 base = 10;
248 break;
249
250 case 'i':
251 c = CT_INT;
252 base = 0;
253 break;
254
255 case 'O': /* compat */
256 flags |= LONG;
257 /* FALLTHROUGH */
258 case 'o':
259 c = CT_INT;
260 flags |= UNSIGNED;
261 base = 8;
262 break;
263
264 case 'u':
265 c = CT_INT;
266 flags |= UNSIGNED;
267 base = 10;
268 break;
269
270 case 'X':
271 case 'x':
272 flags |= PFXOK; /* enable 0x prefixing */
273 c = CT_INT;
274 flags |= UNSIGNED;
275 base = 16;
276 break;
277
278 case 'e':
279 case 'E':
280 case 'f':
281 case 'F':
282 case 'g':
283 case 'G':
284 case 'a':
285 case 'A':
286 c = CT_FLOAT;
287 break;
288
289 case 's':
290 c = CT_STRING;
291 break;
292
293 case '[':
294 ccl = fmt;
295 if (*fmt == '^') fmt++;
296 if (*fmt == ']') fmt++;
297 while (*fmt != '\0' && *fmt != ']') fmt++;
298 fmt++;
299 flags |= NOSKIP;
300 c = CT_CCL;
301 break;
302
303 case 'c':
304 flags |= NOSKIP;
305 c = CT_CHAR;
306 break;
307
308 case 'p': /* pointer format is like hex */
309 flags |= POINTER | PFXOK;
310 c = CT_INT;
311 flags |= UNSIGNED;
312 base = 16;
313 break;
314
315 case 'n':
316 nconversions++;
317 if (flags & SUPPRESS) continue;
318 if (flags & SHORTSHORT)
319 *va_arg(ap, signed char*) = nread;
320 else if (flags & SHORT)
321 *va_arg(ap, short*) = nread;
322 else if (flags & LONG)
323 *va_arg(ap, long*) = nread;
324 else if (flags & SIZEINT)
325 *va_arg(ap, ssize_t*) = nread;
326 else if (flags & PTRINT)
327 *va_arg(ap, ptrdiff_t*) = nread;
328 else if (flags & LLONG)
329 *va_arg(ap, long long*) = nread;
330 else if (flags & MAXINT)
331 *va_arg(ap, intmax_t*) = nread;
332 else
333 *va_arg(ap, int*) = nread;
334 continue;
335
336 /*
337 * Disgusting backwards compatibility hacks. XXX
338 */
339 case '\0': /* compat */
340 return (EOF);
341
342 default: /* compat */
343 if (iswupper(c)) flags |= LONG;
344 c = CT_INT;
345 base = 10;
346 break;
347 }
348
349 /*
350 * Consume leading white space, except for formats
351 * that suppress this.
352 */
353 if ((flags & NOSKIP) == 0) {
354 while ((wi = __fgetwc_unlock(fp)) != WEOF && iswspace(wi)) nread++;
355 if (wi == WEOF) goto input_failure;
356 __ungetwc(wi, fp);
357 }
358
359 /*
360 * Do the conversion.
361 */
362 switch (c) {
363 case CT_CHAR:
364 /* scan arbitrary characters (sets NOSKIP) */
365 if (width == 0) width = 1;
366 if (flags & LONG) {
367 if (!(flags & SUPPRESS)) p = va_arg(ap, wchar_t*);
368 n = 0;
369 while (width-- != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
370 if (!(flags & SUPPRESS)) *p++ = (wchar_t)wi;
371 n++;
372 }
373 if (n == 0) goto input_failure;
374 nread += n;
375 if (!(flags & SUPPRESS)) nassigned++;
376 } else {
377 if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
378 n = 0;
379 memset(&mbs, 0, sizeof(mbs));
380 while (width != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
381 if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
382 nconv = wcrtomb(mbp, wi, &mbs);
383 if (nconv == (size_t)-1) goto input_failure;
384 } else {
385 nconv = wcrtomb(mbbuf, wi, &mbs);
386 if (nconv == (size_t)-1) goto input_failure;
387 if (nconv > width) {
388 __ungetwc(wi, fp);
389 break;
390 }
391 if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
392 }
393 if (!(flags & SUPPRESS)) mbp += nconv;
394 width -= nconv;
395 n++;
396 }
397 if (n == 0) goto input_failure;
398 nread += n;
399 if (!(flags & SUPPRESS)) nassigned++;
400 }
401 nconversions++;
402 break;
403
404 case CT_CCL:
405 case CT_STRING:
406 // CT_CCL: scan a (nonempty) character class (sets NOSKIP).
407 // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP.
408 if (width == 0) width = (size_t)~0; // 'infinity'.
409 if ((flags & SUPPRESS) && (flags & LONG)) {
410 n = 0;
411 while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) n++;
412 if (wi != WEOF) __ungetwc(wi, fp);
413 } else if (flags & LONG) {
414 p0 = p = va_arg(ap, wchar_t*);
415 while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
416 *p++ = (wchar_t)wi;
417 }
418 if (wi != WEOF) __ungetwc(wi, fp);
419 n = p - p0;
420 } else {
421 if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
422 n = 0;
423 memset(&mbs, 0, sizeof(mbs));
424 while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
425 if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
426 nconv = wcrtomb(mbp, wi, &mbs);
427 if (nconv == (size_t)-1) goto input_failure;
428 } else {
429 nconv = wcrtomb(mbbuf, wi, &mbs);
430 if (nconv == (size_t)-1) goto input_failure;
431 if (nconv > width) break;
432 if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
433 }
434 if (!(flags & SUPPRESS)) mbp += nconv;
435 width -= nconv;
436 n++;
437 }
438 if (wi != WEOF) __ungetwc(wi, fp);
439 }
440 if (c == CT_CCL && n == 0) goto match_failure;
441 if (!(flags & SUPPRESS)) {
442 if (flags & LONG) {
443 *p = L'\0';
444 } else {
445 *mbp = '\0';
446 }
447 ++nassigned;
448 }
449 nread += n;
450 nconversions++;
451 break;
452
453 case CT_INT:
454 /* scan an integer as if by strtoimax/strtoumax */
455 if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
456 width = sizeof(buf) / sizeof(*buf) - 1;
457 flags |= SIGNOK | NDIGITS | NZDIGITS;
458 for (p = buf; width; width--) {
459 c = __fgetwc_unlock(fp);
460 /*
461 * Switch on the character; `goto ok'
462 * if we accept it as a part of number.
463 */
464 switch (c) {
465 /*
466 * The digit 0 is always legal, but is
467 * special. For %i conversions, if no
468 * digits (zero or nonzero) have been
469 * scanned (only signs), we will have
470 * base==0. In that case, we should set
471 * it to 8 and enable 0x prefixing.
472 * Also, if we have not scanned zero digits
473 * before this, do not turn off prefixing
474 * (someone else will turn it off if we
475 * have scanned any nonzero digits).
476 */
477 case '0':
478 if (base == 0) {
479 base = 8;
480 flags |= PFXOK;
481 }
482 if (flags & NZDIGITS)
483 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
484 else
485 flags &= ~(SIGNOK | PFXOK | NDIGITS);
486 goto ok;
487
488 /* 1 through 7 always legal */
489 case '1':
490 case '2':
491 case '3':
492 case '4':
493 case '5':
494 case '6':
495 case '7':
496 base = basefix[base];
497 flags &= ~(SIGNOK | PFXOK | NDIGITS);
498 goto ok;
499
500 /* digits 8 and 9 ok iff decimal or hex */
501 case '8':
502 case '9':
503 base = basefix[base];
504 if (base <= 8) break; /* not legal here */
505 flags &= ~(SIGNOK | PFXOK | NDIGITS);
506 goto ok;
507
508 /* letters ok iff hex */
509 case 'A':
510 case 'B':
511 case 'C':
512 case 'D':
513 case 'E':
514 case 'F':
515 case 'a':
516 case 'b':
517 case 'c':
518 case 'd':
519 case 'e':
520 case 'f':
521 /* no need to fix base here */
522 if (base <= 10) break; /* not legal here */
523 flags &= ~(SIGNOK | PFXOK | NDIGITS);
524 goto ok;
525
526 /* sign ok only as first character */
527 case '+':
528 case '-':
529 if (flags & SIGNOK) {
530 flags &= ~SIGNOK;
531 flags |= HAVESIGN;
532 goto ok;
533 }
534 break;
535
536 /*
537 * x ok iff flag still set and 2nd char (or
538 * 3rd char if we have a sign).
539 */
540 case 'x':
541 case 'X':
542 if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
543 base = 16; /* if %i */
544 flags &= ~PFXOK;
545 goto ok;
546 }
547 break;
548 }
549
550 /*
551 * If we got here, c is not a legal character
552 * for a number. Stop accumulating digits.
553 */
554 if (c != WEOF) __ungetwc(c, fp);
555 break;
556 ok:
557 /*
558 * c is legal: store it and look at the next.
559 */
560 *p++ = (wchar_t)c;
561 }
562 /*
563 * If we had only a sign, it is no good; push
564 * back the sign. If the number ends in `x',
565 * it was [sign] '0' 'x', so push back the x
566 * and treat it as [sign] '0'.
567 */
568 if (flags & NDIGITS) {
569 if (p > buf) __ungetwc(*--p, fp);
570 goto match_failure;
571 }
572 c = p[-1];
573 if (c == 'x' || c == 'X') {
574 --p;
575 __ungetwc(c, fp);
576 }
577 if ((flags & SUPPRESS) == 0) {
578 uintmax_t res;
579
580 *p = '\0';
581 if (flags & UNSIGNED)
582 res = wcstoimax(buf, NULL, base);
583 else
584 res = wcstoumax(buf, NULL, base);
585 if (flags & POINTER)
586 *va_arg(ap, void**) = (void*)(uintptr_t)res;
587 else if (flags & MAXINT)
588 *va_arg(ap, intmax_t*) = res;
589 else if (flags & LLONG)
590 *va_arg(ap, long long*) = res;
591 else if (flags & SIZEINT)
592 *va_arg(ap, ssize_t*) = res;
593 else if (flags & PTRINT)
594 *va_arg(ap, ptrdiff_t*) = res;
595 else if (flags & LONG)
596 *va_arg(ap, long*) = res;
597 else if (flags & SHORT)
598 *va_arg(ap, short*) = res;
599 else if (flags & SHORTSHORT)
600 *va_arg(ap, signed char*) = res;
601 else
602 *va_arg(ap, int*) = res;
603 nassigned++;
604 }
605 nread += p - buf;
606 nconversions++;
607 break;
608
609 case CT_FLOAT:
610 /* scan a floating point number as if by strtod */
611 if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
612 width = sizeof(buf) / sizeof(*buf) - 1;
613 if ((width = wparsefloat(fp, buf, buf + width)) == 0) goto match_failure;
614 if ((flags & SUPPRESS) == 0) {
615 if (flags & LONGDBL) {
616 long double res = wcstold(buf, &p);
617 *va_arg(ap, long double*) = res;
618 } else if (flags & LONG) {
619 double res = wcstod(buf, &p);
620 *va_arg(ap, double*) = res;
621 } else {
622 float res = wcstof(buf, &p);
623 *va_arg(ap, float*) = res;
624 }
625 if (p - buf != (ptrdiff_t)width) abort();
626 nassigned++;
627 }
628 nread += width;
629 nconversions++;
630 break;
631 }
632 }
633 input_failure:
634 return (nconversions != 0 ? nassigned : EOF);
635 match_failure:
636 return (nassigned);
637 }
638 #pragma GCC diagnostic pop
639