1 /*	$OpenBSD: vfwscanf.c,v 1.4 2014/03/19 05:17:01 guenther Exp $ */
2 /*-
3  * Copyright (c) 1990, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Chris Torek.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <inttypes.h>
35 #include <limits.h>
36 #include <locale.h>
37 #include <stdarg.h>
38 #include <stddef.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <wctype.h>
43 #include "local.h"
44 
/*
 * Number of wchar_t elements in the scratch buffer `buf` used by the integer
 * and floating point conversions.  Field widths for those conversions are
 * clamped to BUF - 1 so that a terminating L'\0' always fits.
 */
#define BUF 513 /* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 *
 * These are OR-ed into the local `flags` word of __vfwscanf while a single
 * conversion specification is being parsed; `flags` is reset to 0 at the
 * start of every specification.
 */
#define LONG 0x00001       /* l: long or double */
#define LONGDBL 0x00002    /* L: long double */
#define SHORT 0x00004      /* h: short */
#define SHORTSHORT 0x00008 /* hh: 8 bit integer */
#define LLONG 0x00010      /* ll: long long (+ deprecated q: quad) */
#define POINTER 0x00020    /* p: void * (as hex) */
#define SIZEINT 0x00040    /* z: (signed) size_t */
#define MAXINT 0x00080     /* j: intmax_t */
#define PTRINT 0x00100     /* t: ptrdiff_t */
#define NOSKIP 0x00200     /* [ or c: do not skip blanks */
#define SUPPRESS 0x00400   /* *: suppress assignment */
#define UNSIGNED 0x00800   /* %[oupxX] conversions */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, HAVESIGN, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, HAVESIGN, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * DPTOK/PFXOK and EXPOK/NZDIGITS deliberately share bit values.
 * NOTE(review): presumably safe because a given conversion is either
 * floating point or integral, never both -- confirm before adding a flag
 * that must be valid for both kinds.
 */
#define SIGNOK 0x01000   /* +/- is (still) legal */
#define HAVESIGN 0x02000 /* sign detected */
#define NDIGITS 0x04000  /* no digits detected */

#define DPTOK 0x08000 /* (float) decimal point is still legal */
#define EXPOK 0x10000 /* (float) exponent (e+3, etc) still legal */

#define PFXOK 0x08000    /* 0x prefix is (still) legal */
#define NZDIGITS 0x10000 /* no zero digits detected */

/*
 * Conversion types.
 *
 * Once the conversion character has been recognised, __vfwscanf overwrites
 * its `c` variable with one of these values and dispatches on it to perform
 * the actual conversion.
 */
#define CT_CHAR 0   /* %c conversion */
#define CT_CCL 1    /* %[...] conversion */
#define CT_STRING 2 /* %s conversion */
#define CT_INT 3    /* integer, i.e., strtoimax or strtoumax */
#define CT_FLOAT 4  /* floating, i.e., strtod */
86 
// Interpretive counterpart of __sccl from vfscanf.c: instead of expanding the
// scan set into a lookup table (far too large for every wchar_t value), the
// set text is re-walked for each candidate character.
//
// `ccl` points just past the '[' of a %[...] specification; `wc` is the input
// character being tested. The set text ends at the first unescaped ']' or at
// the terminating L'\0', whichever comes first.
//
// Returns true if `wc` is accepted by the set (i.e. listed in a normal set, or
// not listed in a '^'-negated set), false otherwise.
static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) {
  const wchar_t* cur = ccl;

  // A leading '^' inverts the meaning of "listed in the set".
  const bool negated = (*cur == '^');
  if (negated) cur++;
  const bool on_hit = !negated;

  // Directly after the optional '^', both ']' and '-' are ordinary members.
  if (*cur == ']' || *cur == '-') {
    if (wc == *cur) return on_hit;
    cur++;
  }

  for (;;) {
    const wchar_t ch = *cur;
    if (ch == '\0' || ch == ']') break;

    // "x-y" is a range only when the '-' is not the last character of the set
    // and the end point is not below the start point; otherwise the '-' is
    // just another literal member.
    const bool is_range =
        ch == '-' && cur[1] != '\0' && cur[1] != ']' && cur[-1] <= cur[1];

    if (is_range) {
      if (cur[-1] <= wc && wc <= cur[1]) return on_hit;
      cur += 2;  // Skip both the '-' and the range's end point.
    } else {
      if (ch == wc) return on_hit;
      cur++;
    }
  }

  // Ran off the end of the set without finding `wc`.
  return !on_hit;
}
124 
125 #pragma GCC diagnostic push
126 #pragma GCC diagnostic ignored "-Wframe-larger-than="
127 
128 /*
129  * vfwscanf
130  */
__vfwscanf(FILE * __restrict fp,const wchar_t * __restrict fmt,__va_list ap)131 int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) {
132   wint_t c;               /* character from format, or conversion */
133   size_t width;           /* field width, or 0 */
134   wchar_t* p;             /* points into all kinds of strings */
135   int n;                  /* handy integer */
136   int flags;              /* flags as defined above */
137   wchar_t* p0;            /* saves original value of p when necessary */
138   int nassigned;          /* number of fields assigned */
139   int nconversions;       /* number of conversions */
140   int nread;              /* number of characters consumed from fp */
141   int base;               /* base argument to strtoimax/strtouimax */
142   wchar_t buf[BUF];       /* buffer for numeric conversions */
143   const wchar_t* ccl;
144   wint_t wi;              /* handy wint_t */
145   char* mbp;              /* multibyte string pointer for %c %s %[ */
146   size_t nconv;           /* number of bytes in mb. conversion */
147   char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
148   mbstate_t mbs;
149 
150   /* `basefix' is used to avoid `if' tests in the integer scanner */
151   static short basefix[17] = { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
152 
153   _SET_ORIENTATION(fp, 1);
154 
155   nassigned = 0;
156   nconversions = 0;
157   nread = 0;
158   base = 0; /* XXX just to keep gcc happy */
159   for (;;) {
160     c = *fmt++;
161     if (c == 0) {
162       return (nassigned);
163     }
164     if (iswspace(c)) {
165       while ((c = __fgetwc_unlock(fp)) != WEOF && iswspace(c))
166         ;
167       if (c != WEOF) __ungetwc(c, fp);
168       continue;
169     }
170     if (c != '%') goto literal;
171     width = 0;
172     flags = 0;
173     /*
174      * switch on the format.  continue if done;
175      * break once format type is derived.
176      */
177   again:
178     c = *fmt++;
179     switch (c) {
180       case '%':
181       literal:
182         if ((wi = __fgetwc_unlock(fp)) == WEOF) goto input_failure;
183         if (wi != c) {
184           __ungetwc(wi, fp);
185           goto match_failure;
186         }
187         nread++;
188         continue;
189 
190       case '*':
191         flags |= SUPPRESS;
192         goto again;
193       case 'j':
194         flags |= MAXINT;
195         goto again;
196       case 'L':
197         flags |= LONGDBL;
198         goto again;
199       case 'h':
200         if (*fmt == 'h') {
201           fmt++;
202           flags |= SHORTSHORT;
203         } else {
204           flags |= SHORT;
205         }
206         goto again;
207       case 'l':
208         if (*fmt == 'l') {
209           fmt++;
210           flags |= LLONG;
211         } else {
212           flags |= LONG;
213         }
214         goto again;
215       case 'q':
216         flags |= LLONG; /* deprecated */
217         goto again;
218       case 't':
219         flags |= PTRINT;
220         goto again;
221       case 'z':
222         flags |= SIZEINT;
223         goto again;
224 
225       case '0':
226       case '1':
227       case '2':
228       case '3':
229       case '4':
230       case '5':
231       case '6':
232       case '7':
233       case '8':
234       case '9':
235         width = width * 10 + c - '0';
236         goto again;
237 
238       /*
239        * Conversions.
240        * Those marked `compat' are for 4.[123]BSD compatibility.
241        */
242       case 'D': /* compat */
243         flags |= LONG;
244         /* FALLTHROUGH */
245       case 'd':
246         c = CT_INT;
247         base = 10;
248         break;
249 
250       case 'i':
251         c = CT_INT;
252         base = 0;
253         break;
254 
255       case 'O': /* compat */
256         flags |= LONG;
257         /* FALLTHROUGH */
258       case 'o':
259         c = CT_INT;
260         flags |= UNSIGNED;
261         base = 8;
262         break;
263 
264       case 'u':
265         c = CT_INT;
266         flags |= UNSIGNED;
267         base = 10;
268         break;
269 
270       case 'X':
271       case 'x':
272         flags |= PFXOK; /* enable 0x prefixing */
273         c = CT_INT;
274         flags |= UNSIGNED;
275         base = 16;
276         break;
277 
278       case 'e':
279       case 'E':
280       case 'f':
281       case 'F':
282       case 'g':
283       case 'G':
284       case 'a':
285       case 'A':
286         c = CT_FLOAT;
287         break;
288 
289       case 's':
290         c = CT_STRING;
291         break;
292 
293       case '[':
294         ccl = fmt;
295         if (*fmt == '^') fmt++;
296         if (*fmt == ']') fmt++;
297         while (*fmt != '\0' && *fmt != ']') fmt++;
298         fmt++;
299         flags |= NOSKIP;
300         c = CT_CCL;
301         break;
302 
303       case 'c':
304         flags |= NOSKIP;
305         c = CT_CHAR;
306         break;
307 
308       case 'p': /* pointer format is like hex */
309         flags |= POINTER | PFXOK;
310         c = CT_INT;
311         flags |= UNSIGNED;
312         base = 16;
313         break;
314 
315       case 'n':
316         nconversions++;
317         if (flags & SUPPRESS) continue;
318         if (flags & SHORTSHORT)
319           *va_arg(ap, signed char*) = nread;
320         else if (flags & SHORT)
321           *va_arg(ap, short*) = nread;
322         else if (flags & LONG)
323           *va_arg(ap, long*) = nread;
324         else if (flags & SIZEINT)
325           *va_arg(ap, ssize_t*) = nread;
326         else if (flags & PTRINT)
327           *va_arg(ap, ptrdiff_t*) = nread;
328         else if (flags & LLONG)
329           *va_arg(ap, long long*) = nread;
330         else if (flags & MAXINT)
331           *va_arg(ap, intmax_t*) = nread;
332         else
333           *va_arg(ap, int*) = nread;
334         continue;
335 
336       /*
337        * Disgusting backwards compatibility hacks.	XXX
338        */
339       case '\0': /* compat */
340         return (EOF);
341 
342       default: /* compat */
343         if (iswupper(c)) flags |= LONG;
344         c = CT_INT;
345         base = 10;
346         break;
347     }
348 
349     /*
350      * Consume leading white space, except for formats
351      * that suppress this.
352      */
353     if ((flags & NOSKIP) == 0) {
354       while ((wi = __fgetwc_unlock(fp)) != WEOF && iswspace(wi)) nread++;
355       if (wi == WEOF) goto input_failure;
356       __ungetwc(wi, fp);
357     }
358 
359     /*
360      * Do the conversion.
361      */
362     switch (c) {
363       case CT_CHAR:
364         /* scan arbitrary characters (sets NOSKIP) */
365         if (width == 0) width = 1;
366         if (flags & LONG) {
367           if (!(flags & SUPPRESS)) p = va_arg(ap, wchar_t*);
368           n = 0;
369           while (width-- != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
370             if (!(flags & SUPPRESS)) *p++ = (wchar_t)wi;
371             n++;
372           }
373           if (n == 0) goto input_failure;
374           nread += n;
375           if (!(flags & SUPPRESS)) nassigned++;
376         } else {
377           if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
378           n = 0;
379           memset(&mbs, 0, sizeof(mbs));
380           while (width != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
381             if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
382               nconv = wcrtomb(mbp, wi, &mbs);
383               if (nconv == (size_t)-1) goto input_failure;
384             } else {
385               nconv = wcrtomb(mbbuf, wi, &mbs);
386               if (nconv == (size_t)-1) goto input_failure;
387               if (nconv > width) {
388                 __ungetwc(wi, fp);
389                 break;
390               }
391               if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
392             }
393             if (!(flags & SUPPRESS)) mbp += nconv;
394             width -= nconv;
395             n++;
396           }
397           if (n == 0) goto input_failure;
398           nread += n;
399           if (!(flags & SUPPRESS)) nassigned++;
400         }
401         nconversions++;
402         break;
403 
404       case CT_CCL:
405       case CT_STRING:
406         // CT_CCL: scan a (nonempty) character class (sets NOSKIP).
407         // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP.
408         if (width == 0) width = (size_t)~0; // 'infinity'.
409         if ((flags & SUPPRESS) && (flags & LONG)) {
410           n = 0;
411           while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) n++;
412           if (wi != WEOF) __ungetwc(wi, fp);
413         } else if (flags & LONG) {
414           p0 = p = va_arg(ap, wchar_t*);
415           while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
416             *p++ = (wchar_t)wi;
417           }
418           if (wi != WEOF) __ungetwc(wi, fp);
419           n = p - p0;
420         } else {
421           if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
422           n = 0;
423           memset(&mbs, 0, sizeof(mbs));
424           while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
425             if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
426               nconv = wcrtomb(mbp, wi, &mbs);
427               if (nconv == (size_t)-1) goto input_failure;
428             } else {
429               nconv = wcrtomb(mbbuf, wi, &mbs);
430               if (nconv == (size_t)-1) goto input_failure;
431               if (nconv > width) break;
432               if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
433             }
434             if (!(flags & SUPPRESS)) mbp += nconv;
435             width -= nconv;
436             n++;
437           }
438           if (wi != WEOF) __ungetwc(wi, fp);
439         }
440         if (c == CT_CCL && n == 0) goto match_failure;
441         if (!(flags & SUPPRESS)) {
442           if (flags & LONG) {
443             *p = L'\0';
444           } else {
445             *mbp = '\0';
446           }
447           ++nassigned;
448         }
449         nread += n;
450         nconversions++;
451         break;
452 
453       case CT_INT:
454         /* scan an integer as if by strtoimax/strtoumax */
455         if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
456           width = sizeof(buf) / sizeof(*buf) - 1;
457         flags |= SIGNOK | NDIGITS | NZDIGITS;
458         for (p = buf; width; width--) {
459           c = __fgetwc_unlock(fp);
460           /*
461            * Switch on the character; `goto ok'
462            * if we accept it as a part of number.
463            */
464           switch (c) {
465             /*
466              * The digit 0 is always legal, but is
467              * special.  For %i conversions, if no
468              * digits (zero or nonzero) have been
469              * scanned (only signs), we will have
470              * base==0.  In that case, we should set
471              * it to 8 and enable 0x prefixing.
472              * Also, if we have not scanned zero digits
473              * before this, do not turn off prefixing
474              * (someone else will turn it off if we
475              * have scanned any nonzero digits).
476              */
477             case '0':
478               if (base == 0) {
479                 base = 8;
480                 flags |= PFXOK;
481               }
482               if (flags & NZDIGITS)
483                 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
484               else
485                 flags &= ~(SIGNOK | PFXOK | NDIGITS);
486               goto ok;
487 
488             /* 1 through 7 always legal */
489             case '1':
490             case '2':
491             case '3':
492             case '4':
493             case '5':
494             case '6':
495             case '7':
496               base = basefix[base];
497               flags &= ~(SIGNOK | PFXOK | NDIGITS);
498               goto ok;
499 
500             /* digits 8 and 9 ok iff decimal or hex */
501             case '8':
502             case '9':
503               base = basefix[base];
504               if (base <= 8) break; /* not legal here */
505               flags &= ~(SIGNOK | PFXOK | NDIGITS);
506               goto ok;
507 
508             /* letters ok iff hex */
509             case 'A':
510             case 'B':
511             case 'C':
512             case 'D':
513             case 'E':
514             case 'F':
515             case 'a':
516             case 'b':
517             case 'c':
518             case 'd':
519             case 'e':
520             case 'f':
521               /* no need to fix base here */
522               if (base <= 10) break; /* not legal here */
523               flags &= ~(SIGNOK | PFXOK | NDIGITS);
524               goto ok;
525 
526             /* sign ok only as first character */
527             case '+':
528             case '-':
529               if (flags & SIGNOK) {
530                 flags &= ~SIGNOK;
531                 flags |= HAVESIGN;
532                 goto ok;
533               }
534               break;
535 
536             /*
537              * x ok iff flag still set and 2nd char (or
538              * 3rd char if we have a sign).
539              */
540             case 'x':
541             case 'X':
542               if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
543                 base = 16; /* if %i */
544                 flags &= ~PFXOK;
545                 goto ok;
546               }
547               break;
548           }
549 
550           /*
551            * If we got here, c is not a legal character
552            * for a number.  Stop accumulating digits.
553            */
554           if (c != WEOF) __ungetwc(c, fp);
555           break;
556         ok:
557           /*
558            * c is legal: store it and look at the next.
559            */
560           *p++ = (wchar_t)c;
561         }
562         /*
563          * If we had only a sign, it is no good; push
564          * back the sign.  If the number ends in `x',
565          * it was [sign] '0' 'x', so push back the x
566          * and treat it as [sign] '0'.
567          */
568         if (flags & NDIGITS) {
569           if (p > buf) __ungetwc(*--p, fp);
570           goto match_failure;
571         }
572         c = p[-1];
573         if (c == 'x' || c == 'X') {
574           --p;
575           __ungetwc(c, fp);
576         }
577         if ((flags & SUPPRESS) == 0) {
578           uintmax_t res;
579 
580           *p = '\0';
581           if (flags & UNSIGNED)
582             res = wcstoimax(buf, NULL, base);
583           else
584             res = wcstoumax(buf, NULL, base);
585           if (flags & POINTER)
586             *va_arg(ap, void**) = (void*)(uintptr_t)res;
587           else if (flags & MAXINT)
588             *va_arg(ap, intmax_t*) = res;
589           else if (flags & LLONG)
590             *va_arg(ap, long long*) = res;
591           else if (flags & SIZEINT)
592             *va_arg(ap, ssize_t*) = res;
593           else if (flags & PTRINT)
594             *va_arg(ap, ptrdiff_t*) = res;
595           else if (flags & LONG)
596             *va_arg(ap, long*) = res;
597           else if (flags & SHORT)
598             *va_arg(ap, short*) = res;
599           else if (flags & SHORTSHORT)
600             *va_arg(ap, signed char*) = res;
601           else
602             *va_arg(ap, int*) = res;
603           nassigned++;
604         }
605         nread += p - buf;
606         nconversions++;
607         break;
608 
609       case CT_FLOAT:
610         /* scan a floating point number as if by strtod */
611         if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
612           width = sizeof(buf) / sizeof(*buf) - 1;
613         if ((width = wparsefloat(fp, buf, buf + width)) == 0) goto match_failure;
614         if ((flags & SUPPRESS) == 0) {
615           if (flags & LONGDBL) {
616             long double res = wcstold(buf, &p);
617             *va_arg(ap, long double*) = res;
618           } else if (flags & LONG) {
619             double res = wcstod(buf, &p);
620             *va_arg(ap, double*) = res;
621           } else {
622             float res = wcstof(buf, &p);
623             *va_arg(ap, float*) = res;
624           }
625           if (p - buf != (ptrdiff_t)width) abort();
626           nassigned++;
627         }
628         nread += width;
629         nconversions++;
630         break;
631     }
632   }
633 input_failure:
634   return (nconversions != 0 ? nassigned : EOF);
635 match_failure:
636   return (nassigned);
637 }
638 #pragma GCC diagnostic pop
639