1 /*	$OpenBSD: vfscanf.c,v 1.31 2014/03/19 05:17:01 guenther Exp $ */
2 /*-
3  * Copyright (c) 1990, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Chris Torek.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <ctype.h>
35 #include <inttypes.h>
36 #include <stdarg.h>
37 #include <stddef.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <sys/param.h>
42 #include <wctype.h>
43 #include "local.h"
44 
45 #include <private/bionic_fortify.h>
46 #include <platform/bionic/macros.h>
47 #include <private/bionic_mbstate.h>
48 
#define BUF 513 /* Size of the numeric conversion buffer: up to 512 characters plus the NUL. */

// Flags used during conversion.
// Size/type:
#define LONG       0x00001 // l: long or double
#define LONGDBL    0x00002 // L: long double
#define SHORT      0x00004 // h: short
#define SHORTSHORT 0x00008 // hh: 8 bit integer
#define LLONG      0x00010 // ll: long long (+ deprecated q: quad)
#define POINTER    0x00020 // p: void* (as hex)
#define SIZEINT    0x00040 // z: (signed) size_t
#define MAXINT     0x00080 // j: intmax_t
#define PTRINT     0x00100 // t: ptrdiff_t
#define NOSKIP     0x00200 // [ or c: do not skip blanks
// Modifiers:
#define SUPPRESS   0x00400 // *: suppress assignment
#define UNSIGNED   0x00800 // %[oupxX] conversions
#define ALLOCATE   0x01000 // m: allocate a char*
// Internal use during integer parsing:
#define SIGNOK     0x02000 // +/- is (still) legal
#define HAVESIGN   0x04000 // Sign detected
#define NDIGITS    0x08000 // No digits seen yet (cleared by the first digit)
#define PFXOK      0x10000 // "0x" prefix is (still) legal
#define NZDIGITS   0x20000 // No '0' digit seen yet (cleared by the first '0')

// Conversion types.
#define CT_CHAR 0   // %c conversion
#define CT_CCL 1    // %[...] conversion
#define CT_STRING 2 // %s conversion
#define CT_INT 3    // Integer: strtoimax/strtoumax
#define CT_FLOAT 4  // Float: strtod

// Builds the 256-entry membership table for a %[...] scanset.
static const unsigned char* __sccl(char*, const unsigned char*);
82 
83 /*
84  * Internal, unlocked version of vfscanf
85  */
__svfscanf(FILE * fp,const char * fmt0,va_list ap)86 int __svfscanf(FILE* fp, const char* fmt0, va_list ap) {
87   const unsigned char* fmt = reinterpret_cast<const unsigned char*>(fmt0);
88   int c;            /* character from format, or conversion */
89   size_t width;     /* field width, or 0 */
90   char* p;
91   wchar_t* wcp;
92   size_t n;
93   int flags;        /* flags as defined above */
94   int nassigned;    /* number of fields assigned */
95   int nread;        /* number of characters consumed from fp */
96   int base;         /* base argument to strtoimax/strtouimax */
97   char ccltab[256]; /* character class table for %[...] */
98   char buf[BUF];    /* buffer for numeric conversions */
99   size_t nconv;     /* length of multibyte sequence converted */
100   mbstate_t mbs;
101   void* allocation = nullptr; // Allocated but unassigned result for %mc/%ms/%m[.
102   size_t capacity = 0; // Number of char/wchar_t units allocated in `allocation`.
103 
104   /* `basefix' is used to avoid `if' tests in the integer scanner */
105   static short basefix[17] = { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
106 
107   _SET_ORIENTATION(fp, -1);
108 
109   nassigned = 0;
110   nread = 0;
111   for (;;) {
112     c = *fmt++;
113     if (c == 0) return nassigned;
114     if (isspace(c)) {
115       while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p)) nread++, fp->_r--, fp->_p++;
116       continue;
117     }
118     if (c != '%') goto literal;
119     width = 0;
120     flags = 0;
121     /*
122      * switch on the format.  continue if done;
123      * break once format type is derived.
124      */
125 again:
126     c = *fmt++;
127     switch (c) {
128       case '%':
129 literal:
130         if (fp->_r <= 0 && __srefill(fp)) goto input_failure;
131         if (*fp->_p != c) goto match_failure;
132         fp->_r--, fp->_p++;
133         nread++;
134         continue;
135 
136       case '*':
137         flags |= SUPPRESS;
138         goto again;
139       case 'j':
140         flags |= MAXINT;
141         goto again;
142       case 'L':
143         flags |= LONGDBL;
144         goto again;
145       case 'h':
146         if (*fmt == 'h') {
147           fmt++;
148           flags |= SHORTSHORT;
149         } else {
150           flags |= SHORT;
151         }
152         goto again;
153       case 'l':
154         if (*fmt == 'l') {
155           fmt++;
156           flags |= LLONG;
157         } else {
158           flags |= LONG;
159         }
160         goto again;
161       case 'm':
162         flags |= ALLOCATE;
163         goto again;
164       case 'q':
165         flags |= LLONG; /* deprecated */
166         goto again;
167       case 't':
168         flags |= PTRINT;
169         goto again;
170       case 'z':
171         flags |= SIZEINT;
172         goto again;
173 
174       case '0':
175       case '1':
176       case '2':
177       case '3':
178       case '4':
179       case '5':
180       case '6':
181       case '7':
182       case '8':
183       case '9':
184         width = width * 10 + c - '0';
185         goto again;
186 
187       /*
188        * Conversions.
189        * Those marked `compat' are for 4.[123]BSD compatibility.
190        */
191       case 'D': /* compat */
192         flags |= LONG;
193         __BIONIC_FALLTHROUGH;
194       case 'd':
195         c = CT_INT;
196         base = 10;
197         break;
198 
199       case 'i':
200         c = CT_INT;
201         base = 0;
202         break;
203 
204       case 'O': /* compat */
205         flags |= LONG;
206         __BIONIC_FALLTHROUGH;
207       case 'o':
208         c = CT_INT;
209         flags |= UNSIGNED;
210         base = 8;
211         break;
212 
213       case 'u':
214         c = CT_INT;
215         flags |= UNSIGNED;
216         base = 10;
217         break;
218 
219       case 'X':
220       case 'x':
221         flags |= PFXOK; /* enable 0x prefixing */
222         c = CT_INT;
223         flags |= UNSIGNED;
224         base = 16;
225         break;
226 
227       case 'e':
228       case 'E':
229       case 'f':
230       case 'F':
231       case 'g':
232       case 'G':
233       case 'a':
234       case 'A':
235         c = CT_FLOAT;
236         break;
237 
238       case 's':
239         memset(ccltab, 1, 256);
240         ccltab['\t'] = ccltab['\n'] = ccltab['\v'] = ccltab['\f'] = ccltab['\r'] = ccltab[' '] = 0;
241         c = CT_STRING;
242         break;
243 
244       case '[':
245         fmt = __sccl(ccltab, fmt);
246         flags |= NOSKIP;
247         c = CT_CCL;
248         break;
249 
250       case 'c':
251         flags |= NOSKIP;
252         c = CT_CHAR;
253         break;
254 
255       case 'p': /* pointer format is like hex */
256         flags |= POINTER | PFXOK;
257         c = CT_INT;
258         flags |= UNSIGNED;
259         base = 16;
260         break;
261 
262       case 'n':
263         if (flags & SUPPRESS) continue;
264         if (flags & SHORTSHORT) {
265           *va_arg(ap, signed char*) = nread;
266         } else if (flags & SHORT) {
267           *va_arg(ap, short*) = nread;
268         } else if (flags & LONG) {
269           *va_arg(ap, long*) = nread;
270         } else if (flags & SIZEINT) {
271           *va_arg(ap, ssize_t*) = nread;
272         } else if (flags & PTRINT) {
273           *va_arg(ap, ptrdiff_t*) = nread;
274         } else if (flags & LLONG) {
275           *va_arg(ap, long long*) = nread;
276         } else if (flags & MAXINT) {
277           *va_arg(ap, intmax_t*) = nread;
278         } else {
279           *va_arg(ap, int*) = nread;
280         }
281         continue;
282 
283       /*
284        * Disgusting backwards compatibility hacks.	XXX
285        */
286       case '\0': /* compat */
287         return EOF;
288 
289       default: /* compat */
290         if (isupper(c)) flags |= LONG;
291         c = CT_INT;
292         base = 10;
293         break;
294     }
295 
296     if ((flags & ALLOCATE) != 0 && c > CT_STRING) {
297       __fortify_fatal("scanf 'm' only works with %%c/%%s/%%[");
298     }
299     if ((flags & (ALLOCATE|SUPPRESS)) == (ALLOCATE|SUPPRESS)) {
300       __fortify_fatal("scanf 'm' makes no sense with '*'");
301     }
302 
303     /*
304      * We have a conversion that requires input.
305      */
306     if (fp->_r <= 0 && __srefill(fp)) goto input_failure;
307 
308     /*
309      * Consume leading white space, except for formats
310      * that suppress this.
311      */
312     if ((flags & NOSKIP) == 0) {
313       while (isspace(*fp->_p)) {
314         nread++;
315         if (--fp->_r > 0) {
316           fp->_p++;
317         } else if (__srefill(fp)) {
318           goto input_failure;
319         }
320       }
321       /*
322        * Note that there is at least one character in
323        * the buffer, so conversions that do not set NOSKIP
324        * ca no longer result in an input failure.
325        */
326     }
327 
328     /*
329      * Do the conversion.
330      */
331     switch (c) {
332       case CT_CHAR:
333         /* scan arbitrary characters (sets NOSKIP) */
334         if (width == 0) width = 1;
335         if (flags & LONG) {
336           if (flags & ALLOCATE) {
337             allocation = wcp = reinterpret_cast<wchar_t*>(malloc(width * sizeof(wchar_t)));
338             if (allocation == nullptr) goto allocation_failure;
339           } else if (flags & SUPPRESS) {
340             wcp = nullptr;
341           } else {
342             wcp = va_arg(ap, wchar_t*);
343           }
344           size_t bytes = 0;
345           while (width != 0) {
346             if (bytes == MB_CUR_MAX) {
347               fp->_flags |= __SERR;
348               goto input_failure;
349             }
350             buf[bytes++] = *fp->_p;
351             fp->_p++;
352             fp->_r--;
353             memset(&mbs, 0, sizeof(mbs));
354             nconv = mbrtowc(wcp, buf, bytes, &mbs);
355             if (nconv == __MB_ERR_ILLEGAL_SEQUENCE) {
356               fp->_flags |= __SERR;
357               goto input_failure;
358             }
359             if (nconv == 0 && !(flags & SUPPRESS)) *wcp = L'\0';
360             if (nconv != __MB_ERR_INCOMPLETE_SEQUENCE) {
361               nread += bytes;
362               width--;
363               if (!(flags & SUPPRESS)) wcp++;
364               bytes = 0;
365             }
366             if (fp->_r <= 0 && __srefill(fp)) {
367               if (bytes != 0) {
368                 fp->_flags |= __SERR;
369                 goto input_failure;
370               }
371               break;
372             }
373           }
374           if (allocation != nullptr) {
375             *va_arg(ap, wchar_t**) = reinterpret_cast<wchar_t*>(allocation);
376             allocation = nullptr;
377           }
378           if (!(flags & SUPPRESS)) nassigned++;
379         } else if (flags & SUPPRESS) {
380           size_t sum = 0;
381           for (;;) {
382             if ((n = fp->_r) < width) {
383               sum += n;
384               width -= n;
385               fp->_p += n;
386               if (__srefill(fp)) {
387                 if (sum == 0) goto input_failure;
388                 break;
389               }
390             } else {
391               sum += width;
392               fp->_r -= width;
393               fp->_p += width;
394               break;
395             }
396           }
397           nread += sum;
398         } else {
399           if (flags & ALLOCATE) {
400             allocation = p = reinterpret_cast<char*>(malloc(width));
401             if (allocation == nullptr) goto allocation_failure;
402           } else {
403             p = va_arg(ap, char*);
404           }
405           size_t r = fread(p, 1, width, fp);
406           if (r == 0) goto input_failure;
407           if (allocation != nullptr) {
408             *va_arg(ap, char**) = reinterpret_cast<char*>(allocation);
409             allocation = nullptr;
410           }
411           nread += r;
412           nassigned++;
413         }
414         break;
415 
416       case CT_CCL:
417       case CT_STRING:
418         // CT_CCL: scan a (nonempty) character class (sets NOSKIP).
419         // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP.
420         if (width == 0) width = SIZE_MAX;
421         if (flags & LONG) {
422           // TODO: since no-one cares, replace this with a simple fgetwc loop?
423           n = 0;
424           if (flags & ALLOCATE) {
425             capacity = MIN(width, 32);
426             allocation = wcp = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * capacity));
427             if (allocation == nullptr) goto allocation_failure;
428           } else if (flags & SUPPRESS) {
429             wcp = nullptr;
430           } else {
431             wcp = va_arg(ap, wchar_t*);
432           }
433           size_t bytes = 0;
434           while ((c == CT_CCL || !isspace(*fp->_p)) && width != 0) {
435             if (bytes == MB_CUR_MAX) {
436               fp->_flags |= __SERR;
437               goto input_failure;
438             }
439             buf[bytes++] = *fp->_p;
440             fp->_p++;
441             fp->_r--;
442             wchar_t wc = L'\0';
443             memset(&mbs, 0, sizeof(mbs));
444             nconv = mbrtowc(&wc, buf, bytes, &mbs);
445             if (nconv == __MB_ERR_ILLEGAL_SEQUENCE) {
446               fp->_flags |= __SERR;
447               goto input_failure;
448             }
449             if (nconv != __MB_ERR_INCOMPLETE_SEQUENCE) {
450               if ((c == CT_CCL && wctob(wc) != EOF && !ccltab[wctob(wc)]) || (c == CT_STRING && iswspace(wc))) {
451                 while (bytes != 0) {
452                   bytes--;
453                   ungetc(buf[bytes], fp);
454                 }
455                 break;
456               }
457               if (wcp) wcp[n] = wc;
458               n++;
459               if (allocation != nullptr && n == capacity) {
460                 capacity *= 2;
461                 wchar_t* new_allocation =
462                     reinterpret_cast<wchar_t*>(realloc(allocation, sizeof(wchar_t) * capacity));
463                 if (new_allocation == nullptr) goto allocation_failure;
464                 allocation = wcp = new_allocation;
465               }
466               nread += bytes;
467               width--;
468               bytes = 0;
469             }
470             if (fp->_r <= 0 && __srefill(fp)) {
471               if (bytes != 0) {
472                 fp->_flags |= __SERR;
473                 goto input_failure;
474               }
475               break;
476             }
477           }
478           if (c == CT_CCL && bytes != 0) {
479             fp->_flags |= __SERR;
480             goto input_failure;
481           }
482           if (allocation != nullptr) {
483             *va_arg(ap, wchar_t**) = reinterpret_cast<wchar_t*>(allocation);
484             allocation = nullptr;
485           }
486         } else if (flags & SUPPRESS) {
487           n = 0;
488           while (ccltab[*fp->_p]) {
489             n++, fp->_r--, fp->_p++;
490             if (--width == 0) break;
491             if (fp->_r <= 0 && __srefill(fp)) {
492               if (c == CT_CCL && n == 0) goto input_failure;
493               break;
494             }
495           }
496           nread += n;
497         } else {
498           if (flags & ALLOCATE) {
499             capacity = MIN(width, 32);
500             allocation = p = reinterpret_cast<char*>(malloc(capacity));
501             if (allocation == nullptr) goto allocation_failure;
502           } else {
503             p = va_arg(ap, char*);
504           }
505           n = 0;
506           while (ccltab[*fp->_p]) {
507             fp->_r--;
508             p[n++] = *fp->_p++;
509             if (allocation != nullptr && n == capacity) {
510               capacity *= 2;
511               char* new_allocation = reinterpret_cast<char*>(realloc(allocation, capacity));
512               if (new_allocation == nullptr) goto allocation_failure;
513               allocation = p = new_allocation;
514             }
515             if (--width == 0) break;
516             if (fp->_r <= 0 && __srefill(fp)) {
517               if (c == CT_CCL && n == 0) goto input_failure;
518               break;
519             }
520           }
521           nread += n;
522           if (allocation != nullptr) {
523             *va_arg(ap, char**) = reinterpret_cast<char*>(allocation);
524             allocation = nullptr;
525           }
526         }
527         if (c == CT_CCL && n == 0) goto match_failure;
528         if (!(flags & SUPPRESS)) {
529           if (flags & LONG) {
530             wcp[n] = L'\0';
531           } else {
532             p[n] = '\0';
533           }
534           ++nassigned;
535         }
536         break;
537 
538       case CT_INT:
539         /* scan an integer as if by strtoimax/strtoumax */
540 #ifdef hardway
541         if (width == 0 || width > sizeof(buf) - 1) width = sizeof(buf) - 1;
542 #else
543         /* size_t is unsigned, hence this optimisation */
544         if (--width > sizeof(buf) - 2) width = sizeof(buf) - 2;
545         width++;
546 #endif
547         flags |= SIGNOK | NDIGITS | NZDIGITS;
548         for (p = buf; width; width--) {
549           c = *fp->_p;
550           /*
551            * Switch on the character; `goto ok'
552            * if we accept it as a part of number.
553            */
554           switch (c) {
555             /*
556              * The digit 0 is always legal, but is
557              * special.  For %i conversions, if no
558              * digits (zero or nonzero) have been
559              * scanned (only signs), we will have
560              * base==0.  In that case, we should set
561              * it to 8 and enable 0x prefixing.
562              * Also, if we have not scanned zero digits
563              * before this, do not turn off prefixing
564              * (someone else will turn it off if we
565              * have scanned any nonzero digits).
566              */
567             case '0':
568               if (base == 0) {
569                 base = 8;
570                 flags |= PFXOK;
571               }
572               if (flags & NZDIGITS)
573                 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
574               else
575                 flags &= ~(SIGNOK | PFXOK | NDIGITS);
576               goto ok;
577 
578             /* 1 through 7 always legal */
579             case '1':
580             case '2':
581             case '3':
582             case '4':
583             case '5':
584             case '6':
585             case '7':
586               base = basefix[base];
587               flags &= ~(SIGNOK | PFXOK | NDIGITS);
588               goto ok;
589 
590             /* digits 8 and 9 ok iff decimal or hex */
591             case '8':
592             case '9':
593               base = basefix[base];
594               if (base <= 8) break; /* not legal here */
595               flags &= ~(SIGNOK | PFXOK | NDIGITS);
596               goto ok;
597 
598             /* letters ok iff hex */
599             case 'A':
600             case 'B':
601             case 'C':
602             case 'D':
603             case 'E':
604             case 'F':
605             case 'a':
606             case 'b':
607             case 'c':
608             case 'd':
609             case 'e':
610             case 'f':
611               /* no need to fix base here */
612               if (base <= 10) break; /* not legal here */
613               flags &= ~(SIGNOK | PFXOK | NDIGITS);
614               goto ok;
615 
616             /* sign ok only as first character */
617             case '+':
618             case '-':
619               if (flags & SIGNOK) {
620                 flags &= ~SIGNOK;
621                 flags |= HAVESIGN;
622                 goto ok;
623               }
624               break;
625 
626             /*
627              * x ok iff flag still set and 2nd char (or
628              * 3rd char if we have a sign).
629              */
630             case 'x':
631             case 'X':
632               if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
633                 base = 16; /* if %i */
634                 flags &= ~PFXOK;
635                 goto ok;
636               }
637               break;
638           }
639 
640           /*
641            * If we got here, c is not a legal character
642            * for a number.  Stop accumulating digits.
643            */
644           break;
645         ok:
646           /*
647            * c is legal: store it and look at the next.
648            */
649           *p++ = c;
650           if (--fp->_r > 0)
651             fp->_p++;
652           else if (__srefill(fp))
653             break; /* EOF */
654         }
655         /*
656          * If we had only a sign, it is no good; push
657          * back the sign.  If the number ends in `x',
658          * it was [sign] '0' 'x', so push back the x
659          * and treat it as [sign] '0'.
660          */
661         if (flags & NDIGITS) {
662           if (p > buf) (void)ungetc(*(u_char*)--p, fp);
663           goto match_failure;
664         }
665         c = ((u_char*)p)[-1];
666         if (c == 'x' || c == 'X') {
667           --p;
668           (void)ungetc(c, fp);
669         }
670         if ((flags & SUPPRESS) == 0) {
671           uintmax_t res;
672 
673           *p = '\0';
674           if (flags & UNSIGNED) {
675             res = strtoumax(buf, nullptr, base);
676           } else {
677             res = strtoimax(buf, nullptr, base);
678           }
679           if (flags & POINTER) {
680             *va_arg(ap, void**) = (void*)(uintptr_t)res;
681           } else if (flags & MAXINT) {
682             *va_arg(ap, intmax_t*) = res;
683           } else if (flags & LLONG) {
684             *va_arg(ap, long long*) = res;
685           } else if (flags & SIZEINT) {
686             *va_arg(ap, ssize_t*) = res;
687           } else if (flags & PTRINT) {
688             *va_arg(ap, ptrdiff_t*) = res;
689           } else if (flags & LONG) {
690             *va_arg(ap, long*) = res;
691           } else if (flags & SHORT) {
692             *va_arg(ap, short*) = res;
693           } else if (flags & SHORTSHORT) {
694             *va_arg(ap, signed char*) = res;
695           } else {
696             *va_arg(ap, int*) = res;
697           }
698           nassigned++;
699         }
700         nread += p - buf;
701         break;
702 
703       case CT_FLOAT:
704         /* scan a floating point number as if by strtod */
705         if (width == 0 || width > sizeof(buf) - 1) width = sizeof(buf) - 1;
706         if ((width = parsefloat(fp, buf, buf + width)) == 0) goto match_failure;
707         if ((flags & SUPPRESS) == 0) {
708           if (flags & LONGDBL) {
709             long double res = strtold(buf, &p);
710             *va_arg(ap, long double*) = res;
711           } else if (flags & LONG) {
712             double res = strtod(buf, &p);
713             *va_arg(ap, double*) = res;
714           } else {
715             float res = strtof(buf, &p);
716             *va_arg(ap, float*) = res;
717           }
718           if ((size_t)(p - buf) != width) abort();
719           nassigned++;
720         }
721         nread += width;
722         break;
723     }
724   }
725 allocation_failure:
726 input_failure:
727   free(allocation);
728   if (nassigned == 0) nassigned = -1;
729 match_failure:
730   return nassigned;
731 }
732 
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * a closing `]' is seen.  The table has a 1 wherever characters should
 * be considered part of the scanset.
 *
 * `tab' must have room for 256 entries; every entry is written.
 */
static const unsigned char* __sccl(char* tab, const unsigned char* fmt) {
  int c, n, v;

  /* first `clear' the whole table */
  c = *fmt++; /* first char hat => negated scanset */
  if (c == '^') {
    v = 1;      /* default => accept */
    c = *fmt++; /* get new first char */
  } else {
    v = 0; /* default => reject */
  }
  memset(tab, v, 256);
  if (c == 0) return (fmt - 1); /* format ended before closing ] */

  /*
   * Now set the entries corresponding to the actual scanset
   * to the opposite of the above.
   *
   * The first character may be ']' (or '-') without being special;
   * the last character may be '-'.
   */
  v = 1 - v;
  for (;;) {
    tab[c] = v; /* take character c */
  doswitch:
    n = *fmt++; /* and examine the next */
    switch (n) {
      case 0: /* format ended too soon */
        return (fmt - 1);

      case '-':
        /*
         * A scanset of the form
         *	[01+-]
         * is defined as `the digit 0, the digit 1,
         * the character +, the character -', but
         * the effect of a scanset such as
         *	[a-zA-Z0-9]
         * is implementation defined.  The V7 Unix
         * scanf treats `a-z' as `the letters a through
         * z', but treats `a-a' as `the letter a, the
         * character -, and the letter a'.
         *
         * For compatibility, the `-' is not considered
         * to define a range if the character following
         * it is either a close bracket (required by ANSI)
         * or is not numerically greater than the character
         * we just stored in the table (c).
         *
         * The comparison must be `<=', not `<': with n == c
         * the fill loop below would store into tab[c + 1],
         * adding a character that was never requested and,
         * for c == 255, writing one entry past the table.
         */
        n = *fmt;
        if (n == ']' || n <= c) {
          c = '-';
          break; /* resume the for(;;) */
        }
        fmt++;
        do { /* fill in the range */
          tab[++c] = v;
        } while (c < n);
#if 1 /* XXX another disgusting compatibility hack */
        /*
         * Alas, the V7 Unix scanf also treats formats
         * such as [a-c-e] as `the letters a through e'.
         * This too is permitted by the standard....
         */
        goto doswitch;
#else
        c = *fmt++;
        if (c == 0) return (fmt - 1);
        if (c == ']') return (fmt);
#endif
        break;

      case ']': /* end of scanset */
        return fmt;

      default: /* just another character */
        c = n;
        break;
    }
  }
  /* NOTREACHED */
}
821