1 /*	$OpenBSD: fnmatch.c,v 1.22 2020/03/13 03:25:45 djm Exp $	*/
2 
3 /* Copyright (c) 2011, VMware, Inc.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *     * Redistributions of source code must retain the above copyright
9  *       notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above copyright
11  *       notice, this list of conditions and the following disclaimer in the
12  *       documentation and/or other materials provided with the distribution.
13  *     * Neither the name of the VMware, Inc. nor the names of its contributors
14  *       may be used to endorse or promote products derived from this software
15  *       without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 2008, 2016 Todd C. Miller <millert@openbsd.org>
31  *
32  * Permission to use, copy, modify, and distribute this software for any
33  * purpose with or without fee is hereby granted, provided that the above
34  * copyright notice and this permission notice appear in all copies.
35  *
36  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43  */
44 
45 /* Authored by William A. Rowe Jr. <wrowe; apache.org, vmware.com>, April 2011
46  *
47  * Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008
48  * as described in;
49  *   http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html
50  *
51  * Filename pattern matches defined in section 2.13, "Pattern Matching Notation"
52  * from chapter 2. "Shell Command Language"
53  *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
54  * where; 1. A bracket expression starting with an unquoted <circumflex> '^'
55  * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.'
56  * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading
57  * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce
58  * a valid bracket expression is treated as an ordinary character; 4. a differing
59  * number of consecutive slashes within pattern and string will NOT match;
60  * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character.
61  *
62  * Bracket expansion defined in section 9.3.5, "RE Bracket Expression",
63  * from chapter 9, "Regular Expressions"
64  *   http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
65  * with no support for collating symbols, equivalence class expressions or
66  * character class expressions.  A partial range expression with a leading
67  * hyphen following a valid range expression will match only the ordinary
68  * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters
69  * 'a' through 'm', a <hyphen> '-', or a 'z').
70  *
71  * Supports BSD extensions FNM_LEADING_DIR to match pattern to the end of one
72  * path segment of string, and FNM_CASEFOLD to ignore alpha case.
73  *
74  * NOTE: Only POSIX/C single byte locales are correctly supported at this time.
75  * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results,
76  * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and
77  * nonalpha characters within a range.
78  *
79  * XXX comments below indicate porting required for multi-byte character sets
80  * and non-POSIX locale collation orders; requires mbr* APIs to track shift
81  * state of pattern and string (rewinding pattern and string repeatedly).
82  *
83  * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g.
84  * UTF-8, SHIFT-JIS, etc).  Any implementation allowing '\' as an alternate
85  * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS.
86  */
87 
88 #include <fnmatch.h>
89 #include <string.h>
90 #include <ctype.h>
91 
92 #include "charclass.h"
93 
94 #define	RANGE_MATCH	1
95 #define	RANGE_NOMATCH	0
96 #define	RANGE_ERROR	(-1)
97 
98 static int
classmatch(const char * pattern,char test,int foldcase,const char ** ep)99 classmatch(const char *pattern, char test, int foldcase, const char **ep)
100 {
101 	const char * const mismatch = pattern;
102 	const char *colon;
103 	struct cclass *cc;
104 	int rval = RANGE_NOMATCH;
105 	size_t len;
106 
107 	if (pattern[0] != '[' || pattern[1] != ':') {
108 		*ep = mismatch;
109 		return RANGE_ERROR;
110 	}
111 	pattern += 2;
112 
113 	if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
114 		*ep = mismatch;
115 		return RANGE_ERROR;
116 	}
117 	*ep = colon + 2;
118 	len = (size_t)(colon - pattern);
119 
120 	if (foldcase && strncmp(pattern, "upper:]", 7) == 0)
121 		pattern = "lower:]";
122 	for (cc = cclasses; cc->name != NULL; cc++) {
123 		if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
124 			if (cc->isctype((unsigned char)test))
125 				rval = RANGE_MATCH;
126 			break;
127 		}
128 	}
129 	if (cc->name == NULL) {
130 		/* invalid character class, treat as normal text */
131 		*ep = mismatch;
132 		rval = RANGE_ERROR;
133 	}
134 	return rval;
135 }
136 
137 /* Most MBCS/collation/case issues handled here.  Wildcard '*' is not handled.
138  * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over,
139  * however the "\/" sequence is advanced to '/'.
140  *
141  * Both pattern and string are **char to support pointer increment of arbitrary
142  * multibyte characters for the given locale, in a later iteration of this code
143  */
fnmatch_ch(const char ** pattern,const char ** string,int flags)144 static int fnmatch_ch(const char **pattern, const char **string, int flags)
145 {
146 	const char * const mismatch = *pattern;
147 	const int nocase = !!(flags & FNM_CASEFOLD);
148 	const int escape = !(flags & FNM_NOESCAPE);
149 	const int slash = !!(flags & FNM_PATHNAME);
150 	int result = FNM_NOMATCH;
151 	const char *startch;
152 	int negate;
153 
154 	if (**pattern == '[') {
155 		++*pattern;
156 
157 		/* Handle negation, either leading ! or ^ operators */
158 		negate = (**pattern == '!') || (**pattern == '^');
159 		if (negate)
160 			++*pattern;
161 
162 		/* ']' is an ordinary char at the start of the range pattern */
163 		if (**pattern == ']')
164 			goto leadingclosebrace;
165 
166 		while (**pattern) {
167 			if (**pattern == ']') {
168 				++*pattern;
169 				/* XXX: Fix for MBCS character width */
170 				++*string;
171 				return (result ^ negate);
172 			}
173 
174 			if (escape && (**pattern == '\\')) {
175 				++*pattern;
176 
177 				/* Patterns must terminate with ']', not EOS */
178 				if (!**pattern)
179 					break;
180 			}
181 
182 			/* Patterns must terminate with ']' not '/' */
183 			if (slash && (**pattern == '/'))
184 				break;
185 
186 			/* Match character classes. */
187 			switch (classmatch(*pattern, **string, nocase, pattern)) {
188 			case RANGE_MATCH:
189 				result = 0;
190 				continue;
191 			case RANGE_NOMATCH:
192 				/* Valid character class but no match. */
193 				continue;
194 			default:
195 				/* Not a valid character class. */
196 				break;
197 			}
198 			if (!**pattern)
199 				break;
200 
201 leadingclosebrace:
202 			/* Look at only well-formed range patterns;
203 			 * "x-]" is not allowed unless escaped ("x-\]")
204 			 * XXX: Fix for locale/MBCS character width
205 			 */
206 			if (((*pattern)[1] == '-') && ((*pattern)[2] != ']')) {
207 				startch = *pattern;
208 				*pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2;
209 
210 				/*
211 				 * NOT a properly balanced [expr] pattern, EOS
212 				 * terminated or ranges containing a slash in
213 				 * FNM_PATHNAME mode pattern fall out to to the
214 				 * rewind and test '[' literal code path.
215 				 */
216 				if (!**pattern || (slash && (**pattern == '/')))
217 					break;
218 
219 				/* XXX: handle locale/MBCS comparison, advance by MBCS char width */
220 				if ((**string >= *startch) && (**string <= **pattern))
221 					result = 0;
222 				else if (nocase &&
223 				    (isupper((unsigned char)**string) ||
224 				     isupper((unsigned char)*startch) ||
225 				     isupper((unsigned char)**pattern)) &&
226 				    (tolower((unsigned char)**string) >=
227 				     tolower((unsigned char)*startch)) &&
228 				    (tolower((unsigned char)**string) <=
229 				     tolower((unsigned char)**pattern)))
230 					result = 0;
231 
232 				++*pattern;
233 				continue;
234 			}
235 
236 			/* XXX: handle locale/MBCS comparison, advance by MBCS char width */
237 			if ((**string == **pattern))
238 				result = 0;
239 			else if (nocase && (isupper((unsigned char)**string) ||
240 			    isupper((unsigned char)**pattern)) &&
241 			    (tolower((unsigned char)**string) ==
242 			    tolower((unsigned char)**pattern)))
243 				result = 0;
244 
245 			++*pattern;
246 		}
247 		/*
248 		 * NOT a properly balanced [expr] pattern;
249 		 * Rewind and reset result to test '[' literal
250 		 */
251 		*pattern = mismatch;
252 		result = FNM_NOMATCH;
253 	} else if (**pattern == '?') {
254 		/* Optimize '?' match before unescaping **pattern */
255 		if (!**string || (slash && (**string == '/')))
256 			return FNM_NOMATCH;
257 		result = 0;
258 		goto fnmatch_ch_success;
259 	} else if (escape && (**pattern == '\\') && (*pattern)[1]) {
260 		++*pattern;
261 	}
262 
263 	/* XXX: handle locale/MBCS comparison, advance by the MBCS char width */
264 	if (**string == **pattern)
265 		result = 0;
266 	else if (nocase && (isupper((unsigned char)**string) ||
267 	    isupper((unsigned char)**pattern)) &&
268 	    (tolower((unsigned char)**string) ==
269 	    tolower((unsigned char)**pattern)))
270 		result = 0;
271 
272 	/* Refuse to advance over trailing slash or NULs */
273 	if (**string == '\0' || **pattern == '\0' ||
274 	    (slash && ((**string == '/') || (**pattern == '/'))))
275 		return result;
276 
277 fnmatch_ch_success:
278 	++*pattern;
279 	++*string;
280 	return result;
281 }
282 
283 
fnmatch(const char * pattern,const char * string,int flags)284 int fnmatch(const char *pattern, const char *string, int flags)
285 {
286 	static const char dummystring[2] = {' ', 0};
287 	const int escape = !(flags & FNM_NOESCAPE);
288 	const int slash = !!(flags & FNM_PATHNAME);
289 	const int leading_dir = !!(flags & FNM_LEADING_DIR);
290 	const char *dummyptr, *matchptr, *strendseg;
291 	int wild;
292 	/* For '*' wild processing only; suppress 'used before initialization'
293 	 * warnings with dummy initialization values;
294 	 */
295 	const char *strstartseg = NULL;
296 	const char *mismatch = NULL;
297 	int matchlen = 0;
298 
299 	if (*pattern == '*')
300 		goto firstsegment;
301 
302 	while (*pattern && *string) {
303 		/*
304 		 * Pre-decode "\/" which has no special significance, and
305 		 * match balanced slashes, starting a new segment pattern.
306 		 */
307 		if (slash && escape && (*pattern == '\\') && (pattern[1] == '/'))
308 			++pattern;
309 		if (slash && (*pattern == '/') && (*string == '/')) {
310 			++pattern;
311 			++string;
312 		}
313 
314 firstsegment:
315 		/*
316 		 * At the beginning of each segment, validate leading period
317 		 * behavior.
318 		 */
319 		if ((flags & FNM_PERIOD) && (*string == '.')) {
320 		    if (*pattern == '.')
321 			    ++pattern;
322 		    else if (escape && (*pattern == '\\') && (pattern[1] == '.'))
323 			    pattern += 2;
324 		    else
325 			    return FNM_NOMATCH;
326 		    ++string;
327 		}
328 
329 		/*
330 		 * Determine the end of string segment.  Presumes '/'
331 		 * character is unique, not composite in any MBCS encoding
332 		 */
333 		if (slash) {
334 			strendseg = strchr(string, '/');
335 			if (!strendseg)
336 				strendseg = strchr(string, '\0');
337 		} else {
338 			strendseg = strchr(string, '\0');
339 		}
340 
341 		/*
342 		 * Allow pattern '*' to be consumed even with no remaining
343 		 * string to match.
344 		 */
345 		while (*pattern) {
346 			if ((string > strendseg) ||
347 			    ((string == strendseg) && (*pattern != '*')))
348 				break;
349 
350 			if (slash && ((*pattern == '/') ||
351 			    (escape && (*pattern == '\\') && (pattern[1] == '/'))))
352 				break;
353 
354 			/*
355 			 * Reduce groups of '*' and '?' to n '?' matches
356 			 * followed by one '*' test for simplicity.
357 			 */
358 			for (wild = 0; (*pattern == '*') || (*pattern == '?'); ++pattern) {
359 				if (*pattern == '*') {
360 					wild = 1;
361 				} else if (string < strendseg) {  /* && (*pattern == '?') */
362 					/* XXX: Advance 1 char for MBCS locale */
363 					++string;
364 				}
365 				else {  /* (string >= strendseg) && (*pattern == '?') */
366 					return FNM_NOMATCH;
367 				}
368 			}
369 
370 			if (wild) {
371 				strstartseg = string;
372 				mismatch = pattern;
373 
374 				/*
375 				 * Count fixed (non '*') char matches remaining
376 				 * in pattern * excluding '/' (or "\/") and '*'.
377 				 */
378 				for (matchptr = pattern, matchlen = 0; 1; ++matchlen) {
379 					if ((*matchptr == '\0') ||
380 					    (slash && ((*matchptr == '/') ||
381 					    (escape && (*matchptr == '\\') &&
382 					    (matchptr[1] == '/'))))) {
383 						/* Compare precisely this many
384 						 * trailing string chars, the
385 						 * resulting match needs no
386 						 * wildcard loop.
387 						 */
388 						/* XXX: Adjust for MBCS */
389 						if (string + matchlen > strendseg)
390 							return FNM_NOMATCH;
391 
392 						string = strendseg - matchlen;
393 						wild = 0;
394 						break;
395 					}
396 
397 					if (*matchptr == '*') {
398 						/*
399 						 * Ensure at least this many
400 						 * trailing string chars remain
401 						 * for the first comparison.
402 						 */
403 						/* XXX: Adjust for MBCS */
404 						if (string + matchlen > strendseg)
405 							return FNM_NOMATCH;
406 
407 						/*
408 						 * Begin first wild comparison
409 						 * at the current position.
410 						 */
411 						break;
412 					}
413 
414 					/*
415 					 * Skip forward in pattern by a single
416 					 * character match Use a dummy
417 					 * fnmatch_ch() test to count one
418 					 * "[range]" escape.
419 					 */
420 					/* XXX: Adjust for MBCS */
421 					if (escape && (*matchptr == '\\') &&
422 					    matchptr[1]) {
423 						matchptr += 2;
424 					} else if (*matchptr == '[') {
425 						dummyptr = dummystring;
426 						fnmatch_ch(&matchptr, &dummyptr,
427 						    flags);
428 					} else {
429 						++matchptr;
430 					}
431 				}
432 			}
433 
434 			/* Incrementally match string against the pattern. */
435 			while (*pattern && (string < strendseg)) {
436 				/* Success; begin a new wild pattern search. */
437 				if (*pattern == '*')
438 					break;
439 
440 				if (slash && ((*string == '/') ||
441 				    (*pattern == '/') || (escape &&
442 				    (*pattern == '\\') && (pattern[1] == '/'))))
443 					break;
444 
445 				/*
446 				 * Compare ch's (the pattern is advanced over
447 				 * "\/" to the '/', but slashes will mismatch,
448 				 * and are not consumed).
449 				 */
450 				if (!fnmatch_ch(&pattern, &string, flags))
451 					continue;
452 
453 				/*
454 				 * Failed to match, loop against next char
455 				 * offset of string segment until not enough
456 				 * string chars remain to match the fixed
457 				 * pattern.
458 				 */
459 				if (wild) {
460 					/* XXX: Advance 1 char for MBCS locale */
461 					string = ++strstartseg;
462 					if (string + matchlen > strendseg)
463 						return FNM_NOMATCH;
464 
465 					pattern = mismatch;
466 					continue;
467 				} else
468 					return FNM_NOMATCH;
469 			}
470 		}
471 
472 		if (*string && !((slash || leading_dir) && (*string == '/')))
473 			return FNM_NOMATCH;
474 
475 		if (*pattern && !(slash && ((*pattern == '/') ||
476 		    (escape && (*pattern == '\\') && (pattern[1] == '/')))))
477 			return FNM_NOMATCH;
478 
479 		if (leading_dir && !*pattern && *string == '/')
480 			return 0;
481 	}
482 
483 	/* Where both pattern and string are at EOS, declare success.  */
484 	if (!*string && !*pattern)
485 		return 0;
486 
487 	/* Pattern didn't match to the end of string. */
488 	return FNM_NOMATCH;
489 }
490