1 /*
2  * Copyright (C) 2006 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.webkit;
18 
19 import android.annotation.Nullable;
20 import android.compat.annotation.UnsupportedAppUsage;
21 import android.net.ParseException;
22 import android.net.Uri;
23 import android.net.WebAddress;
24 import android.util.Log;
25 
26 import java.io.UnsupportedEncodingException;
27 import java.util.Locale;
28 import java.util.regex.Matcher;
29 import java.util.regex.Pattern;
30 
31 public final class URLUtil {
32 
33     private static final String LOGTAG = "webkit";
34     private static final boolean TRACE = false;
35 
36     // to refer to bar.png under your package's asset/foo/ directory, use
37     // "file:///android_asset/foo/bar.png".
38     static final String ASSET_BASE = "file:///android_asset/";
39     // to refer to bar.png under your package's res/drawable/ directory, use
40     // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
41     // "drawable-hdpi" directory as well.
42     static final String RESOURCE_BASE = "file:///android_res/";
43     static final String FILE_BASE = "file:";
44     static final String PROXY_BASE = "file:///cookieless_proxy/";
45     static final String CONTENT_BASE = "content:";
46 
47     /**
48      * Cleans up (if possible) user-entered web addresses
49      */
guessUrl(String inUrl)50     public static String guessUrl(String inUrl) {
51 
52         String retVal = inUrl;
53         WebAddress webAddress;
54 
55         if (TRACE) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
56 
57         if (inUrl.length() == 0) return inUrl;
58         if (inUrl.startsWith("about:")) return inUrl;
59         // Do not try to interpret data scheme URLs
60         if (inUrl.startsWith("data:")) return inUrl;
61         // Do not try to interpret file scheme URLs
62         if (inUrl.startsWith("file:")) return inUrl;
63         // Do not try to interpret javascript scheme URLs
64         if (inUrl.startsWith("javascript:")) return inUrl;
65 
66         // bug 762454: strip period off end of url
67         if (inUrl.endsWith(".") == true) {
68             inUrl = inUrl.substring(0, inUrl.length() - 1);
69         }
70 
71         try {
72             webAddress = new WebAddress(inUrl);
73         } catch (ParseException ex) {
74 
75             if (TRACE) {
76                 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
77             }
78             return retVal;
79         }
80 
81         // Check host
82         if (webAddress.getHost().indexOf('.') == -1) {
83             // no dot: user probably entered a bare domain.  try .com
84             webAddress.setHost("www." + webAddress.getHost() + ".com");
85         }
86         return webAddress.toString();
87     }
88 
composeSearchUrl(String inQuery, String template, String queryPlaceHolder)89     public static String composeSearchUrl(String inQuery, String template,
90                                           String queryPlaceHolder) {
91         int placeHolderIndex = template.indexOf(queryPlaceHolder);
92         if (placeHolderIndex < 0) {
93             return null;
94         }
95 
96         String query;
97         StringBuilder buffer = new StringBuilder();
98         buffer.append(template.substring(0, placeHolderIndex));
99 
100         try {
101             query = java.net.URLEncoder.encode(inQuery, "utf-8");
102             buffer.append(query);
103         } catch (UnsupportedEncodingException ex) {
104             return null;
105         }
106 
107         buffer.append(template.substring(
108                 placeHolderIndex + queryPlaceHolder.length()));
109 
110         return buffer.toString();
111     }
112 
decode(byte[] url)113     public static byte[] decode(byte[] url) throws IllegalArgumentException {
114         if (url.length == 0) {
115             return new byte[0];
116         }
117 
118         // Create a new byte array with the same length to ensure capacity
119         byte[] tempData = new byte[url.length];
120 
121         int tempCount = 0;
122         for (int i = 0; i < url.length; i++) {
123             byte b = url[i];
124             if (b == '%') {
125                 if (url.length - i > 2) {
126                     b = (byte) (parseHex(url[i + 1]) * 16
127                             + parseHex(url[i + 2]));
128                     i += 2;
129                 } else {
130                     throw new IllegalArgumentException("Invalid format");
131                 }
132             }
133             tempData[tempCount++] = b;
134         }
135         byte[] retData = new byte[tempCount];
136         System.arraycopy(tempData, 0, retData, 0, tempCount);
137         return retData;
138     }
139 
140     /**
141      * @return {@code true} if the url is correctly URL encoded
142      */
143     @UnsupportedAppUsage
verifyURLEncoding(String url)144     static boolean verifyURLEncoding(String url) {
145         int count = url.length();
146         if (count == 0) {
147             return false;
148         }
149 
150         int index = url.indexOf('%');
151         while (index >= 0 && index < count) {
152             if (index < count - 2) {
153                 try {
154                     parseHex((byte) url.charAt(++index));
155                     parseHex((byte) url.charAt(++index));
156                 } catch (IllegalArgumentException e) {
157                     return false;
158                 }
159             } else {
160                 return false;
161             }
162             index = url.indexOf('%', index + 1);
163         }
164         return true;
165     }
166 
parseHex(byte b)167     private static int parseHex(byte b) {
168         if (b >= '0' && b <= '9') return (b - '0');
169         if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
170         if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
171 
172         throw new IllegalArgumentException("Invalid hex char '" + b + "'");
173     }
174 
175     /**
176      * @return {@code true} if the url is an asset file.
177      */
isAssetUrl(String url)178     public static boolean isAssetUrl(String url) {
179         return (null != url) && url.startsWith(ASSET_BASE);
180     }
181 
182     /**
183      * @return {@code true} if the url is a resource file.
184      * @hide
185      */
186     @UnsupportedAppUsage
isResourceUrl(String url)187     public static boolean isResourceUrl(String url) {
188         return (null != url) && url.startsWith(RESOURCE_BASE);
189     }
190 
191     /**
192      * @return {@code true} if the url is a proxy url to allow cookieless network
193      * requests from a file url.
194      * @deprecated Cookieless proxy is no longer supported.
195      */
196     @Deprecated
isCookielessProxyUrl(String url)197     public static boolean isCookielessProxyUrl(String url) {
198         return (null != url) && url.startsWith(PROXY_BASE);
199     }
200 
201     /**
202      * @return {@code true} if the url is a local file.
203      */
isFileUrl(String url)204     public static boolean isFileUrl(String url) {
205         return (null != url) && (url.startsWith(FILE_BASE) &&
206                                  !url.startsWith(ASSET_BASE) &&
207                                  !url.startsWith(PROXY_BASE));
208     }
209 
210     /**
211      * @return {@code true} if the url is an about: url.
212      */
isAboutUrl(String url)213     public static boolean isAboutUrl(String url) {
214         return (null != url) && url.startsWith("about:");
215     }
216 
217     /**
218      * @return {@code true} if the url is a data: url.
219      */
isDataUrl(String url)220     public static boolean isDataUrl(String url) {
221         return (null != url) && url.startsWith("data:");
222     }
223 
224     /**
225      * @return {@code true} if the url is a javascript: url.
226      */
isJavaScriptUrl(String url)227     public static boolean isJavaScriptUrl(String url) {
228         return (null != url) && url.startsWith("javascript:");
229     }
230 
231     /**
232      * @return {@code true} if the url is an http: url.
233      */
isHttpUrl(String url)234     public static boolean isHttpUrl(String url) {
235         return (null != url) &&
236                (url.length() > 6) &&
237                url.substring(0, 7).equalsIgnoreCase("http://");
238     }
239 
240     /**
241      * @return {@code true} if the url is an https: url.
242      */
isHttpsUrl(String url)243     public static boolean isHttpsUrl(String url) {
244         return (null != url) &&
245                (url.length() > 7) &&
246                url.substring(0, 8).equalsIgnoreCase("https://");
247     }
248 
249     /**
250      * @return {@code true} if the url is a network url.
251      */
isNetworkUrl(String url)252     public static boolean isNetworkUrl(String url) {
253         if (url == null || url.length() == 0) {
254             return false;
255         }
256         return isHttpUrl(url) || isHttpsUrl(url);
257     }
258 
259     /**
260      * @return {@code true} if the url is a content: url.
261      */
isContentUrl(String url)262     public static boolean isContentUrl(String url) {
263         return (null != url) && url.startsWith(CONTENT_BASE);
264     }
265 
266     /**
267      * @return {@code true} if the url is valid.
268      */
isValidUrl(String url)269     public static boolean isValidUrl(String url) {
270         if (url == null || url.length() == 0) {
271             return false;
272         }
273 
274         return (isAssetUrl(url) ||
275                 isResourceUrl(url) ||
276                 isFileUrl(url) ||
277                 isAboutUrl(url) ||
278                 isHttpUrl(url) ||
279                 isHttpsUrl(url) ||
280                 isJavaScriptUrl(url) ||
281                 isContentUrl(url));
282     }
283 
284     /**
285      * Strips the url of the anchor.
286      */
stripAnchor(String url)287     public static String stripAnchor(String url) {
288         int anchorIndex = url.indexOf('#');
289         if (anchorIndex != -1) {
290             return url.substring(0, anchorIndex);
291         }
292         return url;
293     }
294 
295     /**
296      * Guesses canonical filename that a download would have, using
297      * the URL and contentDisposition. File extension, if not defined,
298      * is added based on the mimetype
299      * @param url Url to the content
300      * @param contentDisposition Content-Disposition HTTP header or {@code null}
301      * @param mimeType Mime-type of the content or {@code null}
302      *
303      * @return suggested filename
304      */
guessFileName( String url, @Nullable String contentDisposition, @Nullable String mimeType)305     public static final String guessFileName(
306             String url,
307             @Nullable String contentDisposition,
308             @Nullable String mimeType) {
309         String filename = null;
310         String extension = null;
311 
312         // If we couldn't do anything with the hint, move toward the content disposition
313         if (filename == null && contentDisposition != null) {
314             filename = parseContentDisposition(contentDisposition);
315             if (filename != null) {
316                 int index = filename.lastIndexOf('/') + 1;
317                 if (index > 0) {
318                     filename = filename.substring(index);
319                 }
320             }
321         }
322 
323         // If all the other http-related approaches failed, use the plain uri
324         if (filename == null) {
325             String decodedUrl = Uri.decode(url);
326             if (decodedUrl != null) {
327                 int queryIndex = decodedUrl.indexOf('?');
328                 // If there is a query string strip it, same as desktop browsers
329                 if (queryIndex > 0) {
330                     decodedUrl = decodedUrl.substring(0, queryIndex);
331                 }
332                 if (!decodedUrl.endsWith("/")) {
333                     int index = decodedUrl.lastIndexOf('/') + 1;
334                     if (index > 0) {
335                         filename = decodedUrl.substring(index);
336                     }
337                 }
338             }
339         }
340 
341         // Finally, if couldn't get filename from URI, get a generic filename
342         if (filename == null) {
343             filename = "downloadfile";
344         }
345 
346         // Split filename between base and extension
347         // Add an extension if filename does not have one
348         int dotIndex = filename.indexOf('.');
349         if (dotIndex < 0) {
350             if (mimeType != null) {
351                 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
352                 if (extension != null) {
353                     extension = "." + extension;
354                 }
355             }
356             if (extension == null) {
357                 if (mimeType != null && mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
358                     if (mimeType.equalsIgnoreCase("text/html")) {
359                         extension = ".html";
360                     } else {
361                         extension = ".txt";
362                     }
363                 } else {
364                     extension = ".bin";
365                 }
366             }
367         } else {
368             if (mimeType != null) {
369                 // Compare the last segment of the extension against the mime type.
370                 // If there's a mismatch, discard the entire extension.
371                 int lastDotIndex = filename.lastIndexOf('.');
372                 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
373                         filename.substring(lastDotIndex + 1));
374                 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
375                     extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
376                     if (extension != null) {
377                         extension = "." + extension;
378                     }
379                 }
380             }
381             if (extension == null) {
382                 extension = filename.substring(dotIndex);
383             }
384             filename = filename.substring(0, dotIndex);
385         }
386 
387         return filename + extension;
388     }
389 
390     /** Regex used to parse content-disposition headers */
391     private static final Pattern CONTENT_DISPOSITION_PATTERN =
392             Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
393             Pattern.CASE_INSENSITIVE);
394 
395     /**
396      * Parse the Content-Disposition HTTP Header. The format of the header
397      * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
398      * This header provides a filename for content that is going to be
399      * downloaded to the file system. We only support the attachment type.
400      * Note that RFC 2616 specifies the filename value must be double-quoted.
401      * Unfortunately some servers do not quote the value so to maintain
402      * consistent behaviour with other browsers, we allow unquoted values too.
403      */
404     @UnsupportedAppUsage
parseContentDisposition(String contentDisposition)405     static String parseContentDisposition(String contentDisposition) {
406         try {
407             Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
408             if (m.find()) {
409                 return m.group(2);
410             }
411         } catch (IllegalStateException ex) {
412              // This function is defined as returning null when it can't parse the header
413         }
414         return null;
415     }
416 }
417