1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. Oracle designates this 9 * particular file as subject to the "Classpath" exception as provided 10 * by Oracle in the LICENSE file that accompanied this code. 11 * 12 * This code is distributed in the hope that it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 * version 2 for more details (a copy is included in the LICENSE file that 16 * accompanied this code). 17 * 18 * You should have received a copy of the GNU General Public License version 19 * 2 along with this work; if not, write to the Free Software Foundation, 20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 21 * 22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 23 * or visit www.oracle.com if you need additional information or have any 24 * questions. 25 */ 26 27 package java.net; 28 29 import java.io.*; 30 31 /** 32 * Utility class for HTML form decoding. This class contains static methods 33 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE> 34 * MIME format. 35 * <p> 36 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed 37 * that all characters in the encoded string are one of the following: 38 * "{@code a}" through "{@code z}", 39 * "{@code A}" through "{@code Z}", 40 * "{@code 0}" through "{@code 9}", and 41 * "{@code -}", "{@code _}", 42 * "{@code .}", and "{@code *}". The 43 * character "{@code %}" is allowed but is interpreted 44 * as the start of a special escaped sequence. 45 * <p> 46 * The following rules are applied in the conversion: 47 * 48 * <ul> 49 * <li>The alphanumeric characters "{@code a}" through 50 * "{@code z}", "{@code A}" through 51 * "{@code Z}" and "{@code 0}" 52 * through "{@code 9}" remain the same. 53 * <li>The special characters "{@code .}", 54 * "{@code -}", "{@code *}", and 55 * "{@code _}" remain the same. 56 * <li>The plus sign "{@code +}" is converted into a 57 * space character " " . 58 * <li>A sequence of the form "<i>{@code %xy}</i>" will be 59 * treated as representing a byte where <i>xy</i> is the two-digit 60 * hexadecimal representation of the 8 bits. Then, all substrings 61 * that contain one or more of these byte sequences consecutively 62 * will be replaced by the character(s) whose encoding would result 63 * in those consecutive bytes. 64 * The encoding scheme used to decode these characters may be specified, 65 * or if unspecified, the default encoding of the platform will be used. 66 * </ul> 67 * <p> 68 * There are two possible ways in which this decoder could deal with 69 * illegal strings. It could either leave illegal characters alone or 70 * it could throw an {@link java.lang.IllegalArgumentException}. 71 * Which approach the decoder takes is left to the 72 * implementation. 73 * 74 * @author Mark Chamness 75 * @author Michael McCloskey 76 * @since 1.2 77 */ 78 79 public class URLDecoder { 80 81 // The platform default encoding 82 static String dfltEncName = URLEncoder.dfltEncName; 83 84 /** 85 * Decodes a {@code x-www-form-urlencoded} string. 86 * The platform's default encoding is used to determine what characters 87 * are represented by any consecutive sequences of the form 88 * "<i>{@code %xy}</i>". 89 * @param s the {@code String} to decode 90 * @deprecated The resulting string may vary depending on the platform's 91 * default encoding. Instead, use the decode(String,String) method 92 * to specify the encoding. 93 * @return the newly decoded {@code String} 94 */ 95 @Deprecated decode(String s)96 public static String decode(String s) { 97 98 String str = null; 99 100 try { 101 str = decode(s, dfltEncName); 102 } catch (UnsupportedEncodingException e) { 103 // The system should always have the platform default 104 } 105 106 return str; 107 } 108 109 /** 110 * Decodes a {@code application/x-www-form-urlencoded} string using a specific 111 * encoding scheme. 112 * The supplied encoding is used to determine 113 * what characters are represented by any consecutive sequences of the 114 * form "<i>{@code %xy}</i>". 115 * <p> 116 * <em><strong>Note:</strong> The <a href= 117 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars"> 118 * World Wide Web Consortium Recommendation</a> states that 119 * UTF-8 should be used. Not doing so may introduce 120 * incompatibilities.</em> 121 * 122 * @param s the {@code String} to decode 123 * @param enc The name of a supported 124 * <a href="../lang/package-summary.html#charenc">character 125 * encoding</a>. 126 * @return the newly decoded {@code String} 127 * @exception UnsupportedEncodingException 128 * If character encoding needs to be consulted, but 129 * named character encoding is not supported 130 * @see URLEncoder#encode(java.lang.String, java.lang.String) 131 * @since 1.4 132 */ decode(String s, String enc)133 public static String decode(String s, String enc) 134 throws UnsupportedEncodingException{ 135 136 boolean needToChange = false; 137 int numChars = s.length(); 138 StringBuffer sb = new StringBuffer(numChars > 500 ? numChars / 2 : numChars); 139 int i = 0; 140 141 if (enc.length() == 0) { 142 throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter"); 143 } 144 145 char c; 146 byte[] bytes = null; 147 while (i < numChars) { 148 c = s.charAt(i); 149 switch (c) { 150 case '+': 151 sb.append(' '); 152 i++; 153 needToChange = true; 154 break; 155 case '%': 156 /* 157 * Starting with this instance of %, process all 158 * consecutive substrings of the form %xy. Each 159 * substring %xy will yield a byte. Convert all 160 * consecutive bytes obtained this way to whatever 161 * character(s) they represent in the provided 162 * encoding. 163 */ 164 165 try { 166 167 // (numChars-i)/3 is an upper bound for the number 168 // of remaining bytes 169 if (bytes == null) 170 bytes = new byte[(numChars-i)/3]; 171 int pos = 0; 172 173 while ( ((i+2) < numChars) && 174 (c=='%')) { 175 // BEGIN Android-changed: App compat. Forbid non-hex chars after '%'. 176 if (!isValidHexChar(s.charAt(i+1)) || !isValidHexChar(s.charAt(i+2))) { 177 throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern : " 178 + s.substring(i, i + 3)); 179 } 180 // END Android-changed: App compat. Forbid non-hex chars after '%'. 181 int v = Integer.parseInt(s.substring(i+1,i+3),16); 182 if (v < 0) 183 // Android-changed: Improve error message by printing the string value. 184 throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value : " 185 + s.substring(i, i + 3)); 186 bytes[pos++] = (byte) v; 187 i+= 3; 188 if (i < numChars) 189 c = s.charAt(i); 190 } 191 192 // A trailing, incomplete byte encoding such as 193 // "%x" will cause an exception to be thrown 194 195 if ((i < numChars) && (c=='%')) 196 throw new IllegalArgumentException( 197 "URLDecoder: Incomplete trailing escape (%) pattern"); 198 199 sb.append(new String(bytes, 0, pos, enc)); 200 } catch (NumberFormatException e) { 201 throw new IllegalArgumentException( 202 "URLDecoder: Illegal hex characters in escape (%) pattern - " 203 + e.getMessage()); 204 } 205 needToChange = true; 206 break; 207 default: 208 sb.append(c); 209 i++; 210 break; 211 } 212 } 213 214 return (needToChange? sb.toString() : s); 215 } 216 217 // BEGIN Android-added: App compat. Forbid non-hex chars after '%'. isValidHexChar(char c)218 private static boolean isValidHexChar(char c) { 219 return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); 220 } 221 // END Android-added: App compat. Forbid non-hex chars after '%'. 222 } 223