/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.database.sqlite;

import android.annotation.NonNull;
import android.annotation.Nullable;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.function.Consumer;

/**
 * SQL Tokenizer specialized to extract tokens from SQL (snippets).
 * <p>
 * Based on sqlite3GetToken() in tokenize.c in SQLite.
31 * <p> 32 * Source for v3.8.6 (which android uses): http://www.sqlite.org/src/artifact/ae45399d6252b4d7 33 * (Latest source as of now: http://www.sqlite.org/src/artifact/78c8085bc7af1922) 34 * <p> 35 * Also draft spec: http://www.sqlite.org/draft/tokenreq.html 36 * 37 * @hide 38 */ 39 public class SQLiteTokenizer { isAlpha(char ch)40 private static boolean isAlpha(char ch) { 41 return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || (ch == '_'); 42 } 43 isNum(char ch)44 private static boolean isNum(char ch) { 45 return ('0' <= ch && ch <= '9'); 46 } 47 isAlNum(char ch)48 private static boolean isAlNum(char ch) { 49 return isAlpha(ch) || isNum(ch); 50 } 51 isAnyOf(char ch, String set)52 private static boolean isAnyOf(char ch, String set) { 53 return set.indexOf(ch) >= 0; 54 } 55 genException(String message, String sql)56 private static IllegalArgumentException genException(String message, String sql) { 57 throw new IllegalArgumentException(message + " in '" + sql + "'"); 58 } 59 peek(String s, int index)60 private static char peek(String s, int index) { 61 return index < s.length() ? s.charAt(index) : '\0'; 62 } 63 64 public static final int OPTION_NONE = 0; 65 66 /** 67 * Require that SQL contains only tokens; any comments or values will result 68 * in an exception. 69 */ 70 public static final int OPTION_TOKEN_ONLY = 1 << 0; 71 72 /** 73 * Tokenize the given SQL, returning the list of each encountered token. 74 * 75 * @throws IllegalArgumentException if invalid SQL is encountered. 76 */ tokenize(@ullable String sql, int options)77 public static List<String> tokenize(@Nullable String sql, int options) { 78 final ArrayList<String> res = new ArrayList<>(); 79 tokenize(sql, options, res::add); 80 return res; 81 } 82 83 /** 84 * Tokenize the given SQL, sending each encountered token to the given 85 * {@link Consumer}. 86 * 87 * @throws IllegalArgumentException if invalid SQL is encountered. 
88 */ tokenize(@ullable String sql, int options, Consumer<String> checker)89 public static void tokenize(@Nullable String sql, int options, Consumer<String> checker) { 90 if (sql == null) { 91 return; 92 } 93 int pos = 0; 94 final int len = sql.length(); 95 while (pos < len) { 96 final char ch = peek(sql, pos); 97 98 // Regular token. 99 if (isAlpha(ch)) { 100 final int start = pos; 101 pos++; 102 while (isAlNum(peek(sql, pos))) { 103 pos++; 104 } 105 final int end = pos; 106 107 final String token = sql.substring(start, end); 108 checker.accept(token); 109 110 continue; 111 } 112 113 // Handle quoted tokens 114 if (isAnyOf(ch, "'\"`")) { 115 final int quoteStart = pos; 116 pos++; 117 118 for (;;) { 119 pos = sql.indexOf(ch, pos); 120 if (pos < 0) { 121 throw genException("Unterminated quote", sql); 122 } 123 if (peek(sql, pos + 1) != ch) { 124 break; 125 } 126 // Quoted quote char -- e.g. "abc""def" is a single string. 127 pos += 2; 128 } 129 final int quoteEnd = pos; 130 pos++; 131 132 if (ch != '\'') { 133 // Extract the token 134 final String tokenUnquoted = sql.substring(quoteStart + 1, quoteEnd); 135 136 final String token; 137 138 // Unquote if needed. i.e. "aa""bb" -> aa"bb 139 if (tokenUnquoted.indexOf(ch) >= 0) { 140 token = tokenUnquoted.replaceAll( 141 String.valueOf(ch) + ch, String.valueOf(ch)); 142 } else { 143 token = tokenUnquoted; 144 } 145 checker.accept(token); 146 } else { 147 if ((options &= OPTION_TOKEN_ONLY) != 0) { 148 throw genException("Non-token detected", sql); 149 } 150 } 151 continue; 152 } 153 // Handle tokens enclosed in [...] 
154 if (ch == '[') { 155 final int quoteStart = pos; 156 pos++; 157 158 pos = sql.indexOf(']', pos); 159 if (pos < 0) { 160 throw genException("Unterminated quote", sql); 161 } 162 final int quoteEnd = pos; 163 pos++; 164 165 final String token = sql.substring(quoteStart + 1, quoteEnd); 166 167 checker.accept(token); 168 continue; 169 } 170 if ((options &= OPTION_TOKEN_ONLY) != 0) { 171 throw genException("Non-token detected", sql); 172 } 173 174 // Detect comments. 175 if (ch == '-' && peek(sql, pos + 1) == '-') { 176 pos += 2; 177 pos = sql.indexOf('\n', pos); 178 if (pos < 0) { 179 // We disallow strings ending in an inline comment. 180 throw genException("Unterminated comment", sql); 181 } 182 pos++; 183 184 continue; 185 } 186 if (ch == '/' && peek(sql, pos + 1) == '*') { 187 pos += 2; 188 pos = sql.indexOf("*/", pos); 189 if (pos < 0) { 190 throw genException("Unterminated comment", sql); 191 } 192 pos += 2; 193 194 continue; 195 } 196 197 // Semicolon is never allowed. 198 if (ch == ';') { 199 throw genException("Semicolon is not allowed", sql); 200 } 201 202 // For this purpose, we can simply ignore other characters. 203 // (Note it doesn't handle the X'' literal properly and reports this X as a token, 204 // but that should be fine...) 205 pos++; 206 } 207 } 208 209 /** 210 * Test if given token is a 211 * <a href="https://www.sqlite.org/lang_keywords.html">SQLite reserved 212 * keyword</a>. 
213 */ isKeyword(@onNull String token)214 public static boolean isKeyword(@NonNull String token) { 215 switch (token.toUpperCase(Locale.US)) { 216 case "ABORT": case "ACTION": case "ADD": case "AFTER": 217 case "ALL": case "ALTER": case "ANALYZE": case "AND": 218 case "AS": case "ASC": case "ATTACH": case "AUTOINCREMENT": 219 case "BEFORE": case "BEGIN": case "BETWEEN": case "BINARY": 220 case "BY": case "CASCADE": case "CASE": case "CAST": 221 case "CHECK": case "COLLATE": case "COLUMN": case "COMMIT": 222 case "CONFLICT": case "CONSTRAINT": case "CREATE": case "CROSS": 223 case "CURRENT": case "CURRENT_DATE": case "CURRENT_TIME": case "CURRENT_TIMESTAMP": 224 case "DATABASE": case "DEFAULT": case "DEFERRABLE": case "DEFERRED": 225 case "DELETE": case "DESC": case "DETACH": case "DISTINCT": 226 case "DO": case "DROP": case "EACH": case "ELSE": 227 case "END": case "ESCAPE": case "EXCEPT": case "EXCLUDE": 228 case "EXCLUSIVE": case "EXISTS": case "EXPLAIN": case "FAIL": 229 case "FILTER": case "FOLLOWING": case "FOR": case "FOREIGN": 230 case "FROM": case "FULL": case "GLOB": case "GROUP": 231 case "GROUPS": case "HAVING": case "IF": case "IGNORE": 232 case "IMMEDIATE": case "IN": case "INDEX": case "INDEXED": 233 case "INITIALLY": case "INNER": case "INSERT": case "INSTEAD": 234 case "INTERSECT": case "INTO": case "IS": case "ISNULL": 235 case "JOIN": case "KEY": case "LEFT": case "LIKE": 236 case "LIMIT": case "MATCH": case "NATURAL": case "NO": 237 case "NOCASE": case "NOT": case "NOTHING": case "NOTNULL": 238 case "NULL": case "OF": case "OFFSET": case "ON": 239 case "OR": case "ORDER": case "OTHERS": case "OUTER": 240 case "OVER": case "PARTITION": case "PLAN": case "PRAGMA": 241 case "PRECEDING": case "PRIMARY": case "QUERY": case "RAISE": 242 case "RANGE": case "RECURSIVE": case "REFERENCES": case "REGEXP": 243 case "REINDEX": case "RELEASE": case "RENAME": case "REPLACE": 244 case "RESTRICT": case "RIGHT": case "ROLLBACK": case "ROW": 245 case "ROWS": case 
"RTRIM": case "SAVEPOINT": case "SELECT": 246 case "SET": case "TABLE": case "TEMP": case "TEMPORARY": 247 case "THEN": case "TIES": case "TO": case "TRANSACTION": 248 case "TRIGGER": case "UNBOUNDED": case "UNION": case "UNIQUE": 249 case "UPDATE": case "USING": case "VACUUM": case "VALUES": 250 case "VIEW": case "VIRTUAL": case "WHEN": case "WHERE": 251 case "WINDOW": case "WITH": case "WITHOUT": 252 return true; 253 default: 254 return false; 255 } 256 } 257 258 /** 259 * Test if given token is a 260 * <a href="https://www.sqlite.org/lang_corefunc.html">SQLite reserved 261 * function</a>. 262 */ isFunction(@onNull String token)263 public static boolean isFunction(@NonNull String token) { 264 switch (token.toLowerCase(Locale.US)) { 265 case "abs": case "avg": case "char": case "coalesce": 266 case "count": case "glob": case "group_concat": case "hex": 267 case "ifnull": case "instr": case "length": case "like": 268 case "likelihood": case "likely": case "lower": case "ltrim": 269 case "max": case "min": case "nullif": case "random": 270 case "randomblob": case "replace": case "round": case "rtrim": 271 case "substr": case "sum": case "total": case "trim": 272 case "typeof": case "unicode": case "unlikely": case "upper": 273 case "zeroblob": 274 return true; 275 default: 276 return false; 277 } 278 } 279 280 /** 281 * Test if given token is a 282 * <a href="https://www.sqlite.org/datatype3.html">SQLite reserved type</a>. 
283 */ isType(@onNull String token)284 public static boolean isType(@NonNull String token) { 285 switch (token.toUpperCase(Locale.US)) { 286 case "INT": case "INTEGER": case "TINYINT": case "SMALLINT": 287 case "MEDIUMINT": case "BIGINT": case "INT2": case "INT8": 288 case "CHARACTER": case "VARCHAR": case "NCHAR": case "NVARCHAR": 289 case "TEXT": case "CLOB": case "BLOB": case "REAL": 290 case "DOUBLE": case "FLOAT": case "NUMERIC": case "DECIMAL": 291 case "BOOLEAN": case "DATE": case "DATETIME": 292 return true; 293 default: 294 return false; 295 } 296 } 297 } 298