1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.android.tradefed.util; 17 18 import com.android.ddmlib.Log; 19 20 import java.util.ArrayList; 21 import java.util.regex.Matcher; 22 import java.util.regex.Pattern; 23 24 public class QuotationAwareTokenizer { 25 private static final String LOG_TAG = "TOKEN"; 26 27 /** 28 * Tokenizes the string, splitting on specified delimiter. Does not split between consecutive, 29 * unquoted double-quote marks. 30 * 31 * <p>How the tokenizer works: 32 * 33 * <ol> 34 * <li> Split the string into "characters" where each "character" is either an escaped 35 * character like \" (that is, "\\\"") or a single real character like f (just "f"). 36 * <li> For each "character" 37 * <ol> 38 * <li> If it's a space, finish a token unless we're being quoted 39 * <li> If it's a quotation mark, flip the "we're being quoted" bit 40 * <li> Otherwise, add it to the token being built 41 * </ol> 42 * 43 * <li> At EOL, we typically haven't added the final token to the (tokens) {@link ArrayList} 44 * <ol> 45 * <li> If the last "character" is an escape character, throw an exception; that's not 46 * valid 47 * <li> If we're in the middle of a quotation, throw an exception; that's not valid 48 * <li> Otherwise, add the final token to (tokens) 49 * </ol> 50 * 51 * <li> Return a String[] version of (tokens) 52 * </ol> 53 * 54 * @param line A {@link String} to be tokenized 55 * @param delim the delimiter to split on 56 * @param logging whether or not to log operations 57 * @return A tokenized version of the string 58 * @throws IllegalArgumentException if the line cannot be parsed 59 */ tokenizeLine(String line, String delim, boolean logging)60 public static String[] tokenizeLine(String line, String delim, boolean logging) 61 throws IllegalArgumentException { 62 if (line == null) { 63 throw new IllegalArgumentException("line is null"); 64 } 65 66 ArrayList<String> tokens = new ArrayList<String>(); 67 StringBuilder token = new StringBuilder(); 68 // This pattern matches an escaped character or a character. Escaped char takes precedence 69 final Pattern charPattern = Pattern.compile("\\\\.|."); 70 final Matcher charMatcher = charPattern.matcher(line); 71 String aChar = ""; 72 boolean quotation = false; 73 74 log(String.format("Trying to tokenize the line '%s'", line), logging); 75 while (charMatcher.find()) { 76 aChar = charMatcher.group(); 77 78 if (delim.equals(aChar)) { 79 if (quotation) { 80 // inside a quotation; treat spaces as part of the token 81 token.append(aChar); 82 } else { 83 if (token.length() > 0) { 84 // this is the end of a non-empty token; dump it in our list of tokens, 85 // clear our temp storage, and keep rolling 86 log(String.format("Finished token '%s'", token.toString()), logging); 87 tokens.add(token.toString()); 88 token.delete(0, token.length()); 89 } 90 // otherwise, this is the non-first in a sequence of spaces; ignore. 91 } 92 } else if ("\"".equals(aChar)) { 93 // unescaped quotation mark; flip quotation state 94 log("Flipped quotation state", logging); 95 quotation ^= true; 96 } else { 97 // default case: add the character to the token being built 98 token.append(aChar); 99 } 100 } 101 102 if (quotation || "\\".equals(aChar)) { 103 // We ended in a quotation or with an escape character; this is not valid 104 throw new IllegalArgumentException("Unexpected EOL in a quotation or after an escape " + 105 "character"); 106 } 107 108 // Add the final token to the tokens array. 109 if (token.length() > 0) { 110 log(String.format("Finished final token '%s'", token.toString()), logging); 111 tokens.add(token.toString()); 112 token.delete(0, token.length()); 113 } 114 115 String[] tokensArray = new String[tokens.size()]; 116 return tokens.toArray(tokensArray); 117 } 118 119 /** 120 * Tokenizes the string, splitting on spaces. Does not split between consecutive, 121 * unquoted double-quote marks. 122 * <p> 123 * See also {@link #tokenizeLine(String, String)} 124 */ tokenizeLine(String line)125 public static String[] tokenizeLine(String line) throws IllegalArgumentException { 126 return tokenizeLine(line, " ", true); 127 } 128 tokenizeLine(String line, String delim)129 public static String[] tokenizeLine(String line, String delim) throws IllegalArgumentException { 130 return tokenizeLine(line, delim, true); 131 } 132 133 /** 134 * Tokenizes the string, splitting on spaces. Does not split between consecutive, unquoted 135 * double-quote marks. 136 * 137 * <p>See also {@link #tokenizeLine(String, String)} 138 */ tokenizeLine(String line, boolean logging)139 public static String[] tokenizeLine(String line, boolean logging) 140 throws IllegalArgumentException { 141 return tokenizeLine(line, " ", logging); 142 } 143 144 /** 145 * Perform the reverse of {@link #tokenizeLine(String)}. <br/> 146 * Given array of tokens, combine them into a single line. 147 * 148 * @param tokens 149 * @return A {@link String} created from all the tokens. 150 */ combineTokens(String... tokens)151 public static String combineTokens(String... tokens) { 152 final Pattern wsPattern = Pattern.compile("\\s"); 153 StringBuilder sb = new StringBuilder(); 154 for (int i=0; i < tokens.length; i++) { 155 final String token = tokens[i]; 156 final Matcher wsMatcher = wsPattern.matcher(token); 157 if (wsMatcher.find()) { 158 sb.append('"'); 159 sb.append(token); 160 sb.append('"'); 161 } else { 162 sb.append(token); 163 } 164 if (i < (tokens.length - 1)) { 165 // don't output space after last token 166 sb.append(' '); 167 } 168 } 169 return sb.toString(); 170 } 171 log(String message, boolean display)172 private static void log(String message, boolean display) { 173 if (display) { 174 Log.v(LOG_TAG, message); 175 } 176 } 177 } 178