1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.android.tradefed.util;
17 
18 import com.android.ddmlib.Log;
19 
20 import java.util.ArrayList;
21 import java.util.regex.Matcher;
22 import java.util.regex.Pattern;
23 
24 public class QuotationAwareTokenizer {
25     private static final String LOG_TAG = "TOKEN";
26 
27     /**
28      * Tokenizes the string, splitting on specified delimiter. Does not split between consecutive,
29      * unquoted double-quote marks.
30      *
31      * <p>How the tokenizer works:
32      *
33      * <ol>
34      *   <li> Split the string into "characters" where each "character" is either an escaped
35      *       character like \" (that is, "\\\"") or a single real character like f (just "f").
36      *   <li> For each "character"
37      *       <ol>
38      *         <li> If it's a space, finish a token unless we're being quoted
39      *         <li> If it's a quotation mark, flip the "we're being quoted" bit
40      *         <li> Otherwise, add it to the token being built
41      *       </ol>
42      *
43      *   <li> At EOL, we typically haven't added the final token to the (tokens) {@link ArrayList}
44      *       <ol>
45      *         <li> If the last "character" is an escape character, throw an exception; that's not
46      *             valid
47      *         <li> If we're in the middle of a quotation, throw an exception; that's not valid
48      *         <li> Otherwise, add the final token to (tokens)
49      *       </ol>
50      *
51      *   <li> Return a String[] version of (tokens)
52      * </ol>
53      *
54      * @param line A {@link String} to be tokenized
55      * @param delim the delimiter to split on
56      * @param logging whether or not to log operations
57      * @return A tokenized version of the string
58      * @throws IllegalArgumentException if the line cannot be parsed
59      */
tokenizeLine(String line, String delim, boolean logging)60     public static String[] tokenizeLine(String line, String delim, boolean logging)
61             throws IllegalArgumentException {
62         if (line == null) {
63             throw new IllegalArgumentException("line is null");
64         }
65 
66         ArrayList<String> tokens = new ArrayList<String>();
67         StringBuilder token = new StringBuilder();
68         // This pattern matches an escaped character or a character.  Escaped char takes precedence
69         final Pattern charPattern = Pattern.compile("\\\\.|.");
70         final Matcher charMatcher = charPattern.matcher(line);
71         String aChar = "";
72         boolean quotation = false;
73 
74         log(String.format("Trying to tokenize the line '%s'", line), logging);
75         while (charMatcher.find()) {
76             aChar = charMatcher.group();
77 
78             if (delim.equals(aChar)) {
79                 if (quotation) {
80                     // inside a quotation; treat spaces as part of the token
81                     token.append(aChar);
82                 } else {
83                     if (token.length() > 0) {
84                         // this is the end of a non-empty token; dump it in our list of tokens,
85                         // clear our temp storage, and keep rolling
86                         log(String.format("Finished token '%s'", token.toString()), logging);
87                         tokens.add(token.toString());
88                         token.delete(0, token.length());
89                     }
90                     // otherwise, this is the non-first in a sequence of spaces; ignore.
91                 }
92             } else if ("\"".equals(aChar)) {
93                 // unescaped quotation mark; flip quotation state
94                 log("Flipped quotation state", logging);
95                 quotation ^= true;
96             } else {
97                 // default case: add the character to the token being built
98                 token.append(aChar);
99             }
100         }
101 
102         if (quotation || "\\".equals(aChar)) {
103             // We ended in a quotation or with an escape character; this is not valid
104             throw new IllegalArgumentException("Unexpected EOL in a quotation or after an escape " +
105                     "character");
106         }
107 
108         // Add the final token to the tokens array.
109         if (token.length() > 0) {
110             log(String.format("Finished final token '%s'", token.toString()), logging);
111             tokens.add(token.toString());
112             token.delete(0, token.length());
113         }
114 
115         String[] tokensArray = new String[tokens.size()];
116         return tokens.toArray(tokensArray);
117     }
118 
119     /**
120      * Tokenizes the string, splitting on spaces.  Does not split between consecutive,
121      * unquoted double-quote marks.
122      * <p>
123      * See also {@link #tokenizeLine(String, String)}
124      */
tokenizeLine(String line)125     public static String[] tokenizeLine(String line) throws IllegalArgumentException {
126         return tokenizeLine(line, " ", true);
127     }
128 
tokenizeLine(String line, String delim)129     public static String[] tokenizeLine(String line, String delim) throws IllegalArgumentException {
130         return tokenizeLine(line, delim, true);
131     }
132 
133     /**
134      * Tokenizes the string, splitting on spaces. Does not split between consecutive, unquoted
135      * double-quote marks.
136      *
137      * <p>See also {@link #tokenizeLine(String, String)}
138      */
tokenizeLine(String line, boolean logging)139     public static String[] tokenizeLine(String line, boolean logging)
140             throws IllegalArgumentException {
141         return tokenizeLine(line, " ", logging);
142     }
143 
144     /**
145      * Perform the reverse of {@link #tokenizeLine(String)}. <br/>
146      * Given array of tokens, combine them into a single line.
147      *
148      * @param tokens
149      * @return A {@link String} created from all the tokens.
150      */
combineTokens(String... tokens)151     public static String combineTokens(String... tokens) {
152         final Pattern wsPattern = Pattern.compile("\\s");
153         StringBuilder sb = new StringBuilder();
154         for (int i=0; i < tokens.length; i++) {
155             final String token = tokens[i];
156             final Matcher wsMatcher = wsPattern.matcher(token);
157             if (wsMatcher.find()) {
158                 sb.append('"');
159                 sb.append(token);
160                 sb.append('"');
161             } else {
162                 sb.append(token);
163             }
164             if (i < (tokens.length - 1)) {
165                 // don't output space after last token
166                 sb.append(' ');
167             }
168         }
169         return sb.toString();
170     }
171 
log(String message, boolean display)172     private static void log(String message, boolean display) {
173         if (display) {
174             Log.v(LOG_TAG, message);
175         }
176     }
177 }
178