1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_CMDLINE_TOKEN_RANGE_H_
18 #define ART_CMDLINE_TOKEN_RANGE_H_
19 
20 #include <assert.h>
21 #include <algorithm>
22 #include <memory>
23 #include <string>
24 #include <vector>
25 
26 #include "android-base/strings.h"
27 
28 namespace art {
// A range of tokens to make token matching algorithms easier.
//
// A TokenRange is a [begin, end) window into a shared, reference-counted vector of
// strings. Copying a TokenRange copies only the shared pointer and the two iterators,
// so a typical copy never ends up doing a deep copy of the tokens.
// It is up to the user to play nice and not to mutate the strings in-place.
//
// Operations that would change the tokens return a new TokenRange instead, and the
// tokens are only copied if the operation *actually* changes something.
struct TokenRange {
  // Short-hand for a vector of strings. A single string and a token are synonymous.
  using TokenList = std::vector<std::string>;

  // Copying-from-vector constructor.
  explicit TokenRange(const TokenList& token_list)
    : token_list_(std::make_shared<TokenList>(token_list)),
      begin_(token_list_->begin()),
      end_(token_list_->end())
  {}

  // Copying-from-iterator constructor. The tokens in [it_begin, it_end) are copied.
  template <typename ForwardIterator>
  TokenRange(ForwardIterator it_begin, ForwardIterator it_end)
    : token_list_(std::make_shared<TokenList>(it_begin, it_end)),
      begin_(token_list_->begin()),
      end_(token_list_->end())
  {}

#if 0
  // Copying-from-vector constructor.
  TokenRange(const TokenList& token_list ATTRIBUTE_UNUSED,
             TokenList::const_iterator it_begin,
             TokenList::const_iterator it_end)
    : token_list_(new TokenList(it_begin, it_end)),
      begin_(token_list_->begin()),
      end_(token_list_->end()) {
    assert(it_begin >= token_list.begin());
    assert(it_end <= token_list.end());
  }
#endif

  // Copying from char array constructor, converting the first `length` C strings
  // into tokens (strings) along the way.
  TokenRange(const char* token_list[], size_t length)
    : token_list_(std::make_shared<TokenList>(token_list, token_list + length)),
      begin_(token_list_->begin()),
      end_(token_list_->end())
  {}

  // Non-copying move-from-vector constructor. Takes over the token vector.
  explicit TokenRange(TokenList&& token_list)
    : token_list_(std::make_shared<TokenList>(std::move(token_list))),
      begin_(token_list_->begin()),
      end_(token_list_->end())
  {}

  // Non-copying constructor. Retains a reference to an existing list of tokens and
  // exposes only the sub-range [it_begin, it_end), which must lie inside that list.
  TokenRange(std::shared_ptr<TokenList> token_list,
             TokenList::const_iterator it_begin,
             TokenList::const_iterator it_end)
    : token_list_(std::move(token_list)),
      begin_(it_begin),
      end_(it_end) {
    assert(token_list_ != nullptr);
    assert(begin_ >= token_list_->cbegin());
    assert(end_ <= token_list_->cend());
    assert(begin_ <= end_);
  }

  // Non-copying copy constructor.
  TokenRange(const TokenRange&) = default;

  // Non-copying move constructor.
  TokenRange(TokenRange&&) = default;

  // Non-copying constructor. Retains a reference to an existing list of tokens and
  // exposes all of it.
  explicit TokenRange(std::shared_ptr<TokenList> token_list)
    : token_list_(std::move(token_list)),
      begin_(token_list_->begin()),
      end_(token_list_->end())
  {}

  // Iterator type for begin() and end(). Guaranteed to be a RandomAccessIterator.
  using iterator = TokenList::const_iterator;

  // Iterator type for const begin() and const end(). Guaranteed to be a RandomAccessIterator.
  using const_iterator = iterator;

  // Create a token range by splitting a string at every character listed in `separators`.
  // Each separator gets its own single-character token, e.g. splitting "a:b" on ':'
  // yields ["a", ":", "b"]. Runs of non-separator characters become one token each.
  // Since the separators are retained as tokens, it might be useful to call
  // RemoveToken afterwards.
  static TokenRange Split(const std::string& string, std::initializer_list<char> separators) {
    TokenList new_token_list;

    std::string tok;
    for (char c : string) {
      // Classify the character once against the whole separator set; testing it
      // separator-by-separator would append it to the pending token once per
      // non-matching separator.
      const bool is_separator =
          std::find(separators.begin(), separators.end(), c) != separators.end();
      if (!is_separator) {
        // Build up the token with another character.
        tok += c;
        continue;
      }

      // We spotted a separator character.
      // Push back everything since the last separator as a new token.
      if (!tok.empty()) {
        new_token_list.push_back(std::move(tok));
        tok.clear();  // Bring the moved-from string back to a known (empty) state.
      }
      // Push back the separator as a token of its own.
      new_token_list.push_back(std::string(1, c));
    }

    // Whatever trails the last separator is the final token.
    if (!tok.empty()) {
      new_token_list.push_back(std::move(tok));
    }

    return TokenRange(std::move(new_token_list));
  }

  // A RandomAccessIterator to the first element in this range.
  iterator begin() const {
    return begin_;
  }

  // A RandomAccessIterator to one past the last element in this range.
  iterator end() const {
    return end_;
  }

  // The size of the range, i.e. how many tokens are in it.
  size_t Size() const {
    return static_cast<size_t>(std::distance(begin_, end_));
  }

  // Are there 0 tokens in this range?
  bool IsEmpty() const {
    return begin_ == end_;
  }

  // Look up a token by its offset from the start of this range.
  const std::string& GetToken(size_t offset) const {
    assert(offset < Size());
    return *(begin_ + offset);
  }

  // Does this token range equal the other range?
  // Equality is defined as having both the same size, and
  // each corresponding token being equal.
  bool operator==(const TokenRange& other) const {
    if (this == &other) {
      return true;
    }

    if (Size() != other.Size()) {
      return false;
    }

    return std::equal(begin(), end(), other.begin());
  }

  // Look up the token at the requested index.
  const std::string& operator[](int index) const {
    assert(index >= 0 && static_cast<size_t>(index) < Size());
    return *(begin() + index);
  }

  // Does this current range start with the other range?
  // I.e. is `other` no longer than this range and equal to its leading tokens?
  bool StartsWith(const TokenRange& other) const {
    if (this == &other) {
      return true;
    }

    if (Size() < other.Size()) {
      return false;
    }

    // `other` is known to be the shorter (or equally long) range at this point.
    return std::equal(other.begin(), other.end(), begin());
  }

  // Remove all characters 'c' from each token, potentially copying the underlying tokens.
  // If no token contains 'c', this range is returned as-is and nothing is copied.
  TokenRange RemoveCharacter(char c) const {
    const bool contains_c = std::any_of(begin(), end(), [c](const std::string& token) {
      return token.find(c) != std::string::npos;
    });
    if (!contains_c) {
      return *this;
    }

    TokenList new_token_list(begin(), end());
    for (std::string& token : new_token_list) {
      token.erase(std::remove(token.begin(), token.end(), c), token.end());
    }

    return TokenRange(std::move(new_token_list));
  }

  // Remove all tokens equal to `token`, potentially copying the underlying tokens.
  TokenRange RemoveToken(const std::string& token) const {
    return RemoveIf([&](const std::string& tok) { return tok == token; });
  }

  // Discard all empty tokens, potentially copying the underlying tokens.
  TokenRange DiscardEmpty() const {
    return RemoveIf([](const std::string& token) { return token.empty(); });
  }

  // Create a non-copying subset of this range, starting `offset` tokens in.
  // Length is trimmed so that the Slice does not go out of range; the default
  // length (npos) means "everything up to the end of this range".
  TokenRange Slice(size_t offset, size_t length = std::string::npos) const {
    assert(offset < Size());

    // Clamp against the remaining token count instead of computing offset + length,
    // which could wrap around for very large lengths.
    const size_t available = Size() - offset;
    if (length > available) {
      length = available;
    }

    const iterator it_begin = begin() + offset;
    return TokenRange(token_list_, it_begin, it_begin + length);
  }

  // Try to match the string with tokens from this range.
  // Each token is used to match exactly once (after which the next token is used, and so on).
  // The matching happens from left-to-right in a non-greedy fashion.
  // If the currently-matched token is the wildcard, then the new outputted token will
  // contain everything up to the point where the next token is matched (or up to the
  // end of the string if the wildcard is the last token).
  //
  // For example, if this == ["a:", "_", "b:"] and "_" is the wildcard, then
  // MatchSubstrings on "a:foob:" will yield: ["a:", "foo", "b:"]
  //
  // Since the string matching can fail (e.g. ["foo"] against "bar"), this
  // function can fail, in which case it will return null.
  std::unique_ptr<TokenRange> MatchSubstrings(const std::string& string,
                                              const std::string& wildcard) const {
    TokenList new_token_list;

    // Where the text captured by the pending wildcard begins, or npos if no
    // wildcard is currently being matched.
    size_t wildcard_idx = std::string::npos;
    // Index of the first character of `string` that has not been consumed yet.
    size_t string_idx = 0;

    // Function to push all the characters matched as a wildcard so far
    // (i.e. [wildcard_idx, string_idx)) as a brand new token. It resets the wildcard matching.
    // Empty wildcards are possible and ok, but only if wildcard matching was on.
    auto maybe_push_wildcard_token = [&]() {
      if (wildcard_idx != std::string::npos) {
        new_token_list.push_back(string.substr(wildcard_idx, string_idx - wildcard_idx));
        wildcard_idx = std::string::npos;
      }
    };

    for (const std::string& tok : *this) {
      if (tok == wildcard) {
        maybe_push_wildcard_token();
        wildcard_idx = string_idx;
        continue;
      }

      // Only look at the part of the string that has not been consumed yet, so that
      // earlier occurrences of the same text cannot be matched a second time.
      const size_t next_token_idx = string.find(tok, string_idx);
      if (next_token_idx == std::string::npos) {
        // Could not find token at all
        return nullptr;
      } else if (next_token_idx != string_idx && wildcard_idx == std::string::npos) {
        // Found the token at a non-starting location, and we weren't
        // trying to parse the wildcard.
        return nullptr;
      }

      // Everything skipped over to reach this token belongs to the pending wildcard;
      // emit it first so the output keeps the left-to-right order of the pattern.
      string_idx = next_token_idx;
      maybe_push_wildcard_token();

      new_token_list.push_back(tok);
      string_idx = next_token_idx + tok.size();
    }

    if (string_idx < string.size() && wildcard_idx == std::string::npos) {
      // Some characters were still remaining in the string,
      // but it wasn't trying to match a wildcard.
      return nullptr;
    }

    // If some characters are remaining, the rest must be a wildcard.
    string_idx = string.size();
    maybe_push_wildcard_token();

    return std::make_unique<TokenRange>(std::move(new_token_list));
  }

  // Do a quick match token-by-token, and see if they match.
  // Any tokens with a wildcard in them are only matched up until the wildcard.
  // If this is true, then the wildcard matching later on can still fail, so this is not
  // a guarantee that the argument is correct, it's more of a strong hint that the
  // user-provided input *probably* was trying to match this argument.
  //
  // This range holds the definition ("name") tokens; `token_list` holds the user tokens.
  //
  // Returns how many leading tokens were either matched (or accepted because there was a
  // wildcard present). 0 means no match. If Size() is returned, every token of this
  // range was matched.
  size_t MaybeMatches(const TokenRange& token_list, const std::string& wildcard) const {
    auto token_it = token_list.begin();
    auto token_end = token_list.end();
    auto name_it = begin();
    auto name_end = end();

    size_t matched_tokens = 0;

    while (token_it != token_end && name_it != name_end) {
      // Skip token matching when the corresponding name has a wildcard in it.
      const std::string& name = *name_it;

      size_t wildcard_idx = name.find(wildcard);
      if (wildcard_idx == std::string::npos) {  // No wildcard present
        // Did the definition token match the user token?
        if (name != *token_it) {
          return matched_tokens;
        }
      } else {
        std::string name_prefix = name.substr(0, wildcard_idx);

        // Did the user token start with the up-to-the-wildcard prefix?
        if (!StartsWith(*token_it, name_prefix)) {
          return matched_tokens;
        }
      }

      ++token_it;
      ++name_it;
      ++matched_tokens;
    }

    // If we got this far, it's either a full match or the token list was too short.
    return matched_tokens;
  }

  // Flatten the token range by joining every adjacent token with the separator character.
  // e.g. ["hello", "world"].join('$') == "hello$world"
  // An empty range yields an empty string.
  std::string Join(char separator) const {
    // Walk the range directly rather than copying it into a temporary vector first.
    std::string joined;
    for (iterator it = begin_; it != end_; ++it) {
      if (it != begin_) {
        joined += separator;
      }
      joined += *it;
    }
    return joined;
  }

 private:
  // Does the string `larger` begin with the string `smaller`?
  static bool StartsWith(const std::string& larger, const std::string& smaller) {
    if (larger.size() >= smaller.size()) {
      return std::equal(smaller.begin(), smaller.end(), larger.begin());
    }

    return false;
  }

  // Return a range without the tokens for which `predicate` returns true.
  // If no token satisfies the predicate, this range is returned and nothing is copied.
  template <typename TPredicate>
  TokenRange RemoveIf(const TPredicate& predicate) const {
    // Only pay for a copy if there is actually something to remove.
    if (std::none_of(begin_, end_, predicate)) {
      return *this;
    }

    // Copy the tokens of this range and drop the ones that satisfy the predicate.
    auto token_list = std::make_shared<TokenList>(begin(), end());
    TokenList::iterator new_end =
        std::remove_if(token_list->begin(), token_list->end(), predicate);
    token_list->erase(new_end, token_list->end());

    assert(token_list->size() < Size() && "Nothing was actually removed!");

    return TokenRange(token_list);
  }

  // Owns (shares) the backing storage that begin_ and end_ point into.
  const std::shared_ptr<std::vector<std::string>> token_list_;
  // First token of this range.
  const iterator begin_;
  // One past the last token of this range.
  const iterator end_;
};
425 }  // namespace art
426 
427 #endif  // ART_CMDLINE_TOKEN_RANGE_H_
428