1 // Copyright (C) 2018 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "common/debug.h"
16 #include "inode2filename/search_directories.h"
17 #include "inode2filename/system_call.h"
18 
19 #include <android-base/file.h>
20 #include <android-base/logging.h>
21 #include <android-base/scopeguard.h>
22 #include <android-base/stringprintf.h>
23 #include <android-base/unique_fd.h>
24 
25 #include "rxcpp/rx.hpp"
26 
27 #include <iostream>
28 #include <stdio.h>
29 #include <fstream>
30 #include <vector>
31 #include <optional>
32 
33 #include <signal.h>
34 #include <stdlib.h>
35 #include <unistd.h>
36 
37 #include <sys/types.h>
38 
39 #ifdef __ANDROID__
40 #include <sys/sysmacros.h>
41 #endif
42 
43 #include <sys/stat.h>
44 #include <fcntl.h>
45 #include <poll.h>
46 #include <dirent.h>
47 
48 #include <unordered_map>
49 
50 namespace rx = rxcpp;
51 using android::base::unique_fd;  // NOLINT
52 using android::base::StringPrintf;  // NOLINT
53 
54 namespace iorap::inode2filename {
55 
56 // A multimap of 'ino_t -> List[Inode]' (where the value Inodes have the same ino_t as the key).
57 //
58 // A flat list of Inodes is turned into the above map, then keys can be removed one at a time
59 // until the InodeSet eventually becomes empty.
60 struct InodeSet {
61   struct ValueRange {
beginiorap::inode2filename::InodeSet::ValueRange62     auto/*Iterable<Inode>*/ begin() {
63       return begin_;
64     }
65 
endiorap::inode2filename::InodeSet::ValueRange66     auto/*Iterable<Inode>*/ end() {
67       return end_;
68     }
69 
emptyiorap::inode2filename::InodeSet::ValueRange70     bool empty() const {
71       return begin_ == end_;
72     }
73 
operator booliorap::inode2filename::InodeSet::ValueRange74     explicit operator bool() const {
75       return !empty();
76     }
77 
78     std::unordered_multimap<ino_t, Inode>::iterator begin_, end_;
79 
80     friend std::ostream& operator<<(std::ostream& os, const ValueRange& s);
81   };
82 
83   // Create an observable that emits the remaining inodes in the map.
84   //
85   // Mutation functions must not be called until this observable
86   // has been finished emitting all values (e.g. with on_completed) since that
87   // would cause the underlying iterators to go into an undefined state.
IterateValuesiorap::inode2filename::InodeSet88   auto/*observable<Inode>*/ IterateValues() const {
89     return rxcpp::observable<>::iterate(set_).map(  // XX: should we use identity_immediate here?
90         [](const std::pair<const ino_t, Inode>& pair) {
91           return pair.second;
92         }
93     );
94     // TODO: this would be more efficient as a range-v3 view.
95   }
96 
Emptyiorap::inode2filename::InodeSet97   constexpr bool Empty() const {
98     return set_.empty();
99   }
100 
OfListiorap::inode2filename::InodeSet101   static InodeSet OfList(const std::vector<Inode>& list) {
102     InodeSet new_inode_set;
103     std::unordered_multimap<ino_t, Inode>* map = &new_inode_set.set_;
104 
105     for (const Inode& inode : list) {
106       map->insert({inode.inode, inode});
107     }
108 
109     return new_inode_set;
110   }
111 
112   // Return an optional list of 'Inode' structs whose 'inode' field matches the 'inode' parameter.
113   // Returns an empty range if there was nothing found.
FindInodeListiorap::inode2filename::InodeSet114   ValueRange FindInodeList(ino_t inode) {
115     auto range = set_.equal_range(inode);
116     return ValueRange{range.first, range.second};
117   }
118 
119   // Match all fields of an Inode against a 'struct stat' stat_buf.
120   //
121   // The returned Inode (if any) is removed from the InodeSet; it will not be returned by
122   // FindInodeList in future calls.
FindAndRemoveInodeInListiorap::inode2filename::InodeSet123   std::optional<Inode> FindAndRemoveInodeInList(ValueRange inode_list,
124                                                 const struct stat& stat_buf) {
125     LOG(VERBOSE) << "FindAndRemoveInodeInList " << inode_list << ", "
126                  << "stat_buf{st_dev=" << stat_buf.st_dev << ",st_ino=" << stat_buf.st_ino << "}";
127 
128     auto /*iterator*/ found = std::find_if(inode_list.begin(),
129                                            inode_list.end(),
130                                            [&](const std::pair<ino_t, Inode>& pair) {
131       const Inode& inode = pair.second;
132       if (inode.inode != stat_buf.st_ino) {
133         return false;
134       }
135 
136       dev_t inode_dev =
137           makedev(static_cast<int>(inode.device_major), static_cast<int>(inode.device_minor));
138 
139       // Inodes could be the same across different devices.
140       // Also match the device id.
141       if (inode_dev != stat_buf.st_dev) {
142         LOG(VERBOSE) << "InodeSet:FindAndRemoveInodeInList matched ino: " << inode.inode
143                      << " but not device"
144                      << ", expected dev: " << stat_buf.st_dev
145                      << ", actual dev: " << inode_dev;
146         return false;
147       }
148       return true;
149     });
150 
151     if (found != inode_list.end()) {
152       const Inode& inode = found->second;
153       LOG(VERBOSE) << "InodeSet:FindAndRemoveInodeInList *success* inode+device " << inode;
154       DCHECK(found->second.inode == stat_buf.st_ino);
155       // Erase the inode from the list. This is important.
156       set_.erase(found);
157       return inode;
158     }
159 
160     return std::nullopt;
161   }
162 
163   // TODO: equality and string operators for testing/logging.
164  private:
165   // Explanation: readdir returns a 'file' -> 'ino_t inode' mapping.
166   //
167   // However inodes can be reused on different partitions (but they have a different device number).
168   // To handle this edge case, and to avoid calling stat whenever the inode definitely doesn't match
169   // store the inodes into a single-key,multi-value container.
170   //
171   // This enables fast scanning of readdir results by matching just the 'inode' portion,
172   // then calling stat only when the inode portion definitely matches to confirm the device.
173 
174   // There are no single-key multi-value containers in standard C++, so pretend
175   // we have one by writing this simple facade around an unordered set.
176   //
177   // We expect that the vector size is usually size=1 (or 2 or 3) since the # of devices
178   // is fixed by however many partitions there are on the system, AND the same inode #
179   // would have to be reused across a different file.
180   std::unordered_multimap<ino_t, Inode> set_;  // TODO: Rename to map_.
181 
182   friend std::ostream& operator<<(std::ostream& os, const InodeSet& s);
183 };
184 
operator <<(std::ostream & os,const InodeSet & s)185 std::ostream& operator<<(std::ostream& os, const InodeSet& s) {
186   os << "InodeSet{";
187   for (const auto& kv : s.set_) {
188     // e.g. "123=>(1:2:123)" ... its expected for the 'ino_t' portion to be repeated.
189     os << "" << kv.first << "=>(" << kv.second << "),";
190   }
191   os << "}";
192   return os;
193 }
194 
operator <<(std::ostream & os,const InodeSet::ValueRange & v)195 std::ostream& operator<<(std::ostream& os, const InodeSet::ValueRange& v) {
196   // Don't want to make a const and non const version of ValueRange.
197   InodeSet::ValueRange& s = const_cast<InodeSet::ValueRange&>(v);
198 
199   os << "InodeSet::ValueRange{";
200   for (const auto& kv : s) {
201     // e.g. "123=>(1:2:123)" ... its expected for the 'ino_t' portion to be repeated.
202     os << "" << kv.first << "=>(" << kv.second << "),";
203   }
204   os << "}";
205   return os;
206 }
207 
208 void search_for_inodes_in(std::vector<Inode>& inode_list, const std::string& dirpath);
209 
210 enum DirectoryEntryErrorCode {
211   kInvalid,    // not a real error code. to detect bad initialization.
212   kOpenDir,    // opendir failed.
213   kReadDir,    // readdir failed.
214   kDtUnknown,  // d_type was DT_UNKNOWN error.
215 };
216 
217 struct DirectoryEntryError {
218   DirectoryEntryErrorCode code;
219   int err_no;
220   std::string filename;
221 };
222 
operator <<(std::ostream & os,const DirectoryEntryError & e)223 std::ostream& operator<<(std::ostream& os, const DirectoryEntryError& e) {
224   os << "DirectoryEntryError{"
225      << static_cast<int>(e.code) << "," << e.err_no << "," << e.filename << "}";
226   return os;
227   // TODO: pretty-print code and err-no
228 }
229 
230 static common::DebugCounter gDebugDirectoryEntryCounter{};
231 static constexpr bool kDebugDirectoryEntry = false;
232 
233 #define DIRECTORY_ENTRY_MOVE_DCHECK() \
234     DCHECK_EQ(other.moved_from_, false) << __PRETTY_FUNCTION__ << "CNT:" << other.debug_counter_;
235 #define DIRECTORY_ENTRY_TRACE_CTOR() \
236     if (kDebugDirectoryEntry) LOG(VERBOSE) << __PRETTY_FUNCTION__ << "@CNT:" << debug_counter_
237 
238 struct DirectoryEntry {
239   using ResultT = iorap::expected<DirectoryEntry, DirectoryEntryError>;
240   using ObservableT = rx::observable<ResultT>;
241 
242   static constexpr ino_t kInvalidIno = std::numeric_limits<ino_t>::max();
243   static constexpr auto kInvalidFileName = "";
244 
245   // Path to file, the prefix is one of the root directories.
246   std::string filename{kInvalidFileName};
247   // Inode number of the file. Not unique across different devices.
248   ino_t d_ino{kInvalidIno};
249   // File type (DT_LNK, DT_REG, DT_DIR, or DT_UNKNOWN)
250   unsigned char d_type{DT_UNKNOWN};  // Note: not seen outside of sentinel roots.
251   // TODO: Consider invariant checks for valid combinations of above fields?
252 
253   // Debug-only flags.
254   bool moved_from_{false};
255   size_t debug_counter_{0};
256 
257  private:
258   // TODO: remove default constructor?
259   //
260   // SEEMS TO BE USED by std::vector etc. FIX DAT.
DirectoryEntryiorap::inode2filename::DirectoryEntry261   DirectoryEntry() noexcept {
262     debug_counter_ = gDebugDirectoryEntryCounter++;
263     DIRECTORY_ENTRY_TRACE_CTOR();
264   }
265  public:
DirectoryEntryiorap::inode2filename::DirectoryEntry266   DirectoryEntry(std::string filename, ino_t d_ino, unsigned char d_type) noexcept
267     : filename{std::move(filename)},
268       d_ino{d_ino},
269       d_type{d_type} {
270     debug_counter_ = gDebugDirectoryEntryCounter++;
271     DIRECTORY_ENTRY_TRACE_CTOR();
272   }
273 
DirectoryEntryiorap::inode2filename::DirectoryEntry274   DirectoryEntry(const DirectoryEntry& other) noexcept {
275     // Do not use member-initialization syntax so that this DCHECK can execute first.
276     DIRECTORY_ENTRY_MOVE_DCHECK();
277 
278     filename = other.filename;
279     d_ino = other.d_ino;
280     d_type = other.d_type;
281     children_paths_ = other.children_paths_;
282     children_initialized_ = other.children_initialized_;
283     debug_counter_ = other.debug_counter_;
284     DIRECTORY_ENTRY_TRACE_CTOR();
285   }
286 
operator =iorap::inode2filename::DirectoryEntry287   DirectoryEntry& operator=(const DirectoryEntry& other) noexcept {
288     if (this == &other) {
289       return *this;
290     }
291 
292     DIRECTORY_ENTRY_MOVE_DCHECK();
293 
294     filename = other.filename;
295     d_ino = other.d_ino;
296     d_type = other.d_type;
297     children_paths_ = other.children_paths_;
298     children_initialized_ = other.children_initialized_;
299     debug_counter_ = other.debug_counter_;
300     DIRECTORY_ENTRY_TRACE_CTOR();
301 
302     return *this;
303   }
304 
operator =iorap::inode2filename::DirectoryEntry305   DirectoryEntry& operator=(DirectoryEntry&& other) noexcept {
306     if (this == &other) {
307       return *this;
308     }
309 
310     DIRECTORY_ENTRY_MOVE_DCHECK();
311 
312     filename = std::move(other.filename);
313     d_ino = other.d_ino;
314     d_type = other.d_type;
315     children_paths_ = std::move(other.children_paths_);
316     children_initialized_ = other.children_initialized_;
317     debug_counter_ = other.debug_counter_;
318     DIRECTORY_ENTRY_TRACE_CTOR();
319 
320     return *this;
321   }
322 
DirectoryEntryiorap::inode2filename::DirectoryEntry323   DirectoryEntry(DirectoryEntry&& other) noexcept {
324     DIRECTORY_ENTRY_MOVE_DCHECK();
325     other.moved_from_ = true;
326 
327     filename = std::move(other.filename);
328     d_ino = other.d_ino;
329     d_type = other.d_type;
330     children_paths_ = std::move(other.children_paths_);
331     children_initialized_ = other.children_initialized_;
332     debug_counter_ = other.debug_counter_;
333     DIRECTORY_ENTRY_TRACE_CTOR();
334   }
335 
336   // Create a sentinel (root of roots) whose children entries are those specified by
337   // children_paths.
CreateSentineliorap::inode2filename::DirectoryEntry338   static DirectoryEntry CreateSentinel(std::vector<std::string> children_paths) {
339     DirectoryEntry e;
340     e.d_type = DT_DIR;
341     ++gDebugDirectoryEntryCounter;
342 
343     for (std::string& child_path : children_paths) {
344       // TODO: Should we call Stat on the child path here to reconstitute the ino_t for a root dir?
345       // Otherwise it can look a little strange (i.e. the root dir itself will never match
346       // the searched inode).
347       //
348       // Probably not too big of a problem in practice.
349       DirectoryEntry child_entry{std::move(child_path), kInvalidIno, DT_DIR};
350       ResultT child_entry_as_result{std::move(child_entry)};
351       e.children_paths_.push_back(std::move(child_entry_as_result));
352     }
353 
354     e.children_initialized_ = true;
355 
356     return e;
357   }
358 
359   // Return an observable which emits the direct children only.
360   // The children entries are now read from disk (with readdir) if they weren't read previously.
GetChildrenEntriesiorap::inode2filename::DirectoryEntry361   std::vector<ResultT> GetChildrenEntries(borrowed<SystemCall*> system_call) const& {
362     BuildChildrenPaths(system_call);
363     return children_paths_;
364   }
365 
366   // Return an observable which emits the direct children only.
367   // The children entries are now read from disk (with readdir) if they weren't read previously.
368   // Movable overload.
GetChildrenEntriesiorap::inode2filename::DirectoryEntry369   std::vector<ResultT> GetChildrenEntries(borrowed<SystemCall*> system_call) && {
370     BuildChildrenPaths(system_call);
371     return std::move(children_paths_);
372   }
373 
374   // Returns a (lazy) observable that emits every single node, in pre-order,
375   // rooted at this tree.
376   //
377   // New entries are only read from disk (with e.g. readdir) when more values are pulled
378   // from the observable. Only the direct children of any entry are read at any time.
379   //
380   // The emission can be stopped prematurely by unsubscribing from the observable.
381   // This means the maximum amount of 'redundant' IO reads is bounded by the children count
382   // of all entries emitted thus far minus entries actually emitted.
383   ObservableT GetSubTreePreOrderEntries(borrowed<SystemCall*> system_call) const;
384 
385  private:
386   // Out-of-line definition to avoid circular type dependency.
387   void BuildChildrenPaths(borrowed<SystemCall*> system_call) const;
388 
389   // We need to lazily initialize children_paths_ only when we try to read them.
390   //
391   // Assuming the underlying file system doesn't change (which isn't strictly true),
392   // the directory children are referentially transparent.
393   //
394   // In practice we do not need to distinguish between the file contents changing out
395   // from under us in this code, so we don't need the more strict requirements.
396   mutable std::vector<ResultT> children_paths_;
397   mutable bool children_initialized_{false};
398 
399   friend std::ostream& operator<<(std::ostream& os, const DirectoryEntry& d);
400 };
401 
operator <<(std::ostream & os,const DirectoryEntry & d)402 std::ostream& operator<<(std::ostream& os, const DirectoryEntry& d) {
403   os << "DirectoryEntry{" << d.filename << ",ino:" << d.d_ino << ",type:" << d.d_type << "}";
404   return os;
405 }
406 
407 using DirectoryEntryResult = DirectoryEntry::ResultT;
408 
409 // Read all directory entries and return it as a vector. This must be an eager operation,
410 // as readdir is not re-entrant.
411 //
412 // This could be considered as a limitation from the 'observable' perspective since
413 // one can end up reading unnecessary extra directory entries that are then never consumed.
414 //
415 // The following entries are skipped:
416 //  - '.' self
417 //  - ".." parent
418 //
419 // All DT types except the following are removed:
420 //  * DT_LNK - symbolic link (empty children)
421 //  * DT_REG - regular file  (empty children)
422 //  * DT_DIR - directory     (has children)
423 static std::vector<DirectoryEntryResult>
ReadDirectoryEntriesFromDirectoryPath(std::string dirpath,borrowed<SystemCall * > system_call)424     ReadDirectoryEntriesFromDirectoryPath(std::string dirpath, borrowed<SystemCall*> system_call) {
425   DIR *dirp;
426   struct dirent *dp;
427 
428   LOG(VERBOSE) << "ReadDirectoryEntriesFromDirectoryPath(" << dirpath << ")";
429 
430   if ((dirp = system_call->opendir(dirpath.c_str())) == nullptr) {
431     PLOG(ERROR) << "Couldn't open directory: " << dirpath;
432     return {DirectoryEntryError{kOpenDir, errno, dirpath}};
433   }
434 
435   // Read all the results up front because readdir is not re-entrant.
436   std::vector<DirectoryEntryResult> results;
437 
438   // Get full path + the directory entry path.
439   auto child_path = [&] { return dirpath + "/" + dp->d_name; };
440 
441   do {
442     errno = 0;
443     if ((dp = system_call->readdir(dirp)) != nullptr) {
444       if (dp->d_type == DT_DIR) {
445         if (strcmp(".", dp->d_name) == 0 || strcmp("..", dp->d_name) == 0) {
446           LOG(VERBOSE) << "Skip self/parent: " << dp->d_name;
447           continue;
448         }
449 
450         LOG(VERBOSE) << "Find entry " << child_path()
451                      << ", ino: " << dp->d_ino << ", type: " << dp->d_type;
452         results.push_back(DirectoryEntry{child_path(),
453                                          static_cast<ino_t>(dp->d_ino),
454                                          dp->d_type});
455       } else if (dp->d_type == DT_UNKNOWN) {
456         // This seems bad if it happens. We should probably do something about this.
457         LOG(WARNING) << "Found unknown DT entry: " << child_path();
458 
459         results.push_back(DirectoryEntryError{kDtUnknown, /*errno*/0, child_path()});
460       } else if (dp->d_type == DT_LNK || dp->d_type == DT_REG) {
461         // Regular non-directory file entry.
462         results.push_back(DirectoryEntry{child_path(),
463                                          static_cast<ino_t>(dp->d_ino),
464                                          dp->d_type});
465       } else {
466         // Block device, character device, socket, etc...
467         LOG(VERBOSE) << "Skip DT entry of type: " << dp->d_type << " " << child_path();
468       }
469     } else if (errno != 0) {
470       PLOG(ERROR) << "Error reading directory entry in " << dirpath;
471 
472       results.push_back(DirectoryEntryError{kReadDir, errno, dirpath});
473     }
474   } while (dp != nullptr);
475 
476   if (system_call->closedir(dirp) < 0) {
477     PLOG(ERROR) << "Failed to close directory " << dirpath;
478   }
479 
480   return results;
481 }
482 
BuildChildrenPaths(borrowed<SystemCall * > system_call) const483 void DirectoryEntry::BuildChildrenPaths(borrowed<SystemCall*> system_call) const {
484   if (children_initialized_) {
485     return;
486   }
487 
488   if (d_type == DT_DIR) {
489     children_paths_ = ReadDirectoryEntriesFromDirectoryPath(filename, system_call);
490     // TODO: consider using dependency injection here to substitute this function during testing?
491   }
492 }
493 
494 struct InodeSearchParameters {
495   std::vector<Inode> inode_list;
496   std::vector<std::string> root_dirs;
497 };
498 
499 // [IN]
500 // observable: expected<Value, Error>, ...
501 // [OUT]
502 // observable: Value, ...
503 //
504 // Any encountered 'Error' items are dropped after logging.
505 template <typename T>
MapExpectedOrLog(T && observable,::android::base::LogSeverity log_level)506 auto MapExpectedOrLog(T&& observable,
507                       ::android::base::LogSeverity log_level) {
508   return observable.filter([log_level](const auto& result) {
509     if (result) {
510       return true;
511     } else {
512       LOG(log_level) << result.error();
513       return false;
514     }
515   }).map([](auto&& result) {
516     return IORAP_FORWARD_LAMBDA(result).value();
517   });
518 }
519 
520 template <typename T>
MapExpectedOrLogError(T && observable)521 auto MapExpectedOrLogError(T&& observable) {
522   return MapExpectedOrLog(std::forward<T>(observable), ::android::base::ERROR);
523 }
524 
525 template <typename T>
MapOptionalOrDrop(T && observable)526 auto MapOptionalOrDrop(T&& observable) {
527   return observable.filter([](const auto& result) {
528     return result.has_value();
529   }).map([](auto&& result) {
530     return IORAP_FORWARD_LAMBDA(result).value();
531   });
532   // TODO: static_assert this isn't used with an unexpected.
533 }
534 
535 template <typename T, typename F>
VisitValueOrLogError(T && expected,F && visit_func,const char * error_prefix="")536 auto VisitValueOrLogError(T&& expected, F&& visit_func, const char* error_prefix = "") {
537   if (!expected) {
538     LOG(ERROR) << error_prefix << " " << expected.error();
539   } else {
540     visit_func(std::forward<T>(expected).value());
541   }
542   // TODO: Could be good to make this more monadic by returning an optional.
543 }
544 
545 template <typename TSimple, typename T, typename F>
TreeTraversalPreOrderObservableImpl(rx::subscriber<TSimple> dest,T && node,F && fn)546 void TreeTraversalPreOrderObservableImpl(rx::subscriber<TSimple> dest, T&& node, F&& fn) {
547   LOG(VERBOSE) << "TreeTraversalPreOrderObservableImpl (begin) " << __PRETTY_FUNCTION__;
548 
549   if (!dest.is_subscribed()) {
550     LOG(VERBOSE) << "TreeTraversalPreOrderObservableImpl (unsubscribed)";
551     return;
552   } else {
553     LOG(VERBOSE) << "TreeTraversalPreOrderObservableImpl (on_next node)";
554 
555     // Copy the node here. This is less bad than it seems since we haven't yet
556     // calculated its children (except in the root), so its just doing a shallow memcpy (sizeof(T)).
557     //
558     // This assumes the children are calculated lazily, otherwise we'd need to have a separate
559     // NodeBody class which only holds the non-children elements.
560 
561     TSimple copy = std::forward<T>(node);
562     dest.on_next(std::move(copy));
563 
564     if (!node.has_value()) {
565       return;
566     }
567 
568     // Whenever we call 'on_next' also check if we end up unsubscribing.
569     // This avoids the expensive call into the children.
570     if (!dest.is_subscribed()) {
571       LOG(VERBOSE) << "TreeTraversalPreOrderObservableImpl (post-self unsubscribe)";
572       return;
573     }
574 
575     // Eagerly get the childrem, moving them instead of copying them.
576     auto&& children = fn(std::forward<T>(node));
577     for (auto&& child : children) {
578       TreeTraversalPreOrderObservableImpl(dest, IORAP_FORWARD_LAMBDA(child), fn);
579       // TODO: double check this is doing the std::move properly for rvalues.
580 
581       if (!dest.is_subscribed()) {
582         LOG(VERBOSE) << "TreeTraversalPreOrderObservableImpl (unsubscribed in children)";
583         break;
584       }
585     };
586   }
587 }
588 
589 // Creates an observable over all the nodes in the tree rooted at node.
590 // fn is a function that returns the children of that node.
591 //
592 // The items are emitted left-to-right pre-order, and stop early if the
593 // observable is unsubscribed from.
594 //
595 // Implementation requirement:
596 //    typeof(node) -> expected<V, E> or optional<V> or similar.
597 //    fn(node) -> iterable<typeof(node)>
598 //
599 // preorder(self):
600 //   visit(self)
601 //   for child in fn(self):
602 //     preorder(child)
603 template <typename T, typename F>
TreeTraversalPreOrderObservable(T && node,F && fn)604 auto/*observable<T>*/ TreeTraversalPreOrderObservable(T&& node, F&& fn) {
605   LOG(VERBOSE) << "TreeTraversalPreOrderObservable: " << __PRETTY_FUNCTION__;
606 
607   using T_simple = std::decay_t<T>;
608   return rx::observable<>::create<T_simple>(
609     // Copy node to avoid lifetime issues.
610     [node=node,fn=std::forward<F>(fn)](rx::subscriber<T_simple> dest) {
611       LOG(VERBOSE) << "TreeTraversalPreOrderObservable (lambda)";
612       TreeTraversalPreOrderObservableImpl<T_simple>(dest,
613                                                     std::move(node),
614                                                     std::move(fn));
615       dest.on_completed();
616     }
617   );
618 }
619 
620 DirectoryEntry::ObservableT
GetSubTreePreOrderEntries(borrowed<SystemCall * > system_call) const621     DirectoryEntry::GetSubTreePreOrderEntries(borrowed<SystemCall*> system_call) const {
622   return TreeTraversalPreOrderObservable(
623       DirectoryEntryResult{*this},
624       [system_call=system_call](auto/*DirectoryEntryResult*/&& result)
625           -> std::vector<DirectoryEntryResult> {
626         if (!result) {
627           LOG(VERBOSE) << "GetSubTreePreOrderEntries (no value return)";
628           // Cannot have children when it was an error.
629           return {};
630         }
631         return
632             IORAP_FORWARD_LAMBDA(result)
633             .value()
634             .GetChildrenEntries(system_call);
635       });
636 }
637 
638 struct StatError {
639   int err_no;
640   std::string path_name;
641 };
642 
operator <<(std::ostream & os,const StatError & e)643 std::ostream& operator<<(std::ostream& os, const StatError& e) {
644   os << "StatError{" << e.err_no << "," << e.path_name << "}";
645   return os;
646 }
647 
648 template <typename U = void>  // suppress unused warning.
Stat(const std::string & path_name,borrowed<SystemCall * > system_call)649 static iorap::expected<struct stat, StatError> Stat(const std::string& path_name,
650                                                     borrowed<SystemCall*> system_call) {
651   struct stat statbuf{};
652 
653   // Call stat(2) in live code. Overridden in test code.
654   if (system_call->stat(path_name.c_str(), /*out*/&statbuf) == 0) {
655     return statbuf;
656   } else {
657     return iorap::unexpected(StatError{errno, path_name});
658   }
659 }
660 
661 using StatResult = iorap::expected<struct stat, StatError>;
662 
663 // An inode's corresponding filename on the system.
664 struct SearchMatch {
665   Inode inode;
666   // Relative path joined with a root directory.
667   //
668   // Use absolute path root dirs to get back absolute path filenames.
669   // If relative, this is relative to the current working directory.
670   std::string filename;
671 };
672 
operator <<(std::ostream & os,const SearchMatch & s)673 std::ostream& operator<<(std::ostream& os, const SearchMatch& s) {
674   os << "SearchMatch{" << s.inode << ", " << s.filename << "}";
675   return os;
676 }
677 
678 struct SearchState {
679   // Emit 'match' Inodes corresponding to the ones here.
680   InodeSet inode_set;
681 
682   // An inode matching one of the ones in inode_set was discovered in the most-recently
683   // emitted SearchState.
684   //
685   // The InodeSet removes any matching 'Inode'.
686   std::optional<SearchMatch> match;
687 
688   // TODO: make sure this doesn't copy [inodes], as that would be unnecessarily expensive.
689 };
690 
operator <<(std::ostream & os,const SearchState & s)691 std::ostream& operator<<(std::ostream& os, const SearchState& s) {
692   os << "SearchState{match:";
693   // Print the 'match' first. The InodeSet could be very large so it could be truncated in logs.
694   if (s.match) {
695     os << s.match.value();
696   } else {
697     os << "(none)";
698   }
699   os << ", inode_set:" << s.inode_set << "}";
700   return os;
701 }
702 
703 // TODO: write operator<< etc.
704 
705 // Return a lazy observable that will search for all filenames whose inodes
706 // match the inodes in inode_search_list.
707 //
708 // Every unmatched inode will be emitted as an unexpected at the end of the stream.
SearchDirectoriesForMatchingInodes(std::vector<std::string> root_dirs,std::vector<Inode> inode_search_list,borrowed<SystemCall * > system_call)709 auto/*[observable<InodeResult>, connectable]*/ SearchDirectoriesForMatchingInodes(
710     std::vector<std::string> root_dirs,
711     std::vector<Inode> inode_search_list,
712     borrowed<SystemCall*> system_call) {
713 
714   // Create a (lazy) observable that will emit each DirectoryEntry that is a recursive subchild
715   // of root_dirs. Emission will be stopped when its unsubscribed from.
716   //
717   // This is done by calling readdir(3) lazily.
718   auto/*obs<DirectoryEntry>*/ find_all_subdir_entries = ([&]() {
719     DirectoryEntry sentinel = DirectoryEntry::CreateSentinel(std::move(root_dirs));
720     auto/*obs<DirectoryEntryResult*/ results = sentinel.GetSubTreePreOrderEntries(system_call);
721 
722     // Drop any errors by logging them to logcat. "Unwrap" the expected into the underlying data.
723     auto/*obs<DirectoryEntry*>*/ expected_drop_errors = MapExpectedOrLogError(std::move(results));
724     return expected_drop_errors;
725   })();
726 
727   // DirectoryEntry is missing the dev_t portion, so we may need to call scan(2) again
728   // to confirm the dev_t. We skip calling scan(2) when the ino_t does not match.
729   // InodeSet lets us optimally avoid calling scan(2).
730   SearchState initial;
731   initial.inode_set = InodeSet::OfList(inode_search_list);
732 
733   auto/*[observable<SearchState>,Connectable]*/ search_state_results = find_all_subdir_entries.scan(
734       std::move(initial),
735       [system_call=system_call](SearchState search_state, const DirectoryEntry& dir_entry) {
736         LOG(VERBOSE) << "SearchDirectoriesForMatchingInodes#Scan "
737                      << dir_entry << ", state: " << search_state;
738 
739         search_state.match = std::nullopt;
740 
741         InodeSet* inodes = &search_state.inode_set;
742 
743         // Find all the possible inodes across different devices.
744         InodeSet::ValueRange inode_list = inodes->FindInodeList(dir_entry.d_ino);
745 
746         // This directory doesn't correspond to any inodes we are searching for.
747         if (!inode_list) {
748           return search_state;
749         }
750 
751         StatResult maybe_stat = Stat(dir_entry.filename, system_call);
752         VisitValueOrLogError(maybe_stat, [&](const struct stat& stat_buf) {
753           // Try to match the specific inode. Usually this will not result in a match (nullopt).
754           std::optional<Inode> inode = inodes->FindAndRemoveInodeInList(inode_list, stat_buf);
755 
756           if (inode) {
757             search_state.match = SearchMatch{inode.value(), dir_entry.filename};
758           }
759         });
760 
761         return search_state;  // implicit move.
762       }
763   // Avoid exhausting a potentially 'infinite' stream of files by terminating as soon
764   // as we find every single inode we care about.
765   ).take_while([](const SearchState& state) {
766       // Also emit the last item that caused the search set to go empty.
767       bool cond = !state.inode_set.Empty() || state.match;
768 
769       if (WOULD_LOG(VERBOSE)) {
770         static int kCounter = 0;
771         LOG(VERBOSE) << "SearchDirectoriesForMatchingInodes#take_while (" << kCounter++ <<
772                      ",is_empty:"
773                      << state.inode_set.Empty() << ", match:" << state.match.has_value();
774       }
775       // Minor O(1) implementation inefficiency:
776       // (Too minor to fix but it can be strange if looking at the logs or readdir traces).
777       //
778       // Note, because we return 'true' after the search set went empty,
779       // the overall stream graph still pulls from search_state_results exactly once more:
780       //
781       // This means that for cond to go to false, we would've read one extra item and then discarded
782       // it. If that item was the first child of a directory, that means we essentially did
783       // one redundant pass of doing a readdir.
784       //
785       // In other words if the search set goes to empty while the current item is a directory,
786       // it will definitely readdir on it at least once as we try to get the first child in
787       // OnTreeTraversal.
788       //
789       // This could be fixed with a 'take_until(Predicate)' operator which doesn't discard
790       // the last item when the condition becomes false. However rxcpp seems to lack this operator,
791       // whereas RxJava has it.
792 
793       if (!cond) {
794         LOG(VERBOSE) << "SearchDirectoriesForMatchingInodes#take_while "
795                      << "should now terminate for " << state;
796       }
797 
798       return cond;
799   }).publish();
800   // The publish here is mandatory. The stream is consumed twice (once by matched and once by
801   // unmatched streams). Without the publish, once all items from 'matched' were consumed it would
802   // start another instance of 'search_state_results' (i.e. it appears as if the search
803   // is restarted).
804   //
805   // By using 'publish', the search_state_results is effectively shared by both downstream nodes.
806   // Note that this also requires the subscriber to additionally call #connect on the above stream,
807   // otherwise no work will happen.
808 
809   // Lifetime notes:
810   //
811   // The the 'SearchState' is emitted into both below streams simultaneously.
812   //    The 'unmatched_inode_values' only touches the inode_set.
813   //    The 'matched_inode_values' only touches the match.
814   // Either stream can 'std::move' from those fields because they don't move each other's fields.
815   auto/*observable<InodeResult>*/ matched_inode_values = search_state_results
816       .filter([](const SearchState& search_state) { return search_state.match.has_value(); })
817       .map([](SearchState& search_state) { return std::move(search_state.match.value()); })
818                      // observable<SearchMatch>
819       .map([](SearchMatch search_match) {
820           return InodeResult::makeSuccess(search_match.inode, std::move(search_match.filename));
821       });            // observable<InodeResult>
822 
823   auto/*observable<?>*/ unmatched_inode_values = search_state_results
824       // The 'last' SearchState is the one that contains all the remaining inodes.
825       .take_last(1)  // observable<SearchState>
826       .flat_map([](const SearchState& search_state) {
827           LOG(VERBOSE) << "SearchDirectoriesForMatchingInodes#unmatched -- flat_map";
828           // Aside: Could've used a move here if the inodes weren't so lightweight already.
829           return search_state.inode_set.IterateValues(); })
830                      // observable<Inode>
831       .map([](const Inode& inode) {
832           LOG(VERBOSE) << "SearchDirectoriesForMatchingInodes#unmatched -- map";
833           return InodeResult::makeFailure(inode, InodeResult::kCouldNotFindFilename);
834       });
835                      // observable<InodeResult>
836 
837   // The matched and unmatched InodeResults are emitted together.
838   //   Use merge, not concat, because we need both observables to be subscribed to simultaneously.
839 
840   auto/*observable<InodeResult*/ all_inode_results =
841       matched_inode_values.merge(unmatched_inode_values);
842 
843   // Now that all mid-stream observables have been connected, turn the Connectable observable
844   // into a regular observable.
845 
846   // The caller has to call 'connect' on the search_state_results after subscribing
847   // and before any work can actually start.
848   return std::make_pair(all_inode_results, search_state_results);
849 }
850 
851 
FindFilenamesFromInodes(std::vector<std::string> root_directories,std::vector<Inode> inode_list,SearchMode mode)852 rxcpp::observable<InodeResult> SearchDirectories::FindFilenamesFromInodes(
853     std::vector<std::string> root_directories,
854     std::vector<Inode> inode_list,
855     SearchMode mode) {
856   DCHECK(mode == SearchMode::kInProcessDirect) << " other modes not implemented yet";
857 
858   auto/*observable[2]*/ [inode_results, connectable] = SearchDirectoriesForMatchingInodes(
859       std::move(root_directories),
860       std::move(inode_list),
861       system_call_);
862 
863   return inode_results;
864 }
865 
866 // I think we could avoid this with auto_connect, which rxcpp doesn't seem to have.
867 //
868 // I can't figure out any other way to avoid this, or at least to allow connecting
869 // on the primary observable (instead of a secondary side-observable).
870 //
871 // If using the obvious publish+ref_count then the unmerged stream gets no items emitted into it.
872 // If tried to ref_count later, everything turns into no-op.
873 // If trying to call connect too early, the subscribe is missed.
874 template <typename T>
875 struct RxAnyConnectableFromObservable : public SearchDirectories::RxAnyConnectable {
connectiorap::inode2filename::RxAnyConnectableFromObservable876   virtual void connect() override {
877     observable.connect();
878   }
879 
~RxAnyConnectableFromObservableiorap::inode2filename::RxAnyConnectableFromObservable880   virtual ~RxAnyConnectableFromObservable() {}
881 
RxAnyConnectableFromObservableiorap::inode2filename::RxAnyConnectableFromObservable882   RxAnyConnectableFromObservable(rxcpp::connectable_observable<T> observable)
883     : observable(observable) {
884   }
885 
886   rxcpp::connectable_observable<T> observable;
887 };
888 
889 // Type deduction helper.
890 template <typename T>
891 std::unique_ptr<SearchDirectories::RxAnyConnectable>
MakeRxAnyConnectableFromObservable(rxcpp::connectable_observable<T> observable)892     MakeRxAnyConnectableFromObservable(rxcpp::connectable_observable<T> observable) {
893   SearchDirectories::RxAnyConnectable* ptr = new RxAnyConnectableFromObservable<T>{observable};
894   return std::unique_ptr<SearchDirectories::RxAnyConnectable>{ptr};
895 }
896 
897 std::pair<rxcpp::observable<InodeResult>, std::unique_ptr<SearchDirectories::RxAnyConnectable>>
FindFilenamesFromInodesPair(std::vector<std::string> root_directories,std::vector<Inode> inode_list,SearchMode mode)898     SearchDirectories::FindFilenamesFromInodesPair(
899         std::vector<std::string> root_directories,
900         std::vector<Inode> inode_list,
901         SearchMode mode) {
902   DCHECK(mode == SearchMode::kInProcessDirect) << " other modes not implemented yet";
903 
904   auto/*observable[2]*/ [inode_results, connectable] = SearchDirectoriesForMatchingInodes(
905       std::move(root_directories),
906       std::move(inode_list),
907       system_call_);
908 
909   std::unique_ptr<SearchDirectories::RxAnyConnectable> connectable_ptr =
910     MakeRxAnyConnectableFromObservable(connectable.as_dynamic());
911 
912   return {inode_results, std::move(connectable_ptr)};
913 }
914 
915 }  // namespace iorap::inode2filename
916