From 1994a5281bdd1633c8c9305b626ac1e521263426 Mon Sep 17 00:00:00 2001 From: Adam Rice <ricea@chromium.org> Date: Fri, 28 Mar 2025 15:44:51 -0700 Subject: [PATCH] Implement ReplayInsert() and ReplayErase() Implement net::NoVarySearchCache methods ReplayInsert() and ReplayErase() to play back previously journalled entries. BUG=399562754,382394774 Change-Id: Ic1e6122e7bbe908945fd427bda28cf67bd7407fe Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6406013 Commit-Queue: Adam Rice <ricea@chromium.org> Reviewed-by: Kenichi Ishibashi <bashi@chromium.org> Cr-Commit-Position: refs/heads/main@{#1439744} --- net/http/no_vary_search_cache.cc | 191 +++++++++++----- net/http/no_vary_search_cache.h | 36 +++ net/http/no_vary_search_cache_unittest.cc | 257 ++++++++++++++++++++-- 3 files changed, 412 insertions(+), 72 deletions(-) diff --git a/net/http/no_vary_search_cache.cc b/net/http/no_vary_search_cache.cc index 3ba467a7e3941..593b94f8b654e 100644 --- a/net/http/no_vary_search_cache.cc +++ b/net/http/no_vary_search_cache.cc @@ -109,6 +109,24 @@ bool URLIsAcceptable(const GURL& url) { !url.has_password(); } +bool BaseURLIsAcceptable(const GURL& base_url) { + return URLIsAcceptable(base_url) && !base_url.has_query() && + !base_url.has_ref(); +} + +// Given `base_url` and `query`, return the original URL that would have been +// used to construct them. +GURL ReconstructOriginalURLFromQuery(const GURL& base_url, + const std::optional<std::string>& query) { + if (!query.has_value()) { + return base_url; + } + + GURL::Replacements replacements; + replacements.SetQueryStr(query.value()); + return base_url.ReplaceComponents(replacements); +} + } // namespace // base::LinkedList only supports having an object in a single linked list at a @@ -223,13 +241,7 @@ class NoVarySearchCache::QueryString final // including any fragment). It's important to use this method to correctly // reconstruct URLs that have an empty query (end in '?'). GURL ReconstructOriginalURL(const GURL& base_url) { - if (!query_.has_value()) { - return base_url; - } - - GURL::Replacements replacements; - replacements.SetQueryStr(query_.value()); - return base_url.ReplaceComponents(replacements); + return ReconstructOriginalURLFromQuery(base_url, query_); } EraseHandle CreateEraseHandle() { @@ -411,54 +423,8 @@ void NoVarySearchCache::MaybeInsert(const HttpRequestInfo& request, const base::Time update_time = base::Time::Now(); - const BaseURLCacheKey cache_key(maybe_cache_key.value()); - const auto [it, _] = map_.try_emplace(cache_key); - DataMapType& data_map = it->second; - const auto [data_it, inserted] = - data_map.emplace(std::move(*maybe_nvs_data), it->first); - const HttpNoVarySearchData& nvs_data = data_it->first; - QueryStringList& query_strings = data_it->second; - - const auto call_observer = [this, &cache_key, &nvs_data, - update_time](const QueryString* query_string) { - if (observer_) { - observer_->OnInsert(cache_key.value(), nvs_data, query_string->query(), - update_time); - } - }; - - if (inserted) { - query_strings.nvs_data_ref = &nvs_data; - } else { - // There was already an entry for this `nvs_data`. We need to check if it - // has a match for the URL we're trying to insert. If it does, we should - // update or replace the existing QueryString. - if (auto result = - FindQueryStringInList(query_strings, base_url, url, nvs_data)) { - QueryString* match = result->match; - if (match->query() == query) { - // In the exact match case we can use the existing QueryString object. - match->set_update_time(update_time); - match->MoveToHead(query_strings.list); - match->MoveToHead(lru_); - call_observer(match); - return; - } - - // No-Vary-Search matches are transitive. Any future requests that might - // be a match for `match` are also a match for `url`. Since `url` is newer - // we will prefer it, and so `match` will never be used again and we can - // safely remove it from the cache. - --size_; - match->RemoveAndDelete(); - } - } - CHECK_LE(size_, max_size_); - ++size_; - auto* query_string = - QueryString::CreateAndInsert(query, query_strings, lru_, update_time); - call_observer(query_string); - EvictIfOverfull(); + DoInsert(url, base_url, std::move(maybe_cache_key.value()), + std::move(maybe_nvs_data.value()), query, update_time, observer_); } bool NoVarySearchCache::ClearData(UrlFilterType filter_type, @@ -505,6 +471,63 @@ void NoVarySearchCache::SetObserver(Observer* observer) { observer_ = observer; } +void NoVarySearchCache::ReplayInsert(std::string base_url_cache_key, + HttpNoVarySearchData nvs_data, + std::optional<std::string> query, + base::Time update_time) { + const std::string base_url_string = + HttpCache::GetResourceURLFromHttpCacheKey(base_url_cache_key); + const GURL base_url(base_url_string); + if (!BaseURLIsAcceptable(base_url)) { + return; + } + // The URL should have been stored in its canonical form. + if (base_url_string != base_url.possibly_invalid_spec()) { + return; + } + if (query && query->find('#') != std::string::npos) { + return; + } + const GURL url = ReconstructOriginalURLFromQuery(base_url, query); + + // To be extra careful to avoid re-entrancy, explicitly set `observer` to + // nullptr so that no notification is fired for this insertion. + DoInsert(url, base_url, std::move(base_url_cache_key), std::move(nvs_data), + query, update_time, /*observer=*/nullptr); +} + +void NoVarySearchCache::ReplayErase(const std::string& base_url_cache_key, + const HttpNoVarySearchData& nvs_data, + const std::optional<std::string>& query) { + const auto map_it = map_.find(BaseURLCacheKey(base_url_cache_key)); + if (map_it == map_.end()) { + return; + } + + DataMapType& data_map = map_it->second; + const auto data_it = data_map.find(nvs_data); + if (data_it == data_map.end()) { + return; + } + + QueryStringList& query_strings = data_it->second; + QueryString* query_string = nullptr; + ForEachQueryString(query_strings.list, + [&query_string, &query](QueryString* candidate) { + if (!query_string && candidate->query() == query) { + query_string = candidate; + } + }); + if (!query_string) { + return; + } + + // TODO(https://crbug.com/382394774): This could be made more efficient in the + // case when the map keys need to be deleted since we have `map_it` and + // `data_it` already available. + EraseQuery(query_string); +} + // This is out-of-line to discourage inlining so the bots can detect if it is // accidentally linked into the binary. size_t NoVarySearchCache::GetSizeForTesting() const { @@ -569,6 +592,64 @@ void NoVarySearchCache::EraseQuery(QueryString* query_string) { } } +void NoVarySearchCache::DoInsert(const GURL& url, + const GURL& base_url, + std::string base_url_cache_key, + HttpNoVarySearchData nvs_data, + std::optional<std::string_view> query, + base::Time update_time, + Observer* observer) { + const BaseURLCacheKey cache_key(std::move(base_url_cache_key)); + const auto [it, _] = map_.try_emplace(std::move(cache_key)); + const BaseURLCacheKey& cache_key_ref = it->first; + DataMapType& data_map = it->second; + const auto [data_it, inserted] = + data_map.emplace(std::move(nvs_data), it->first); + const HttpNoVarySearchData& nvs_data_ref = data_it->first; + QueryStringList& query_strings = data_it->second; + + const auto call_observer = [observer, &cache_key_ref, &nvs_data_ref, + update_time](const QueryString* query_string) { + if (observer) { + observer->OnInsert(cache_key_ref.value(), nvs_data_ref, + query_string->query(), update_time); + } + }; + + if (inserted) { + query_strings.nvs_data_ref = &nvs_data_ref; + } else { + // There was already an entry for this `nvs_data`. We need to check if it + // has a match for the URL we're trying to insert. If it does, we should + // update or replace the existing QueryString. + if (auto result = + FindQueryStringInList(query_strings, base_url, url, nvs_data_ref)) { + QueryString* match = result->match; + if (match->query() == query) { + // In the exact match case we can use the existing QueryString object. + match->set_update_time(update_time); + match->MoveToHead(query_strings.list); + match->MoveToHead(lru_); + call_observer(match); + return; + } + + // No-Vary-Search matches are transitive. Any future requests that might + // be a match for `match` are also a match for `url`. Since `url` is newer + // we will prefer it, and so `match` will never be used again and we can + // safely remove it from the cache. + --size_; + match->RemoveAndDelete(); + } + } + CHECK_LE(size_, max_size_); + ++size_; + auto* query_string = + QueryString::CreateAndInsert(query, query_strings, lru_, update_time); + call_observer(query_string); + EvictIfOverfull(); +} + // static void NoVarySearchCache::FindQueryStringsInTimeRange( DataMapType& data_map, diff --git a/net/http/no_vary_search_cache.h b/net/http/no_vary_search_cache.h index 3916ba6a4bc20..1f702a6a8c88d 100644 --- a/net/http/no_vary_search_cache.h +++ b/net/http/no_vary_search_cache.h @@ -170,6 +170,32 @@ class NET_EXPORT_PRIVATE NoVarySearchCache { // observing. void SetObserver(Observer* observer); + // Adds the specified entry to the cache as if by MaybeInsert(), evicting an + // older entry if the cache is full. The entry is treated as if newly used for + // the purposes of eviction. For use when replaying journalled entries. The + // arguments are expected to match a previous call to Observer::OnInsert() + // from a different instance of NoVarySearchCache, but with the same settings + // for cache partitioning. It can also be called with other valid arguments + // for testing. If a valid base URL cannot be extracted from + // `base_url_cache_key`, or `query` contains an invalid character, the call is + // ignored. This will never happen if the arguments are unchanged from a call + // to Observer::OnInsert() with the same partitioning. A valid base URL does + // not contain a query or a fragment. Observer methods are not called. + void ReplayInsert(std::string base_url_cache_key, + HttpNoVarySearchData nvs_data, + std::optional<std::string> query, + base::Time update_time); + + // Removes the specified entry from the cache as if by Erase(). For use when + // replaying journalled entries. The arguments are expected to match a + // previous call to Observer::OnErase from a different instance of + // NoVarySearchCache, with the same settings for cache partitioning + // base::Features. If `query` is not found the call silently + // does nothing. Observer methods are not called. + void ReplayErase(const std::string& base_url_cache_key, + const HttpNoVarySearchData& nvs_data, + const std::optional<std::string>& query); + // Returns the size (number of stored original query strings) of the cache. size_t GetSizeForTesting() const; @@ -234,6 +260,16 @@ class NET_EXPORT_PRIVATE NoVarySearchCache { // Erases `query_string` from the cache. void EraseQuery(QueryString* query_string); + // Inserts `query` or marks it as used in the cache. evicting an older entry + // if necessary to make space. `observer` is notified if set. + void DoInsert(const GURL& url, + const GURL& base_url, + std::string base_url_cache_key, + HttpNoVarySearchData nvs_data, + std::optional<std::string_view> query, + base::Time update_time, + Observer* observer); + // Scans all the QueryStrings in `data_map` to find ones in the range // [delete_begin, delete_end) and appends them to `matches`. `data_map` is // mutable to reflect that it is returning mutable pointers to QueryString diff --git a/net/http/no_vary_search_cache_unittest.cc b/net/http/no_vary_search_cache_unittest.cc index 6f1d79a71d12f..7c169ea216ba6 100644 --- a/net/http/no_vary_search_cache_unittest.cc +++ b/net/http/no_vary_search_cache_unittest.cc @@ -12,6 +12,7 @@ #include <utility> #include "base/memory/raw_ptr.h" +#include "base/memory/raw_ref.h" #include "base/pickle.h" #include "base/strings/strcat.h" #include "base/test/scoped_feature_list.h" @@ -151,6 +152,14 @@ class NoVarySearchCacheTest : public ::testing::TestWithParam<bool> { return cache.GetSizeForTesting() == 1u; } + std::string GenerateCacheKey(std::string_view url) { + const auto request = TestRequest(GURL(url)); + std::optional<std::string> maybe_cache_key = + HttpCache::GenerateCacheKeyForRequest(&request); + EXPECT_TRUE(maybe_cache_key); + return maybe_cache_key.value_or(std::string()); + } + private: base::test::ScopedFeatureList feature_list_; NoVarySearchCache cache_{kMaxSize}; @@ -161,6 +170,14 @@ class NoVarySearchCacheTest : public ::testing::TestWithParam<bool> { std::vector<scoped_refptr<HttpResponseHeaders>> response_header_storage_; }; +INSTANTIATE_TEST_SUITE_P(All, + NoVarySearchCacheTest, + ::testing::Bool(), + [](const testing::TestParamInfo<bool>& info) { + return info.param ? "SplitCacheEnabled" + : "SplitCacheDisabled"; + }); + TEST_P(NoVarySearchCacheTest, NewlyConstructedCacheIsEmpty) { EXPECT_EQ(cache().GetSizeForTesting(), 0u); } @@ -881,9 +898,24 @@ TEST_P(NoVarySearchCacheTest, TruncatedPickle) { } } -// An Observer object implemented by GoogleMock. -class MockObserver : public NoVarySearchCache::Observer { +// An Observer that registers and deregisters itself automatically. +class ScopedObserver : public NoVarySearchCache::Observer { public: + explicit ScopedObserver(NoVarySearchCache& cache) : cache_(cache) { + cache.SetObserver(this); + } + + ~ScopedObserver() override { cache_->SetObserver(nullptr); } + + private: + raw_ref<NoVarySearchCache> cache_; +}; + +// An Observer object implemented by GoogleMock. +class ScopedMockObserver : public ScopedObserver { + public: + using ScopedObserver::ScopedObserver; + MOCK_METHOD(void, OnInsert, (const std::string&, @@ -899,20 +931,6 @@ class MockObserver : public NoVarySearchCache::Observer { (override)); }; -// A version of MockObserver that registers and unregisters itself -// automatically. -class ScopedMockObserver : public ::testing::StrictMock<MockObserver> { - public: - explicit ScopedMockObserver(NoVarySearchCache& cache) : cache_(&cache) { - cache.SetObserver(this); - } - - ~ScopedMockObserver() override { cache_->SetObserver(nullptr); } - - private: - raw_ptr<NoVarySearchCache> cache_; -}; - // A matcher which matches No-Vary-Search: key-order const auto IsKeyOrder = Eq(HttpNoVarySearchData::CreateFromNoVaryParams({}, false)); @@ -1024,14 +1042,219 @@ TEST_P(NoVarySearchCacheTest, DontObserveLookup) { cache().Lookup(TestRequest("a=6")); } +// An Observer that clones all changes into a target NoVarySearchCache object. +class CloningObserver : public ScopedObserver { + public: + CloningObserver(NoVarySearchCache& source, NoVarySearchCache& target) + : ScopedObserver(source), target_(target) {} + + void OnInsert(const std::string& base_url_cache_key, + const HttpNoVarySearchData& nvs_data, + const std::optional<std::string>& query, + base::Time update_time) override { + target_->ReplayInsert(base_url_cache_key, nvs_data, query, update_time); + } + + // Called when an entry is erased by the Erase() method. + void OnErase(const std::string& base_url_cache_key, + const HttpNoVarySearchData& nvs_data, + const std::optional<std::string>& query) override { + target_->ReplayErase(base_url_cache_key, nvs_data, query); + } + + private: + raw_ref<NoVarySearchCache> target_; +}; + +struct CloneMaker { + NoVarySearchCache clone; + CloningObserver observer; + + explicit CloneMaker(NoVarySearchCache& source) + : clone(kMaxSize), observer(source, clone) {} +}; + +class NoVarySearchCacheReplayTest : public NoVarySearchCacheTest { + protected: + struct ReplayTestCase { + std::string_view description; + HttpRequestInfo to_insert; + std::string_view no_vary_search_value; + HttpRequestInfo to_lookup; + }; + + auto ReplayTestCases() { + return std::to_array<ReplayTestCase>({ + { + "Simple key-order", + TestRequest("c=2&a=1"), + "key-order", + TestRequest("a=1&c=2"), + }, + { + "Different no-vary-search", + TestRequest("d=3&e=5"), + "params=(\"d\")", + TestRequest("e=5"), + }, + { + "Different base URL", + TestRequest(GURL("https://www.example.com/other?c=2&a=1")), + "key-order", + TestRequest(GURL("https://www.example.com/other?a=1&c=2")), + }, + { + "Different site", + TestRequest(GURL("https://example.example/?c=2&a=1")), + "key-order", + TestRequest(GURL("https://example.example/?a=1&c=2")), + }, + { + "No question mark", + TestRequest(GURL("https://example2.example/")), + "params", + TestRequest(GURL("https://example2.example/?q=ignored")), + }, + }); + } +}; + INSTANTIATE_TEST_SUITE_P(All, - NoVarySearchCacheTest, + NoVarySearchCacheReplayTest, ::testing::Bool(), [](const testing::TestParamInfo<bool>& info) { return info.param ? "SplitCacheEnabled" : "SplitCacheDisabled"; }); +TEST_P(NoVarySearchCacheReplayTest, Inserts) { + const auto test_cases = ReplayTestCases(); + CloneMaker maker(cache()); + + for (const auto& [description, to_insert, no_vary_search_value, to_lookup] : + test_cases) { + cache().MaybeInsert(to_insert, TestHeaders(no_vary_search_value)); + } + + EXPECT_EQ(maker.clone.GetSizeForTesting(), test_cases.size()); + + for (const auto& [description, to_insert, no_vary_search_value, to_lookup] : + test_cases) { + SCOPED_TRACE(description); + auto source_lookup_result = cache().Lookup(to_lookup); + auto target_lookup_result = maker.clone.Lookup(to_lookup); + + ASSERT_TRUE(source_lookup_result); + ASSERT_TRUE(target_lookup_result); + EXPECT_EQ(source_lookup_result->original_url, + target_lookup_result->original_url); + } +} + +TEST_P(NoVarySearchCacheReplayTest, Erases) { + const auto test_cases = ReplayTestCases(); + CloneMaker maker(cache()); + + for (const auto& [description, to_insert, no_vary_search_value, to_lookup] : + test_cases) { + cache().MaybeInsert(to_insert, TestHeaders(no_vary_search_value)); + } + + for (const auto& [description, to_insert, no_vary_search_value, to_lookup] : + test_cases) { + SCOPED_TRACE(description); + auto source_lookup_result = cache().Lookup(to_lookup); + ASSERT_TRUE(source_lookup_result); + + cache().Erase(std::move(source_lookup_result->erase_handle)); + + EXPECT_FALSE(maker.clone.Lookup(to_lookup)); + } + + EXPECT_EQ(maker.clone.GetSizeForTesting(), 0u); + EXPECT_TRUE(maker.clone.IsTopLevelMapEmptyForTesting()); +} + +TEST_P(NoVarySearchCacheTest, ReplayInsertBadURLs) { + struct TestCase { + std::string_view description; + std::string_view bad_url; + }; + static constexpr auto kBadURLs = std::to_array<TestCase>({ + {"Invalid URL", ":"}, + {"Not canonical", "https://example%2Eexample/"}, + {"No path", "what:"}, + {"Has username", "https://bob@example.example/"}, + {"Has password", "https://:pass@example.example/"}, + {"Has query", "https://example.example/?"}, + {"Has ref", "https://example.example/#water"}, + }); + static constexpr std::string_view kRealURL = "https://example.example/test"; + const std::string real_cache_key = GenerateCacheKey(kRealURL); + const auto nvs_data = HttpNoVarySearchData::CreateFromNoVaryParams({}, true); + const std::optional<std::string> query = "t=1"; + const base::Time update_time; + for (const auto& [description, bad_url] : kBadURLs) { + SCOPED_TRACE(description); + std::string modified_cache_key = real_cache_key; + base::ReplaceFirstSubstringAfterOffset(&modified_cache_key, 0u, kRealURL, + bad_url); + cache().ReplayInsert(modified_cache_key, nvs_data, query, update_time); + EXPECT_EQ(cache().GetSizeForTesting(), 0u); + } +} + +TEST_P(NoVarySearchCacheTest, ReplayInsertBadQuery) { + const std::string cache_key = GenerateCacheKey("https://example.example/"); + const auto nvs_data = HttpNoVarySearchData::CreateFromNoVaryParams({}, true); + const base::Time update_time; + cache().ReplayInsert(cache_key, nvs_data, "t=1#what", update_time); + EXPECT_EQ(cache().GetSizeForTesting(), 0u); +} + +TEST_P(NoVarySearchCacheTest, ReplayEraseOnEmptyCache) { + const std::string cache_key = GenerateCacheKey("https://example.example/"); + const auto nvs_data = HttpNoVarySearchData::CreateFromNoVaryParams({}, true); + cache().ReplayErase(cache_key, nvs_data, "t=1"); + EXPECT_EQ(cache().GetSizeForTesting(), 0u); +} + +TEST_P(NoVarySearchCacheTest, ReplayEraseMismatchedCacheKey) { + const std::string cache_key = GenerateCacheKey("https://example.example/"); + const auto nvs_data = HttpNoVarySearchData::CreateFromNoVaryParams({}, true); + const std::optional<std::string> query = "t=1"; + const base::Time update_time; + cache().ReplayInsert(cache_key, nvs_data, query, update_time); + + cache().ReplayErase(cache_key + ".", nvs_data, query); + EXPECT_EQ(cache().GetSizeForTesting(), 1u); +} + +TEST_P(NoVarySearchCacheTest, ReplayEraseMismatchedNVSData) { + const std::string cache_key = GenerateCacheKey("https://example.example/"); + const auto nvs_data = HttpNoVarySearchData::CreateFromNoVaryParams({}, true); + const std::optional<std::string> query = "t=1"; + const base::Time update_time; + cache().ReplayInsert(cache_key, nvs_data, query, update_time); + + const auto mismatched_nvs_data = + HttpNoVarySearchData::CreateFromNoVaryParams({"z"}, true); + cache().ReplayErase(cache_key, mismatched_nvs_data, query); + EXPECT_EQ(cache().GetSizeForTesting(), 1u); +} + +TEST_P(NoVarySearchCacheTest, ReplayEraseMismatchedQuery) { + const std::string cache_key = GenerateCacheKey("https://example.example/"); + const auto nvs_data = HttpNoVarySearchData::CreateFromNoVaryParams({}, true); + const std::optional<std::string> query = "t=1"; + const base::Time update_time; + cache().ReplayInsert(cache_key, nvs_data, query, update_time); + + const std::optional<std::string> mismatched_query = "t=2"; + cache().ReplayErase(cache_key, nvs_data, mismatched_query); + EXPECT_EQ(cache().GetSizeForTesting(), 1u); +} + // TODO(https://crbug.com/390216627): Test the various experiments that affect // the cache key and make sure they also affect NoVarySearchCache.