0

Implement ReplayInsert() and ReplayErase()

Implement net::NoVarySearchCache methods ReplayInsert() and
ReplayErase() to play back previously journalled entries.

BUG=399562754,382394774

Change-Id: Ic1e6122e7bbe908945fd427bda28cf67bd7407fe
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6406013
Commit-Queue: Adam Rice <ricea@chromium.org>
Reviewed-by: Kenichi Ishibashi <bashi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1439744}
This commit is contained in:
Adam Rice 2025-03-28 15:44:51 -07:00 committed by Chromium LUCI CQ
parent ddeb98cba9
commit 1994a5281b
3 changed files with 412 additions and 72 deletions

@ -109,6 +109,24 @@ bool URLIsAcceptable(const GURL& url) {
!url.has_password();
}
bool BaseURLIsAcceptable(const GURL& base_url) {
return URLIsAcceptable(base_url) && !base_url.has_query() &&
!base_url.has_ref();
}
// Given `base_url` and `query`, return the original URL that would have been
// used to construct them.
GURL ReconstructOriginalURLFromQuery(const GURL& base_url,
const std::optional<std::string>& query) {
if (!query.has_value()) {
return base_url;
}
GURL::Replacements replacements;
replacements.SetQueryStr(query.value());
return base_url.ReplaceComponents(replacements);
}
} // namespace
// base::LinkedList only supports having an object in a single linked list at a
@ -223,13 +241,7 @@ class NoVarySearchCache::QueryString final
// including any fragment). It's important to use this method to correctly
// reconstruct URLs that have an empty query (end in '?').
GURL ReconstructOriginalURL(const GURL& base_url) {
if (!query_.has_value()) {
return base_url;
}
GURL::Replacements replacements;
replacements.SetQueryStr(query_.value());
return base_url.ReplaceComponents(replacements);
return ReconstructOriginalURLFromQuery(base_url, query_);
}
EraseHandle CreateEraseHandle() {
@ -411,54 +423,8 @@ void NoVarySearchCache::MaybeInsert(const HttpRequestInfo& request,
const base::Time update_time = base::Time::Now();
const BaseURLCacheKey cache_key(maybe_cache_key.value());
const auto [it, _] = map_.try_emplace(cache_key);
DataMapType& data_map = it->second;
const auto [data_it, inserted] =
data_map.emplace(std::move(*maybe_nvs_data), it->first);
const HttpNoVarySearchData& nvs_data = data_it->first;
QueryStringList& query_strings = data_it->second;
const auto call_observer = [this, &cache_key, &nvs_data,
update_time](const QueryString* query_string) {
if (observer_) {
observer_->OnInsert(cache_key.value(), nvs_data, query_string->query(),
update_time);
}
};
if (inserted) {
query_strings.nvs_data_ref = &nvs_data;
} else {
// There was already an entry for this `nvs_data`. We need to check if it
// has a match for the URL we're trying to insert. If it does, we should
// update or replace the existing QueryString.
if (auto result =
FindQueryStringInList(query_strings, base_url, url, nvs_data)) {
QueryString* match = result->match;
if (match->query() == query) {
// In the exact match case we can use the existing QueryString object.
match->set_update_time(update_time);
match->MoveToHead(query_strings.list);
match->MoveToHead(lru_);
call_observer(match);
return;
}
// No-Vary-Search matches are transitive. Any future requests that might
// be a match for `match` are also a match for `url`. Since `url` is newer
// we will prefer it, and so `match` will never be used again and we can
// safely remove it from the cache.
--size_;
match->RemoveAndDelete();
}
}
CHECK_LE(size_, max_size_);
++size_;
auto* query_string =
QueryString::CreateAndInsert(query, query_strings, lru_, update_time);
call_observer(query_string);
EvictIfOverfull();
DoInsert(url, base_url, std::move(maybe_cache_key.value()),
std::move(maybe_nvs_data.value()), query, update_time, observer_);
}
bool NoVarySearchCache::ClearData(UrlFilterType filter_type,
@ -505,6 +471,63 @@ void NoVarySearchCache::SetObserver(Observer* observer) {
observer_ = observer;
}
void NoVarySearchCache::ReplayInsert(std::string base_url_cache_key,
HttpNoVarySearchData nvs_data,
std::optional<std::string> query,
base::Time update_time) {
const std::string base_url_string =
HttpCache::GetResourceURLFromHttpCacheKey(base_url_cache_key);
const GURL base_url(base_url_string);
if (!BaseURLIsAcceptable(base_url)) {
return;
}
// The URL should have been stored in its canonical form.
if (base_url_string != base_url.possibly_invalid_spec()) {
return;
}
if (query && query->find('#') != std::string::npos) {
return;
}
const GURL url = ReconstructOriginalURLFromQuery(base_url, query);
// To be extra careful to avoid re-entrancy, explicitly set `observer` to
// nullptr so that no notification is fired for this insertion.
DoInsert(url, base_url, std::move(base_url_cache_key), std::move(nvs_data),
query, update_time, /*observer=*/nullptr);
}
void NoVarySearchCache::ReplayErase(const std::string& base_url_cache_key,
const HttpNoVarySearchData& nvs_data,
const std::optional<std::string>& query) {
const auto map_it = map_.find(BaseURLCacheKey(base_url_cache_key));
if (map_it == map_.end()) {
return;
}
DataMapType& data_map = map_it->second;
const auto data_it = data_map.find(nvs_data);
if (data_it == data_map.end()) {
return;
}
QueryStringList& query_strings = data_it->second;
QueryString* query_string = nullptr;
ForEachQueryString(query_strings.list,
[&query_string, &query](QueryString* candidate) {
if (!query_string && candidate->query() == query) {
query_string = candidate;
}
});
if (!query_string) {
return;
}
// TODO(https://crbug.com/382394774): This could be made more efficient in the
// case when the map keys need to be deleted since we have `map_it` and
// `data_it` already available.
EraseQuery(query_string);
}
// This is out-of-line to discourage inlining so the bots can detect if it is
// accidentally linked into the binary.
size_t NoVarySearchCache::GetSizeForTesting() const {
@ -569,6 +592,64 @@ void NoVarySearchCache::EraseQuery(QueryString* query_string) {
}
}
void NoVarySearchCache::DoInsert(const GURL& url,
const GURL& base_url,
std::string base_url_cache_key,
HttpNoVarySearchData nvs_data,
std::optional<std::string_view> query,
base::Time update_time,
Observer* observer) {
const BaseURLCacheKey cache_key(std::move(base_url_cache_key));
const auto [it, _] = map_.try_emplace(std::move(cache_key));
const BaseURLCacheKey& cache_key_ref = it->first;
DataMapType& data_map = it->second;
const auto [data_it, inserted] =
data_map.emplace(std::move(nvs_data), it->first);
const HttpNoVarySearchData& nvs_data_ref = data_it->first;
QueryStringList& query_strings = data_it->second;
const auto call_observer = [observer, &cache_key_ref, &nvs_data_ref,
update_time](const QueryString* query_string) {
if (observer) {
observer->OnInsert(cache_key_ref.value(), nvs_data_ref,
query_string->query(), update_time);
}
};
if (inserted) {
query_strings.nvs_data_ref = &nvs_data_ref;
} else {
// There was already an entry for this `nvs_data`. We need to check if it
// has a match for the URL we're trying to insert. If it does, we should
// update or replace the existing QueryString.
if (auto result =
FindQueryStringInList(query_strings, base_url, url, nvs_data_ref)) {
QueryString* match = result->match;
if (match->query() == query) {
// In the exact match case we can use the existing QueryString object.
match->set_update_time(update_time);
match->MoveToHead(query_strings.list);
match->MoveToHead(lru_);
call_observer(match);
return;
}
// No-Vary-Search matches are transitive. Any future requests that might
// be a match for `match` are also a match for `url`. Since `url` is newer
// we will prefer it, and so `match` will never be used again and we can
// safely remove it from the cache.
--size_;
match->RemoveAndDelete();
}
}
CHECK_LE(size_, max_size_);
++size_;
auto* query_string =
QueryString::CreateAndInsert(query, query_strings, lru_, update_time);
call_observer(query_string);
EvictIfOverfull();
}
// static
void NoVarySearchCache::FindQueryStringsInTimeRange(
DataMapType& data_map,

@ -170,6 +170,32 @@ class NET_EXPORT_PRIVATE NoVarySearchCache {
// observing.
void SetObserver(Observer* observer);
// Adds the specified entry to the cache as if by MaybeInsert(), evicting an
// older entry if the cache is full. The entry is treated as if newly used for
// the purposes of eviction. For use when replaying journalled entries. The
// arguments are expected to match a previous call to Observer::OnInsert()
// from a different instance of NoVarySearchCache, but with the same settings
// for cache partitioning. It can also be called with other valid arguments
// for testing. If a valid base URL cannot be extracted from
// `base_url_cache_key`, or `query` contains an invalid character, the call is
// ignored. This will never happen if the arguments are unchanged from a call
// to Observer::OnInsert() with the same partitioning. A valid base URL does
// not contain a query or a fragment. Observer methods are not called.
void ReplayInsert(std::string base_url_cache_key,
HttpNoVarySearchData nvs_data,
std::optional<std::string> query,
base::Time update_time);
// Removes the specified entry from the cache as if by Erase(). For use when
// replaying journalled entries. The arguments are expected to match a
// previous call to Observer::OnErase from a different instance of
// NoVarySearchCache, with the same settings for cache partitioning
// base::Features. If `query` is not found the call silently
// does nothing. Observer methods are not called.
void ReplayErase(const std::string& base_url_cache_key,
const HttpNoVarySearchData& nvs_data,
const std::optional<std::string>& query);
// Returns the size (number of stored original query strings) of the cache.
size_t GetSizeForTesting() const;
@ -234,6 +260,16 @@ class NET_EXPORT_PRIVATE NoVarySearchCache {
// Erases `query_string` from the cache.
void EraseQuery(QueryString* query_string);
// Inserts `query` or marks it as used in the cache. evicting an older entry
// if necessary to make space. `observer` is notified if set.
void DoInsert(const GURL& url,
const GURL& base_url,
std::string base_url_cache_key,
HttpNoVarySearchData nvs_data,
std::optional<std::string_view> query,
base::Time update_time,
Observer* observer);
// Scans all the QueryStrings in `data_map` to find ones in the range
// [delete_begin, delete_end) and appends them to `matches`. `data_map` is
// mutable to reflect that it is returning mutable pointers to QueryString

@ -12,6 +12,7 @@
#include <utility>
#include "base/memory/raw_ptr.h"
#include "base/memory/raw_ref.h"
#include "base/pickle.h"
#include "base/strings/strcat.h"
#include "base/test/scoped_feature_list.h"
@ -151,6 +152,14 @@ class NoVarySearchCacheTest : public ::testing::TestWithParam<bool> {
return cache.GetSizeForTesting() == 1u;
}
std::string GenerateCacheKey(std::string_view url) {
const auto request = TestRequest(GURL(url));
std::optional<std::string> maybe_cache_key =
HttpCache::GenerateCacheKeyForRequest(&request);
EXPECT_TRUE(maybe_cache_key);
return maybe_cache_key.value_or(std::string());
}
private:
base::test::ScopedFeatureList feature_list_;
NoVarySearchCache cache_{kMaxSize};
@ -161,6 +170,14 @@ class NoVarySearchCacheTest : public ::testing::TestWithParam<bool> {
std::vector<scoped_refptr<HttpResponseHeaders>> response_header_storage_;
};
INSTANTIATE_TEST_SUITE_P(All,
NoVarySearchCacheTest,
::testing::Bool(),
[](const testing::TestParamInfo<bool>& info) {
return info.param ? "SplitCacheEnabled"
: "SplitCacheDisabled";
});
TEST_P(NoVarySearchCacheTest, NewlyConstructedCacheIsEmpty) {
EXPECT_EQ(cache().GetSizeForTesting(), 0u);
}
@ -881,9 +898,24 @@ TEST_P(NoVarySearchCacheTest, TruncatedPickle) {
}
}
// An Observer object implemented by GoogleMock.
class MockObserver : public NoVarySearchCache::Observer {
// An Observer that registers and deregisters itself automatically.
class ScopedObserver : public NoVarySearchCache::Observer {
public:
explicit ScopedObserver(NoVarySearchCache& cache) : cache_(cache) {
cache.SetObserver(this);
}
~ScopedObserver() override { cache_->SetObserver(nullptr); }
private:
raw_ref<NoVarySearchCache> cache_;
};
// An Observer object implemented by GoogleMock.
class ScopedMockObserver : public ScopedObserver {
public:
using ScopedObserver::ScopedObserver;
MOCK_METHOD(void,
OnInsert,
(const std::string&,
@ -899,20 +931,6 @@ class MockObserver : public NoVarySearchCache::Observer {
(override));
};
// A version of MockObserver that registers and unregisters itself
// automatically.
class ScopedMockObserver : public ::testing::StrictMock<MockObserver> {
public:
explicit ScopedMockObserver(NoVarySearchCache& cache) : cache_(&cache) {
cache.SetObserver(this);
}
~ScopedMockObserver() override { cache_->SetObserver(nullptr); }
private:
raw_ptr<NoVarySearchCache> cache_;
};
// A matcher which matches No-Vary-Search: key-order
const auto IsKeyOrder =
Eq(HttpNoVarySearchData::CreateFromNoVaryParams({}, false));
@ -1024,14 +1042,219 @@ TEST_P(NoVarySearchCacheTest, DontObserveLookup) {
cache().Lookup(TestRequest("a=6"));
}
// An Observer that clones all changes into a target NoVarySearchCache object.
class CloningObserver : public ScopedObserver {
public:
CloningObserver(NoVarySearchCache& source, NoVarySearchCache& target)
: ScopedObserver(source), target_(target) {}
void OnInsert(const std::string& base_url_cache_key,
const HttpNoVarySearchData& nvs_data,
const std::optional<std::string>& query,
base::Time update_time) override {
target_->ReplayInsert(base_url_cache_key, nvs_data, query, update_time);
}
// Called when an entry is erased by the Erase() method.
void OnErase(const std::string& base_url_cache_key,
const HttpNoVarySearchData& nvs_data,
const std::optional<std::string>& query) override {
target_->ReplayErase(base_url_cache_key, nvs_data, query);
}
private:
raw_ref<NoVarySearchCache> target_;
};
struct CloneMaker {
NoVarySearchCache clone;
CloningObserver observer;
explicit CloneMaker(NoVarySearchCache& source)
: clone(kMaxSize), observer(source, clone) {}
};
class NoVarySearchCacheReplayTest : public NoVarySearchCacheTest {
protected:
struct ReplayTestCase {
std::string_view description;
HttpRequestInfo to_insert;
std::string_view no_vary_search_value;
HttpRequestInfo to_lookup;
};
auto ReplayTestCases() {
return std::to_array<ReplayTestCase>({
{
"Simple key-order",
TestRequest("c=2&a=1"),
"key-order",
TestRequest("a=1&c=2"),
},
{
"Different no-vary-search",
TestRequest("d=3&e=5"),
"params=(\"d\")",
TestRequest("e=5"),
},
{
"Different base URL",
TestRequest(GURL("https://www.example.com/other?c=2&a=1")),
"key-order",
TestRequest(GURL("https://www.example.com/other?a=1&c=2")),
},
{
"Different site",
TestRequest(GURL("https://example.example/?c=2&a=1")),
"key-order",
TestRequest(GURL("https://example.example/?a=1&c=2")),
},
{
"No question mark",
TestRequest(GURL("https://example2.example/")),
"params",
TestRequest(GURL("https://example2.example/?q=ignored")),
},
});
}
};
INSTANTIATE_TEST_SUITE_P(All,
NoVarySearchCacheTest,
NoVarySearchCacheReplayTest,
::testing::Bool(),
[](const testing::TestParamInfo<bool>& info) {
return info.param ? "SplitCacheEnabled"
: "SplitCacheDisabled";
});
TEST_P(NoVarySearchCacheReplayTest, Inserts) {
const auto test_cases = ReplayTestCases();
CloneMaker maker(cache());
for (const auto& [description, to_insert, no_vary_search_value, to_lookup] :
test_cases) {
cache().MaybeInsert(to_insert, TestHeaders(no_vary_search_value));
}
EXPECT_EQ(maker.clone.GetSizeForTesting(), test_cases.size());
for (const auto& [description, to_insert, no_vary_search_value, to_lookup] :
test_cases) {
SCOPED_TRACE(description);
auto source_lookup_result = cache().Lookup(to_lookup);
auto target_lookup_result = maker.clone.Lookup(to_lookup);
ASSERT_TRUE(source_lookup_result);
ASSERT_TRUE(target_lookup_result);
EXPECT_EQ(source_lookup_result->original_url,
target_lookup_result->original_url);
}
}
TEST_P(NoVarySearchCacheReplayTest, Erases) {
const auto test_cases = ReplayTestCases();
CloneMaker maker(cache());
for (const auto& [description, to_insert, no_vary_search_value, to_lookup] :
test_cases) {
cache().MaybeInsert(to_insert, TestHeaders(no_vary_search_value));
}
for (const auto& [description, to_insert, no_vary_search_value, to_lookup] :
test_cases) {
SCOPED_TRACE(description);
auto source_lookup_result = cache().Lookup(to_lookup);
ASSERT_TRUE(source_lookup_result);
cache().Erase(std::move(source_lookup_result->erase_handle));
EXPECT_FALSE(maker.clone.Lookup(to_lookup));
}
EXPECT_EQ(maker.clone.GetSizeForTesting(), 0u);
EXPECT_TRUE(maker.clone.IsTopLevelMapEmptyForTesting());
}
TEST_P(NoVarySearchCacheTest, ReplayInsertBadURLs) {
struct TestCase {
std::string_view description;
std::string_view bad_url;
};
static constexpr auto kBadURLs = std::to_array<TestCase>({
{"Invalid URL", ":"},
{"Not canonical", "https://example%2Eexample/"},
{"No path", "what:"},
{"Has username", "https://bob@example.example/"},
{"Has password", "https://:pass@example.example/"},
{"Has query", "https://example.example/?"},
{"Has ref", "https://example.example/#water"},
});
static constexpr std::string_view kRealURL = "https://example.example/test";
const std::string real_cache_key = GenerateCacheKey(kRealURL);
const auto nvs_data = HttpNoVarySearchData::CreateFromNoVaryParams({}, true);
const std::optional<std::string> query = "t=1";
const base::Time update_time;
for (const auto& [description, bad_url] : kBadURLs) {
SCOPED_TRACE(description);
std::string modified_cache_key = real_cache_key;
base::ReplaceFirstSubstringAfterOffset(&modified_cache_key, 0u, kRealURL,
bad_url);
cache().ReplayInsert(modified_cache_key, nvs_data, query, update_time);
EXPECT_EQ(cache().GetSizeForTesting(), 0u);
}
}
TEST_P(NoVarySearchCacheTest, ReplayInsertBadQuery) {
const std::string cache_key = GenerateCacheKey("https://example.example/");
const auto nvs_data = HttpNoVarySearchData::CreateFromNoVaryParams({}, true);
const base::Time update_time;
cache().ReplayInsert(cache_key, nvs_data, "t=1#what", update_time);
EXPECT_EQ(cache().GetSizeForTesting(), 0u);
}
TEST_P(NoVarySearchCacheTest, ReplayEraseOnEmptyCache) {
const std::string cache_key = GenerateCacheKey("https://example.example/");
const auto nvs_data = HttpNoVarySearchData::CreateFromNoVaryParams({}, true);
cache().ReplayErase(cache_key, nvs_data, "t=1");
EXPECT_EQ(cache().GetSizeForTesting(), 0u);
}
TEST_P(NoVarySearchCacheTest, ReplayEraseMismatchedCacheKey) {
const std::string cache_key = GenerateCacheKey("https://example.example/");
const auto nvs_data = HttpNoVarySearchData::CreateFromNoVaryParams({}, true);
const std::optional<std::string> query = "t=1";
const base::Time update_time;
cache().ReplayInsert(cache_key, nvs_data, query, update_time);
cache().ReplayErase(cache_key + ".", nvs_data, query);
EXPECT_EQ(cache().GetSizeForTesting(), 1u);
}
TEST_P(NoVarySearchCacheTest, ReplayEraseMismatchedNVSData) {
const std::string cache_key = GenerateCacheKey("https://example.example/");
const auto nvs_data = HttpNoVarySearchData::CreateFromNoVaryParams({}, true);
const std::optional<std::string> query = "t=1";
const base::Time update_time;
cache().ReplayInsert(cache_key, nvs_data, query, update_time);
const auto mismatched_nvs_data =
HttpNoVarySearchData::CreateFromNoVaryParams({"z"}, true);
cache().ReplayErase(cache_key, mismatched_nvs_data, query);
EXPECT_EQ(cache().GetSizeForTesting(), 1u);
}
TEST_P(NoVarySearchCacheTest, ReplayEraseMismatchedQuery) {
const std::string cache_key = GenerateCacheKey("https://example.example/");
const auto nvs_data = HttpNoVarySearchData::CreateFromNoVaryParams({}, true);
const std::optional<std::string> query = "t=1";
const base::Time update_time;
cache().ReplayInsert(cache_key, nvs_data, query, update_time);
const std::optional<std::string> mismatched_query = "t=2";
cache().ReplayErase(cache_key, nvs_data, mismatched_query);
EXPECT_EQ(cache().GetSizeForTesting(), 1u);
}
// TODO(https://crbug.com/390216627): Test the various experiments that affect
// the cache key and make sure they also affect NoVarySearchCache.