Commit 37e9414

Craig Radcliffe authored on Dec 14, 2020

dns cache: Replace copy-on-add/remove with lock-guarded global map (envoyproxy#14349)

* Change both the DNS cache and the dynamic forward proxy cluster to use global maps guarded by mutexes. Previously, each of these maps was copied on every add/remove, which led to performance problems as the maps grew.
* Change the worker DNS cache callback container to a map from hostnames to lists of callbacks. This ensures that callbacks are invoked only for hosts that are completely ready (e.g. the dynamic forward proxy's host info has been populated).
* Hold the PrimaryHostInfo objects by shared_ptr to reduce the time the main thread holds the map lock.

Signed-off-by: Craig Radcliffe <craig.radcliffe@broadcom.com>

1 parent 935a659 commit 37e9414
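
The core of the change is swapping a copy-on-write host map for a single map guarded by an absl::Mutex. The sketch below is illustrative only (names and structure are simplified, not taken from this commit) and contrasts the two patterns:

#include <memory>
#include <string>
#include "absl/base/thread_annotations.h"
#include "absl/container/flat_hash_map.h"
#include "absl/synchronization/mutex.h"

struct HostInfo {};
using HostMap = absl::flat_hash_map<std::string, std::shared_ptr<HostInfo>>;

// Old pattern: readers hold a shared_ptr snapshot; every add/remove copies the whole map,
// so mutation cost grows linearly with the number of cached hosts.
class CopyOnWriteHosts {
public:
  CopyOnWriteHosts() : map_(std::make_shared<HostMap>()) {}
  std::shared_ptr<const HostMap> snapshot() const {
    absl::ReaderMutexLock lock(&mu_);
    return map_;
  }
  void add(const std::string& host, std::shared_ptr<HostInfo> info) {
    auto copy = std::make_shared<HostMap>(*snapshot()); // O(n) copy on every mutation
    (*copy)[host] = std::move(info);
    absl::WriterMutexLock lock(&mu_);
    map_ = std::move(copy);
  }

private:
  mutable absl::Mutex mu_;
  std::shared_ptr<const HostMap> map_ ABSL_GUARDED_BY(mu_);
};

// New pattern: one shared map; writers take a writer lock, readers a reader lock. No copies.
class LockGuardedHosts {
public:
  void add(const std::string& host, std::shared_ptr<HostInfo> info) {
    absl::WriterMutexLock lock(&mu_);
    map_[host] = std::move(info); // amortized O(1), independent of map size
  }
  std::shared_ptr<HostInfo> find(const std::string& host) const {
    absl::ReaderMutexLock lock(&mu_);
    auto it = map_.find(host);
    return it == map_.end() ? nullptr : it->second;
  }

private:
  mutable absl::Mutex mu_;
  HostMap map_ ABSL_GUARDED_BY(mu_);
};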

File tree

10 files changed: +386 -238 lines changed

source/common/common/cleanup.h (+38)

@@ -55,4 +55,42 @@ template <class T> class RaiiListElement {
   bool cancelled_;
 };
 
+// RAII helper class to add an element to a std::list held inside an absl::flat_hash_map on
+// construction and erase it on destruction, unless the cancel method has been called. If the list
+// is empty after removal of the element, the destructor will also remove the list from the map.
+template <class Key, class Value> class RaiiMapOfListElement {
+public:
+  using MapOfList = absl::flat_hash_map<Key, std::list<Value>>;
+
+  template <typename ConvertibleToKey>
+  RaiiMapOfListElement(MapOfList& map, const ConvertibleToKey& key, Value value)
+      : map_(map), list_(map_.try_emplace(key).first->second), key_(key), cancelled_(false) {
+    it_ = list_.emplace(list_.begin(), value);
+  }
+
+  virtual ~RaiiMapOfListElement() {
+    if (!cancelled_) {
+      erase();
+    }
+  }
+
+  void cancel() { cancelled_ = true; }
+
+private:
+  void erase() {
+    ASSERT(!cancelled_);
+    list_.erase(it_);
+    if (list_.empty()) {
+      map_.erase(key_);
+    }
+    cancelled_ = true;
+  }
+
+  MapOfList& map_;
+  std::list<Value>& list_;
+  // Because of absl::flat_hash_map iterator instability we have to keep a copy of the key.
+  const Key key_;
+  typename MapOfList::mapped_type::iterator it_;
+  bool cancelled_;
+};
 } // namespace Envoy
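
A minimal usage sketch for the new helper (the hostname, callback type, and main() harness here are made up for illustration; it assumes Envoy's cleanup.h and ASSERT macro are available): registration happens in the constructor, and destruction removes both the callback and, if it was the last one, the per-host list:

#include <functional>
#include <list>
#include <string>
#include "absl/container/flat_hash_map.h"
#include "common/common/cleanup.h" // include path as of this commit

using Callback = std::function<void()>;
using CallbackMap = absl::flat_hash_map<std::string, std::list<Callback>>;

int main() {
  CallbackMap callbacks_by_host;
  {
    // Adds the callback to the list stored under "foo.example.com", creating the list if needed.
    Envoy::RaiiMapOfListElement<std::string, Callback> handle(
        callbacks_by_host, "foo.example.com", [] { /* resolution-complete work */ });
    // callbacks_by_host["foo.example.com"] now holds exactly one callback.
  }
  // The destructor erased the callback; the list became empty, so the key was removed too.
  return callbacks_by_host.empty() ? 0 : 1;
}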

source/extensions/clusters/dynamic_forward_proxy/cluster.cc (+87 -93)

@@ -25,8 +25,7 @@ Cluster::Cluster(
           added_via_api, factory_context.dispatcher().timeSource()),
       dns_cache_manager_(cache_manager_factory.get()),
       dns_cache_(dns_cache_manager_->getCache(config.dns_cache_config())),
-      update_callbacks_handle_(dns_cache_->addUpdateCallbacks(*this)), local_info_(local_info),
-      host_map_(std::make_shared<HostInfoMap>()) {
+      update_callbacks_handle_(dns_cache_->addUpdateCallbacks(*this)), local_info_(local_info) {
   // Block certain TLS context parameters that don't make sense on a cluster-wide scale. We will
   // support these parameters dynamically in the future. This is not an exhaustive list of
   // parameters that don't make sense but should be the most obvious ones that a user might set
@@ -44,123 +43,116 @@
 
 void Cluster::startPreInit() {
   // If we are attaching to a pre-populated cache we need to initialize our hosts.
-  auto existing_hosts = dns_cache_->hosts();
-  if (!existing_hosts.empty()) {
-    std::shared_ptr<HostInfoMap> new_host_map;
-    std::unique_ptr<Upstream::HostVector> hosts_added;
-    for (const auto& existing_host : existing_hosts) {
-      addOrUpdateWorker(existing_host.first, existing_host.second, new_host_map, hosts_added);
-    }
-    swapAndUpdateMap(new_host_map, *hosts_added, {});
+  std::unique_ptr<Upstream::HostVector> hosts_added;
+  dns_cache_->iterateHostMap(
+      [&](absl::string_view host, const Common::DynamicForwardProxy::DnsHostInfoSharedPtr& info) {
+        addOrUpdateHost(host, info, hosts_added);
+      });
+  if (hosts_added) {
+    updatePriorityState(*hosts_added, {});
   }
-
   onPreInitComplete();
 }
 
-void Cluster::addOrUpdateWorker(
-    const std::string& host,
+void Cluster::addOrUpdateHost(
+    absl::string_view host,
     const Extensions::Common::DynamicForwardProxy::DnsHostInfoSharedPtr& host_info,
-    std::shared_ptr<HostInfoMap>& new_host_map,
     std::unique_ptr<Upstream::HostVector>& hosts_added) {
-  // We should never get a host with no address from the cache.
-  ASSERT(host_info->address() != nullptr);
-
-  // NOTE: Right now we allow a DNS cache to be shared between multiple clusters. Though we have
-  // connection/request circuit breakers on the cluster, we don't have any way to control the
-  // maximum hosts on a cluster. We currently assume that host data shared via shared pointer is a
-  // marginal memory cost above that already used by connections and requests, so relying on
-  // connection/request circuit breakers is sufficient. We may have to revisit this in the future.
-
-  HostInfoMapSharedPtr current_map = getCurrentHostMap();
-  const auto host_map_it = current_map->find(host);
-  if (host_map_it != current_map->end()) {
-    // If we only have an address change, we can do that swap inline without any other updates.
-    // The appropriate R/W locking is in place to allow this. The details of this locking are:
-    // - Hosts are not thread local, they are global.
-    // - We take a read lock when reading the address and a write lock when changing it.
-    // - Address updates are very rare.
-    // - Address reads are only done when a connection is being made and a "real" host
-    //   description is created or the host is queried via the admin endpoint. Both of
-    //   these operations are relatively rare and the read lock is held for a short period
-    //   of time.
-    //
-    // TODO(mattklein123): Right now the dynamic forward proxy / DNS cache works similar to how
-    //                     logical DNS works, meaning that we only store a single address per
-    //                     resolution. It would not be difficult to also expose strict DNS
-    //                     semantics, meaning the cache would expose multiple addresses and the
-    //                     cluster would create multiple logical hosts based on those addresses.
-    //                     We will leave this is a follow up depending on need.
-    ASSERT(host_info == host_map_it->second.shared_host_info_);
-    ASSERT(host_map_it->second.shared_host_info_->address() !=
-           host_map_it->second.logical_host_->address());
-    ENVOY_LOG(debug, "updating dfproxy cluster host address '{}'", host);
-    host_map_it->second.logical_host_->setNewAddress(host_info->address(), dummy_lb_endpoint_);
-    return;
-  }
+  Upstream::LogicalHostSharedPtr emplaced_host;
+  {
+    absl::WriterMutexLock lock{&host_map_lock_};
+    // We should never get a host with no address from the cache.
+    ASSERT(host_info->address() != nullptr);
+
+    // NOTE: Right now we allow a DNS cache to be shared between multiple clusters. Though we have
+    // connection/request circuit breakers on the cluster, we don't have any way to control the
+    // maximum hosts on a cluster. We currently assume that host data shared via shared pointer is a
+    // marginal memory cost above that already used by connections and requests, so relying on
+    // connection/request circuit breakers is sufficient. We may have to revisit this in the future.
+    const auto host_map_it = host_map_.find(host);
+    if (host_map_it != host_map_.end()) {
+      // If we only have an address change, we can do that swap inline without any other updates.
+      // The appropriate R/W locking is in place to allow this. The details of this locking are:
+      // - Hosts are not thread local, they are global.
+      // - We take a read lock when reading the address and a write lock when changing it.
+      // - Address updates are very rare.
+      // - Address reads are only done when a connection is being made and a "real" host
+      //   description is created or the host is queried via the admin endpoint. Both of
+      //   these operations are relatively rare and the read lock is held for a short period
+      //   of time.
+      //
+      // TODO(mattklein123): Right now the dynamic forward proxy / DNS cache works similar to how
+      //                     logical DNS works, meaning that we only store a single address per
      //                     resolution. It would not be difficult to also expose strict DNS
+      //                     semantics, meaning the cache would expose multiple addresses and the
+      //                     cluster would create multiple logical hosts based on those addresses.
+      //                     We will leave this is a follow up depending on need.
+      ASSERT(host_info == host_map_it->second.shared_host_info_);
+      ASSERT(host_map_it->second.shared_host_info_->address() !=
+             host_map_it->second.logical_host_->address());
+      ENVOY_LOG(debug, "updating dfproxy cluster host address '{}'", host);
+      host_map_it->second.logical_host_->setNewAddress(host_info->address(), dummy_lb_endpoint_);
+      return;
+    }
 
-  ENVOY_LOG(debug, "adding new dfproxy cluster host '{}'", host);
+    ENVOY_LOG(debug, "adding new dfproxy cluster host '{}'", host);
 
-  if (new_host_map == nullptr) {
-    new_host_map = std::make_shared<HostInfoMap>(*current_map);
+    emplaced_host = host_map_
+                        .try_emplace(host, host_info,
                                     std::make_shared<Upstream::LogicalHost>(
+                                         info(), std::string{host}, host_info->address(),
+                                         dummy_locality_lb_endpoint_, dummy_lb_endpoint_, nullptr,
+                                         time_source_))
+                        .first->second.logical_host_;
   }
-  const auto emplaced =
-      new_host_map->try_emplace(host, host_info,
-                                std::make_shared<Upstream::LogicalHost>(
-                                    info(), host, host_info->address(), dummy_locality_lb_endpoint_,
-                                    dummy_lb_endpoint_, nullptr, time_source_));
+
+  ASSERT(emplaced_host);
   if (hosts_added == nullptr) {
     hosts_added = std::make_unique<Upstream::HostVector>();
   }
-  hosts_added->emplace_back(emplaced.first->second.logical_host_);
+  hosts_added->emplace_back(emplaced_host);
 }
 
 void Cluster::onDnsHostAddOrUpdate(
     const std::string& host,
     const Extensions::Common::DynamicForwardProxy::DnsHostInfoSharedPtr& host_info) {
-  std::shared_ptr<HostInfoMap> new_host_map;
+  ENVOY_LOG(debug, "Adding host info for {}", host);
+
   std::unique_ptr<Upstream::HostVector> hosts_added;
-  addOrUpdateWorker(host, host_info, new_host_map, hosts_added);
+  addOrUpdateHost(host, host_info, hosts_added);
   if (hosts_added != nullptr) {
-    ASSERT(!new_host_map->empty());
     ASSERT(!hosts_added->empty());
-    // Swap in the new map. This will be picked up when the per-worker LBs are recreated via
-    // the host set update.
-    swapAndUpdateMap(new_host_map, *hosts_added, {});
+    updatePriorityState(*hosts_added, {});
   }
 }
 
-void Cluster::swapAndUpdateMap(const HostInfoMapSharedPtr& new_hosts_map,
-                               const Upstream::HostVector& hosts_added,
-                               const Upstream::HostVector& hosts_removed) {
-  {
-    absl::WriterMutexLock lock(&host_map_lock_);
-    host_map_ = new_hosts_map;
-  }
-
+void Cluster::updatePriorityState(const Upstream::HostVector& hosts_added,
+                                  const Upstream::HostVector& hosts_removed) {
   Upstream::PriorityStateManager priority_state_manager(*this, local_info_, nullptr);
   priority_state_manager.initializePriorityFor(dummy_locality_lb_endpoint_);
-  for (const auto& host : (*new_hosts_map)) {
-    priority_state_manager.registerHostForPriority(host.second.logical_host_,
-                                                   dummy_locality_lb_endpoint_);
+  {
+    absl::ReaderMutexLock lock{&host_map_lock_};
+    for (const auto& host : host_map_) {
+      priority_state_manager.registerHostForPriority(host.second.logical_host_,
+                                                     dummy_locality_lb_endpoint_);
+    }
   }
   priority_state_manager.updateClusterPrioritySet(
       0, std::move(priority_state_manager.priorityState()[0].first), hosts_added, hosts_removed,
       absl::nullopt, absl::nullopt);
 }
 
 void Cluster::onDnsHostRemove(const std::string& host) {
-  HostInfoMapSharedPtr current_map = getCurrentHostMap();
-  const auto host_map_it = current_map->find(host);
-  ASSERT(host_map_it != current_map->end());
-  const auto new_host_map = std::make_shared<HostInfoMap>(*current_map);
   Upstream::HostVector hosts_removed;
-  hosts_removed.emplace_back(host_map_it->second.logical_host_);
-  new_host_map->erase(host);
-  ENVOY_LOG(debug, "removing dfproxy cluster host '{}'", host);
-
-  // Swap in the new map. This will be picked up when the per-worker LBs are recreated via
-  // the host set update.
-  swapAndUpdateMap(new_host_map, {}, hosts_removed);
+  {
+    absl::WriterMutexLock lock{&host_map_lock_};
+    const auto host_map_it = host_map_.find(host);
+    ASSERT(host_map_it != host_map_.end());
+    hosts_removed.emplace_back(host_map_it->second.logical_host_);
+    host_map_.erase(host);
+    ENVOY_LOG(debug, "removing dfproxy cluster host '{}'", host);
  }
+  updatePriorityState({}, hosts_removed);
 }
 
 Upstream::HostConstSharedPtr
@@ -179,13 +171,15 @@ Cluster::LoadBalancer::chooseHost(Upstream::LoadBalancerContext* context) {
   if (host.empty()) {
     return nullptr;
   }
-
-  const auto host_it = host_map_->find(host);
-  if (host_it == host_map_->end()) {
-    return nullptr;
-  } else {
-    host_it->second.shared_host_info_->touch();
-    return host_it->second.logical_host_;
+  {
+    absl::ReaderMutexLock lock{&cluster_.host_map_lock_};
+    const auto host_it = cluster_.host_map_.find(host);
+    if (host_it == cluster_.host_map_.end()) {
+      return nullptr;
+    } else {
+      host_it->second.shared_host_info_->touch();
+      return host_it->second.logical_host_;
+    }
   }
 }
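
Note the lock scoping in the new code above: onDnsHostRemove takes its WriterMutexLock in an inner block and calls updatePriorityState() only after that block ends, and addOrUpdateHost likewise releases the writer lock before its callers invoke updatePriorityState(). This matters because updatePriorityState() takes its own ReaderMutexLock on host_map_lock_, and absl::Mutex is not reentrant. A minimal sketch of the pitfall being avoided (illustrative, not commit code):

#include "absl/synchronization/mutex.h"

absl::Mutex map_lock;

void rebuildPriorityState() {
  absl::ReaderMutexLock lock(&map_lock); // takes the same mutex in shared mode
  // ... walk the map and rebuild per-priority host lists ...
}

void removeHostWrong() {
  absl::WriterMutexLock lock(&map_lock);
  // ... erase the host ...
  rebuildPriorityState(); // self-deadlock: reader lock requested while we hold the writer lock
}

void removeHostRight() {
  {
    absl::WriterMutexLock lock(&map_lock);
    // ... erase the host and remember what was removed ...
  } // writer lock released here
  rebuildPriorityState(); // safe: the mutex is free when the reader lock is taken
}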

source/extensions/clusters/dynamic_forward_proxy/cluster.h (+13 -20)

@@ -52,10 +52,9 @@ class Cluster : public Upstream::BaseDynamicClusterImpl,
   };
 
   using HostInfoMap = absl::flat_hash_map<std::string, HostInfo>;
-  using HostInfoMapSharedPtr = std::shared_ptr<const HostInfoMap>;
 
   struct LoadBalancer : public Upstream::LoadBalancer {
-    LoadBalancer(const HostInfoMapSharedPtr& host_map) : host_map_(host_map) {}
+    LoadBalancer(const Cluster& cluster) : cluster_(cluster) {}
 
     // Upstream::LoadBalancer
     Upstream::HostConstSharedPtr chooseHost(Upstream::LoadBalancerContext* context) override;
@@ -64,16 +63,14 @@ class Cluster : public Upstream::BaseDynamicClusterImpl,
       return nullptr;
     }
 
-    const HostInfoMapSharedPtr host_map_;
+    const Cluster& cluster_;
   };
 
   struct LoadBalancerFactory : public Upstream::LoadBalancerFactory {
     LoadBalancerFactory(Cluster& cluster) : cluster_(cluster) {}
 
     // Upstream::LoadBalancerFactory
-    Upstream::LoadBalancerPtr create() override {
-      return std::make_unique<LoadBalancer>(cluster_.getCurrentHostMap());
-    }
+    Upstream::LoadBalancerPtr create() override { return std::make_unique<LoadBalancer>(cluster_); }
 
     Cluster& cluster_;
   };
@@ -90,19 +87,15 @@ class Cluster : public Upstream::BaseDynamicClusterImpl,
     Cluster& cluster_;
   };
 
-  HostInfoMapSharedPtr getCurrentHostMap() {
-    absl::ReaderMutexLock lock(&host_map_lock_);
-    return host_map_;
-  }
-
   void
-  addOrUpdateWorker(const std::string& host,
-                    const Extensions::Common::DynamicForwardProxy::DnsHostInfoSharedPtr& host_info,
-                    std::shared_ptr<HostInfoMap>& new_host_map,
-                    std::unique_ptr<Upstream::HostVector>& hosts_added);
-  void swapAndUpdateMap(const HostInfoMapSharedPtr& new_hosts_map,
-                        const Upstream::HostVector& hosts_added,
-                        const Upstream::HostVector& hosts_removed);
+  addOrUpdateHost(absl::string_view host,
+                  const Extensions::Common::DynamicForwardProxy::DnsHostInfoSharedPtr& host_info,
+                  std::unique_ptr<Upstream::HostVector>& hosts_added)
+      ABSL_LOCKS_EXCLUDED(host_map_lock_);
+
+  void updatePriorityState(const Upstream::HostVector& hosts_added,
+                           const Upstream::HostVector& hosts_removed)
+      ABSL_LOCKS_EXCLUDED(host_map_lock_);
 
   const Extensions::Common::DynamicForwardProxy::DnsCacheManagerSharedPtr dns_cache_manager_;
   const Extensions::Common::DynamicForwardProxy::DnsCacheSharedPtr dns_cache_;
@@ -112,8 +105,8 @@ class Cluster : public Upstream::BaseDynamicClusterImpl,
   const envoy::config::endpoint::v3::LbEndpoint dummy_lb_endpoint_;
   const LocalInfo::LocalInfo& local_info_;
 
-  absl::Mutex host_map_lock_;
-  HostInfoMapSharedPtr host_map_ ABSL_GUARDED_BY(host_map_lock_);
+  mutable absl::Mutex host_map_lock_;
+  HostInfoMap host_map_ ABSL_GUARDED_BY(host_map_lock_);
 
   friend class ClusterFactory;
   friend class ClusterTest;
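
The header now pairs the map with Clang thread-safety annotations: host_map_ is ABSL_GUARDED_BY(host_map_lock_), the helpers are ABSL_LOCKS_EXCLUDED(host_map_lock_), and the mutex is mutable so that const readers (like the load balancer's chooseHost() path) can still take a reader lock. A condensed sketch of the same pattern, with made-up names:

#include <string>
#include "absl/base/thread_annotations.h"
#include "absl/container/flat_hash_map.h"
#include "absl/synchronization/mutex.h"

class HostTable {
public:
  // The caller must not already hold the lock; clang -Wthread-safety checks this statically.
  void add(const std::string& host) ABSL_LOCKS_EXCLUDED(lock_) {
    absl::WriterMutexLock lock(&lock_);
    hosts_[host] = true;
  }

  // A const method may still lock because the mutex is declared mutable.
  bool contains(const std::string& host) const ABSL_LOCKS_EXCLUDED(lock_) {
    absl::ReaderMutexLock lock(&lock_);
    return hosts_.contains(host);
  }

private:
  mutable absl::Mutex lock_;
  absl::flat_hash_map<std::string, bool> hosts_ ABSL_GUARDED_BY(lock_);
};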

source/extensions/common/dynamic_forward_proxy/dns_cache.h (+7 -3)

@@ -23,7 +23,7 @@ class DnsHostInfo {
    * Returns the host's currently resolved address. This address may change periodically due to
    * async re-resolution.
    */
-  virtual Network::Address::InstanceConstSharedPtr address() PURE;
+  virtual Network::Address::InstanceConstSharedPtr address() const PURE;
 
   /**
    * Returns the host that was actually resolved via DNS. If port was originally specified it will
@@ -172,10 +172,14 @@ class DnsCache {
    */
  virtual AddUpdateCallbacksHandlePtr addUpdateCallbacks(UpdateCallbacks& callbacks) PURE;
 
+  using IterateHostMapCb = std::function<void(absl::string_view, const DnsHostInfoSharedPtr&)>;
+
   /**
-   * @return all hosts currently stored in the cache.
+   * Iterates over all entries in the cache, calling a callback for each entry.
+   *
+   * @param iterate_callback the callback to invoke for each entry in the cache.
    */
-  virtual absl::flat_hash_map<std::string, DnsHostInfoSharedPtr> hosts() PURE;
+  virtual void iterateHostMap(IterateHostMapCb iterate_callback) PURE;
 
   /**
    * Retrieve the DNS host info of a given host currently stored in the cache.
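
Replacing hosts(), which returned a copy of the whole map, with iterateHostMap() lets the cache visit entries under its own internal lock without ever materializing a snapshot. The implementation file is not part of this excerpt, so the sketch below uses hypothetical member names:

// Hypothetical implementation sketch; member names are assumptions, not from this commit.
void DnsCacheImpl::iterateHostMap(IterateHostMapCb iterate_callback) {
  absl::ReaderMutexLock lock(&primary_hosts_lock_); // assumed internal lock
  for (const auto& [host, primary_host] : primary_hosts_) { // assumed internal map
    // Hand out a string_view of the key and a reference to the shared info; no map copy is made.
    iterate_callback(host, primary_host->host_info_);
  }
}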
