Skip to content

Commit c399e19

Browse files
committed
Merge branch 'xds_client_fail_on_data_errors' into xds_resource_timer_change
2 parents d4159bf + 4f6e36a commit c399e19

18 files changed

+2555
-1302
lines changed

CMakeLists.txt

Lines changed: 566 additions & 41 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

build_autogenerated.yaml

Lines changed: 160 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/core/xds/grpc/xds_server_grpc.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ namespace {
3636
constexpr absl::string_view kServerFeatureIgnoreResourceDeletion =
3737
"ignore_resource_deletion";
3838

39+
constexpr absl::string_view kServerFeatureFailOnDataErrors =
40+
"fail_on_data_errors";
41+
3942
constexpr absl::string_view kServerFeatureResourceTimerIsTransientFailure =
4043
"resource_timer_is_transient_error";
4144

@@ -49,6 +52,11 @@ bool GrpcXdsServer::IgnoreResourceDeletion() const {
4952
kServerFeatureIgnoreResourceDeletion)) != server_features_.end();
5053
}
5154

55+
bool GrpcXdsServer::FailOnDataErrors() const {
56+
return server_features_.find(std::string(kServerFeatureFailOnDataErrors)) !=
57+
server_features_.end();
58+
}
59+
5260
bool GrpcXdsServer::ResourceTimerIsTransientFailure() const {
5361
return server_features_.find(std::string(
5462
kServerFeatureResourceTimerIsTransientFailure)) !=
@@ -136,6 +144,7 @@ void GrpcXdsServer::JsonPostLoad(const Json& json, const JsonArgs& args,
136144
if (feature_json.type() == Json::Type::kString &&
137145
(feature_json.string() == kServerFeatureIgnoreResourceDeletion ||
138146
// FIXME: env var guard
147+
feature_json.string() == kServerFeatureFailOnDataErrors ||
139148
feature_json.string() ==
140149
kServerFeatureResourceTimerIsTransientFailure ||
141150
feature_json.string() == kServerFeatureTrustedXdsServer)) {

src/core/xds/grpc/xds_server_grpc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class GrpcXdsServer final : public XdsBootstrap::XdsServer {
3535
const std::string& server_uri() const override { return server_uri_; }
3636

3737
bool IgnoreResourceDeletion() const override;
38+
bool FailOnDataErrors() const override;
3839
bool ResourceTimerIsTransientFailure() const override;
3940

4041
bool TrustedXdsServer() const;

src/core/xds/xds_client/xds_bootstrap.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,13 @@ bool XdsFederationEnabled() {
3434
return parse_succeeded && parsed_value;
3535
}
3636

37+
// TODO(roth): Remove this once the feature passes interop tests.
38+
bool XdsDataErrorHandlingEnabled() {
39+
auto value = GetEnv("GRPC_EXPERIMENTAL_XDS_DATA_ERROR_HANDLING");
40+
if (!value.has_value()) return false;
41+
bool parsed_value;
42+
bool parse_succeeded = gpr_parse_bool_value(value->c_str(), &parsed_value);
43+
return parse_succeeded && parsed_value;
44+
}
45+
3746
} // namespace grpc_core

src/core/xds/xds_client/xds_bootstrap.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
namespace grpc_core {
2727

2828
bool XdsFederationEnabled();
29+
bool XdsDataErrorHandlingEnabled();
2930

3031
class XdsBootstrap {
3132
public:
@@ -47,7 +48,11 @@ class XdsBootstrap {
4748

4849
virtual const std::string& server_uri() const = 0;
4950

51+
// TODO(roth): Remove this method once the data error handling
52+
// feature passes interop tests.
5053
virtual bool IgnoreResourceDeletion() const = 0;
54+
55+
virtual bool FailOnDataErrors() const = 0;
5156
virtual bool ResourceTimerIsTransientFailure() const = 0;
5257

5358
virtual bool Equals(const XdsServer& other) const = 0;

src/core/xds/xds_client/xds_client.cc

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -991,8 +991,8 @@ void XdsClient::XdsChannel::AdsCall::ParseResource(
991991
if (authority_it == xds_client()->authority_state_map_.end()) {
992992
return; // Skip resource -- we don't have a subscription for it.
993993
}
994-
// Found authority, so look up type.
995994
AuthorityState& authority_state = authority_it->second;
995+
// Found authority, so look up type.
996996
auto type_it = authority_state.resource_map.find(context->type);
997997
if (type_it == authority_state.resource_map.end()) {
998998
return; // Skip resource -- we don't have a subscription for it.
@@ -1021,8 +1021,15 @@ void XdsClient::XdsChannel::AdsCall::ParseResource(
10211021
}
10221022
// Update resource state based on whether the resource is valid.
10231023
if (!decode_status.ok()) {
1024+
// If the fail_on_data_errors server feature is present, drop the
1025+
// existing cached resource, if any.
1026+
const bool drop_cached_resource = XdsDataErrorHandlingEnabled() &&
1027+
xds_channel()->server_.FailOnDataErrors();
10241028
resource_state.SetNacked(context->version, decode_status.message(),
1025-
context->update_time_);
1029+
context->update_time_, drop_cached_resource);
1030+
// If there is no cached resource (either because we didn't have one
1031+
// or because we just dropped it due to fail_on_data_errors), then notify
1032+
// via OnResourceChanged(); otherwise, notify via OnAmbientError().
10261033
if (!resource_state.HasResource()) {
10271034
xds_client()->NotifyWatchersOnResourceChanged(
10281035
resource_state.WatcherStatus(), resource_state.watchers(),
@@ -1220,7 +1227,9 @@ void XdsClient::XdsChannel::AdsCall::OnRecvMessage(absl::string_view payload) {
12201227
// that the resource does not exist. For that case, we rely on
12211228
// the request timeout instead.
12221229
if (!resource_state.HasResource()) continue;
1223-
if (xds_channel()->server_.IgnoreResourceDeletion()) {
1230+
if (XdsDataErrorHandlingEnabled()
1231+
? !xds_channel()->server_.FailOnDataErrors()
1232+
: xds_channel()->server_.IgnoreResourceDeletion()) {
12241233
if (!resource_state.ignored_deletion()) {
12251234
LOG(ERROR)
12261235
<< "[xds_client " << xds_client() << "] xds server "
@@ -1338,7 +1347,9 @@ void XdsClient::ResourceState::SetAcked(
13381347

13391348
void XdsClient::ResourceState::SetNacked(const std::string& version,
13401349
absl::string_view details,
1341-
Timestamp update_time) {
1350+
Timestamp update_time,
1351+
bool drop_cached_resource) {
1352+
if (drop_cached_resource) resource_.reset();
13421353
client_status_ = ClientResourceStatus::NACKED;
13431354
failed_version_ = version;
13441355
failed_details_ = details;

src/core/xds/xds_client/xds_client.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ class XdsClient : public DualRefCounted<XdsClient> {
309309
std::string serialized_proto, std::string version,
310310
Timestamp update_time);
311311
void SetNacked(const std::string& version, absl::string_view details,
312-
Timestamp update_time);
312+
Timestamp update_time, bool drop_cached_resource);
313313
void SetDoesNotExist();
314314
void SetTransientError(const std::string& details);
315315

0 commit comments

Comments
 (0)