Skip to content

Commit

Permalink
config: adding update time statistic. (#10220)
Browse files Browse the repository at this point in the history
Signed-off-by: Jakub Sobon <mumak@google.com>
  • Loading branch information
mum4k authored Mar 10, 2020
1 parent c85bf32 commit 139f926
Show file tree
Hide file tree
Showing 18 changed files with 166 additions and 54 deletions.
1 change: 1 addition & 0 deletions docs/root/configuration/overview/mgmt_server.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,6 @@ The following statistics are generated for all subscriptions.
update_success, Counter, Total API fetches completed successfully
update_failure, Counter, Total API fetches that failed because of network errors
update_rejected, Counter, Total API fetches that failed because of schema/validation errors
update_time, Gauge, Timestamp of the last successful API fetch attempt as milliseconds since the epoch. Refreshed even after a trivial configuration reload that contained no configuration changes.
version, Gauge, Hash of the contents from the last successful API fetch
control_plane.connected_state, Gauge, A boolean (1 for connected and 0 for disconnected) that indicates the current connection state with management server
1 change: 1 addition & 0 deletions docs/root/intro/version_history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Version history
* admin: added support for displaying ip address subject alternate names in :ref:`certs<operations_admin_interface_certs>` end point.
* buffer: force copy when appending small slices to OwnedImpl buffer to avoid fragmentation.
* config: use type URL to select an extension whenever the config type URL (or its previous versions) uniquely identify a typed extension, see :ref:`extension configuration <config_overview_extension_configuration>`.
* config: added stat :ref:`update_time <config_cluster_manager_cds>`.
* datasource: added retry policy for remote async data source.
* dns: the STRICT_DNS cluster now only resolves to 0 hosts if DNS resolution successfully returns 0 hosts.
* dns: added support for :ref:`dns_failure_refresh_rate <envoy_api_field_config.common.dynamic_forward_proxy.v2alpha.DnsCacheConfig.dns_failure_refresh_rate>` for the :ref:`dns cache <envoy_api_msg_config.common.dynamic_forward_proxy.v2alpha.DnsCacheConfig>` to set the DNS refresh rate during failures.
Expand Down
1 change: 1 addition & 0 deletions include/envoy/config/subscription.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ using SubscriptionPtr = std::unique_ptr<Subscription>;
COUNTER(update_failure) \
COUNTER(update_rejected) \
COUNTER(update_success) \
GAUGE(update_time, NeverImport) \
GAUGE(version, NeverImport)

/**
Expand Down
6 changes: 6 additions & 0 deletions source/common/common/utility.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class SpecifierConstantValues {
};

using SpecifierConstants = ConstSingleton<SpecifierConstantValues>;
using UnsignedMilliseconds = std::chrono::duration<uint64_t, std::milli>;

} // namespace

Expand Down Expand Up @@ -215,6 +216,11 @@ bool DateUtil::timePointValid(MonotonicTime time_point) {
.count() != 0;
}

uint64_t DateUtil::nowToMilliseconds(TimeSource& time_source) {
const SystemTime& now = time_source.systemTime();
return std::chrono::time_point_cast<UnsignedMilliseconds>(now).time_since_epoch().count();
}

const char StringUtil::WhitespaceChars[] = " \t\f\v\n\r";

const char* StringUtil::strtoull(const char* str, uint64_t& out, int base) {
Expand Down
6 changes: 6 additions & 0 deletions source/common/common/utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,12 @@ class DateUtil {
* @return whether a time_point contains a valid, not default constructed time.
*/
static bool timePointValid(MonotonicTime time_point);

/**
* @param time_source time keeping source.
* @return uint64_t the number of milliseconds since the epoch.
*/
static uint64_t nowToMilliseconds(TimeSource& time_source);
};

/**
Expand Down
3 changes: 3 additions & 0 deletions source/common/config/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ envoy_cc_library(
"//include/envoy/event:dispatcher_interface",
"//include/envoy/filesystem:filesystem_interface",
"//source/common/common:minimal_logger_lib",
"//source/common/common:utility_lib",
"//source/common/config:utility_lib",
"//source/common/protobuf",
"//source/common/protobuf:message_validator_lib",
Expand Down Expand Up @@ -127,6 +128,7 @@ envoy_cc_library(
"//include/envoy/config:subscription_interface",
"//include/envoy/upstream:cluster_manager_interface",
"//source/common/common:minimal_logger_lib",
"//source/common/common:utility_lib",
"//source/common/memory:utils_lib",
"//source/common/protobuf",
"@envoy_api//envoy/api/v2:pkg_cc_proto",
Expand Down Expand Up @@ -178,6 +180,7 @@ envoy_cc_library(
"//include/envoy/event:dispatcher_interface",
"//source/common/buffer:buffer_lib",
"//source/common/common:minimal_logger_lib",
"//source/common/common:utility_lib",
"//source/common/config:utility_lib",
"//source/common/http:headers_lib",
"//source/common/http:rest_api_fetcher_lib",
Expand Down
2 changes: 2 additions & 0 deletions source/common/config/filesystem_subscription_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "envoy/service/discovery/v3/discovery.pb.h"

#include "common/common/macros.h"
#include "common/common/utility.h"
#include "common/config/utility.h"
#include "common/protobuf/protobuf.h"
#include "common/protobuf/utility.h"
Expand Down Expand Up @@ -51,6 +52,7 @@ void FilesystemSubscriptionImpl::refresh() {
MessageUtil::loadFromFile(path_, message, validation_visitor_, api_);
config_update_available = true;
callbacks_.onConfigUpdate(message.resources(), message.version_info());
stats_.update_time_.set(DateUtil::nowToMilliseconds(api_.timeSource()));
stats_.version_.set(HashUtil::xxHash64(message.version_info()));
stats_.update_success_.inc();
ENVOY_LOG(debug, "Filesystem config update accepted for {}: {}", path_, message.DebugString());
Expand Down
3 changes: 3 additions & 0 deletions source/common/config/grpc_subscription_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "common/common/assert.h"
#include "common/common/logger.h"
#include "common/common/utility.h"
#include "common/grpc/common.h"
#include "common/protobuf/protobuf.h"
#include "common/protobuf/utility.h"
Expand Down Expand Up @@ -61,6 +62,7 @@ void GrpcSubscriptionImpl::onConfigUpdate(
callbacks_.onConfigUpdate(resources, version_info);
stats_.update_success_.inc();
stats_.update_attempt_.inc();
stats_.update_time_.set(DateUtil::nowToMilliseconds(dispatcher_.timeSource()));
stats_.version_.set(HashUtil::xxHash64(version_info));
ENVOY_LOG(debug, "gRPC config for {} accepted with {} resources with version {}", type_url_,
resources.size(), version_info);
Expand All @@ -74,6 +76,7 @@ void GrpcSubscriptionImpl::onConfigUpdate(
stats_.update_attempt_.inc();
callbacks_.onConfigUpdate(added_resources, removed_resources, system_version_info);
stats_.update_success_.inc();
stats_.update_time_.set(DateUtil::nowToMilliseconds(dispatcher_.timeSource()));
stats_.version_.set(HashUtil::xxHash64(system_version_info));
}

Expand Down
2 changes: 2 additions & 0 deletions source/common/config/http_subscription_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "common/buffer/buffer_impl.h"
#include "common/common/assert.h"
#include "common/common/macros.h"
#include "common/common/utility.h"
#include "common/config/utility.h"
#include "common/config/version_converter.h"
#include "common/http/headers.h"
Expand Down Expand Up @@ -86,6 +87,7 @@ void HttpSubscriptionImpl::parseResponse(const Http::ResponseMessage& response)
try {
callbacks_.onConfigUpdate(message.resources(), message.version_info());
request_.set_version_info(message.version_info());
stats_.update_time_.set(DateUtil::nowToMilliseconds(dispatcher_.timeSource()));
stats_.version_.set(HashUtil::xxHash64(request_.version_info()));
stats_.update_success_.inc();
} catch (const EnvoyException& e) {
Expand Down
1 change: 1 addition & 0 deletions test/common/common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ envoy_cc_test(
],
deps = [
"//source/common/common:utility_lib",
"//test/test_common:simulated_time_system_lib",
"//test/test_common:test_time_lib",
"//test/test_common:utility_lib",
],
Expand Down
8 changes: 8 additions & 0 deletions test/common/common/utility_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "common/common/utility.h"

#include "test/test_common/simulated_time_system.h"
#include "test/test_common/test_time.h"
#include "test/test_common/utility.h"

Expand Down Expand Up @@ -104,6 +105,13 @@ TEST(DateUtil, All) {
EXPECT_TRUE(DateUtil::timePointValid(test_time.timeSystem().systemTime()));
}

TEST(DateUtil, NowToMilliseconds) {
Event::SimulatedTimeSystem test_time;
const SystemTime time_with_millis(std::chrono::seconds(12345) + std::chrono::milliseconds(67));
test_time.setSystemTime(time_with_millis);
EXPECT_EQ(12345067, DateUtil::nowToMilliseconds(test_time));
}

TEST(InputConstMemoryStream, All) {
{
InputConstMemoryStream istream{nullptr, 0};
Expand Down
1 change: 1 addition & 0 deletions test/common/config/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ envoy_cc_test_library(
deps = [
"//source/common/config:utility_lib",
"//test/mocks/stats:stats_mocks",
"//test/test_common:simulated_time_system_lib",
],
)

Expand Down
32 changes: 28 additions & 4 deletions test/common/config/filesystem_subscription_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,20 @@ class FilesystemSubscriptionImplTest : public testing::Test,
// Validate that the client can recover from bad JSON responses.
TEST_F(FilesystemSubscriptionImplTest, BadJsonRecovery) {
startSubscription({"cluster0", "cluster1"});
EXPECT_TRUE(statsAre(1, 0, 0, 0, 0, 0));
EXPECT_TRUE(statsAre(1, 0, 0, 0, 0, 0, 0));
EXPECT_CALL(callbacks_,
onConfigUpdateFailed(Envoy::Config::ConfigUpdateFailureReason::UpdateRejected, _));
updateFile(";!@#badjso n");
EXPECT_TRUE(statsAre(2, 0, 0, 1, 0, 0));
EXPECT_TRUE(statsAre(2, 0, 0, 1, 0, 0, 0));
deliverConfigUpdate({"cluster0", "cluster1"}, "0", true);
EXPECT_TRUE(statsAre(3, 1, 0, 1, 0, 7148434200721666028));
EXPECT_TRUE(statsAre(3, 1, 0, 1, 0, TEST_TIME_MILLIS, 7148434200721666028));
}

// Validate that a file that is initially available results in a successful update.
TEST_F(FilesystemSubscriptionImplTest, InitialFile) {
updateFile("{\"versionInfo\": \"0\", \"resources\": []}", false);
startSubscription({"cluster0", "cluster1"});
EXPECT_TRUE(statsAre(1, 1, 0, 0, 0, 7148434200721666028));
EXPECT_TRUE(statsAre(1, 1, 0, 0, 0, TEST_TIME_MILLIS, 7148434200721666028));
}

// Validate that if we fail to set a watch, we get a sensible warning.
Expand All @@ -53,6 +53,30 @@ TEST(MiscFilesystemSubscriptionImplTest, BadWatch) {
EnvoyException, "bad path");
}

// Validate that the update_time statistic isn't changed when the configuration update gets
// rejected.
TEST_F(FilesystemSubscriptionImplTest, UpdateTimeNotChangedOnUpdateReject) {
startSubscription({"cluster0", "cluster1"});
EXPECT_TRUE(statsAre(1, 0, 0, 0, 0, 0, 0));
EXPECT_CALL(callbacks_,
onConfigUpdateFailed(Envoy::Config::ConfigUpdateFailureReason::UpdateRejected, _));
updateFile(";!@#badjso n");
EXPECT_TRUE(statsAre(2, 0, 0, 1, 0, 0, 0));
}

// Validate that the update_time statistic is changed after a trivial configuration update
// (update that resulted in no change).
TEST_F(FilesystemSubscriptionImplTest, UpdateTimeChangedOnUpdateSuccess) {
startSubscription({"cluster0", "cluster1"});
EXPECT_TRUE(statsAre(1, 0, 0, 0, 0, 0, 0));
deliverConfigUpdate({"cluster0", "cluster1"}, "0", true);
EXPECT_TRUE(statsAre(2, 1, 0, 0, 0, TEST_TIME_MILLIS, 7148434200721666028));
// Advance the simulated time.
simTime().setSystemTime(SystemTime(std::chrono::milliseconds(TEST_TIME_MILLIS + 1)));
deliverConfigUpdate({"cluster0", "cluster1"}, "0", true);
EXPECT_TRUE(statsAre(3, 2, 0, 0, 0, TEST_TIME_MILLIS + 1, 7148434200721666028));
}

} // namespace
} // namespace Config
} // namespace Envoy
8 changes: 5 additions & 3 deletions test/common/config/filesystem_subscription_test_harness.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ class FilesystemSubscriptionTestHarness : public SubscriptionTestHarness {
public:
FilesystemSubscriptionTestHarness()
: path_(TestEnvironment::temporaryPath("eds.json")),
api_(Api::createApiForTest(stats_store_)), dispatcher_(api_->allocateDispatcher()),
api_(Api::createApiForTest(stats_store_, simTime())),
dispatcher_(api_->allocateDispatcher()),
subscription_(*dispatcher_, path_, callbacks_, stats_, validation_visitor_, *api_) {}

~FilesystemSubscriptionTestHarness() override { TestEnvironment::removePath(path_); }
Expand Down Expand Up @@ -84,11 +85,12 @@ class FilesystemSubscriptionTestHarness : public SubscriptionTestHarness {
}

AssertionResult statsAre(uint32_t attempt, uint32_t success, uint32_t rejected, uint32_t failure,
uint32_t init_fetch_timeout, uint64_t version) override {
uint32_t init_fetch_timeout, uint64_t update_time,
uint64_t version) override {
// The first attempt always fail unless there was a file there to begin with.
return SubscriptionTestHarness::statsAre(attempt, success, rejected,
failure + (file_at_start_ ? 0 : 1), init_fetch_timeout,
version);
update_time, version);
}

void expectConfigUpdateFailed() override { stats_.update_failure_.inc(); }
Expand Down
46 changes: 34 additions & 12 deletions test/common/config/grpc_subscription_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ TEST_F(GrpcSubscriptionImplTest, StreamCreationFailure) {
EXPECT_CALL(random_, random());
EXPECT_CALL(*timer_, enableTimer(_, _));
subscription_->start({"cluster0", "cluster1"});
EXPECT_TRUE(statsAre(2, 0, 0, 1, 0, 0));
EXPECT_TRUE(statsAre(2, 0, 0, 1, 0, 0, 0));
// Ensure this doesn't cause an issue by sending a request, since we don't
// have a gRPC stream.
subscription_->updateResourceInterest({"cluster2"});
Expand All @@ -32,51 +32,73 @@ TEST_F(GrpcSubscriptionImplTest, StreamCreationFailure) {

expectSendMessage({"cluster2"}, "", true);
timer_cb_();
EXPECT_TRUE(statsAre(3, 0, 0, 1, 0, 0));
EXPECT_TRUE(statsAre(3, 0, 0, 1, 0, 0, 0));
verifyControlPlaneStats(1);
}

// Validate that the client can recover from a remote stream closure via retry.
TEST_F(GrpcSubscriptionImplTest, RemoteStreamClose) {
startSubscription({"cluster0", "cluster1"});
EXPECT_TRUE(statsAre(1, 0, 0, 0, 0, 0));
EXPECT_TRUE(statsAre(1, 0, 0, 0, 0, 0, 0));
// onConfigUpdateFailed() should not be called for gRPC stream connection failure
EXPECT_CALL(callbacks_,
onConfigUpdateFailed(Envoy::Config::ConfigUpdateFailureReason::ConnectionFailure, _))
.Times(0);
EXPECT_CALL(*timer_, enableTimer(_, _));
EXPECT_CALL(random_, random());
mux_->grpcStreamForTest().onRemoteClose(Grpc::Status::WellKnownGrpcStatus::Canceled, "");
EXPECT_TRUE(statsAre(2, 0, 0, 1, 0, 0));
EXPECT_TRUE(statsAre(2, 0, 0, 1, 0, 0, 0));
verifyControlPlaneStats(0);

// Retry and succeed.
EXPECT_CALL(*async_client_, startRaw(_, _, _, _)).WillOnce(Return(&async_stream_));
expectSendMessage({"cluster0", "cluster1"}, "", true);
timer_cb_();
EXPECT_TRUE(statsAre(2, 0, 0, 1, 0, 0));
EXPECT_TRUE(statsAre(2, 0, 0, 1, 0, 0, 0));
}

// Validate that When the management server gets multiple requests for the same version, it can
// ignore later ones. This allows the nonce to be used.
TEST_F(GrpcSubscriptionImplTest, RepeatedNonce) {
InSequence s;
startSubscription({"cluster0", "cluster1"});
EXPECT_TRUE(statsAre(1, 0, 0, 0, 0, 0));
EXPECT_TRUE(statsAre(1, 0, 0, 0, 0, 0, 0));
// First with the initial, empty version update to "0".
updateResourceInterest({"cluster2"});
EXPECT_TRUE(statsAre(2, 0, 0, 0, 0, 0));
EXPECT_TRUE(statsAre(2, 0, 0, 0, 0, 0, 0));
deliverConfigUpdate({"cluster0", "cluster2"}, "0", false);
EXPECT_TRUE(statsAre(3, 0, 1, 0, 0, 0));
EXPECT_TRUE(statsAre(3, 0, 1, 0, 0, 0, 0));
deliverConfigUpdate({"cluster0", "cluster2"}, "0", true);
EXPECT_TRUE(statsAre(4, 1, 1, 0, 0, 7148434200721666028));
EXPECT_TRUE(statsAre(4, 1, 1, 0, 0, TEST_TIME_MILLIS, 7148434200721666028));
// Now with version "0" update to "1".
updateResourceInterest({"cluster3"});
EXPECT_TRUE(statsAre(5, 1, 1, 0, 0, 7148434200721666028));
EXPECT_TRUE(statsAre(5, 1, 1, 0, 0, TEST_TIME_MILLIS, 7148434200721666028));
deliverConfigUpdate({"cluster3"}, "1", false);
EXPECT_TRUE(statsAre(6, 1, 2, 0, 0, 7148434200721666028));
EXPECT_TRUE(statsAre(6, 1, 2, 0, 0, TEST_TIME_MILLIS, 7148434200721666028));
deliverConfigUpdate({"cluster3"}, "1", true);
EXPECT_TRUE(statsAre(7, 2, 2, 0, 0, 13237225503670494420U));
EXPECT_TRUE(statsAre(7, 2, 2, 0, 0, TEST_TIME_MILLIS, 13237225503670494420U));
}

TEST_F(GrpcSubscriptionImplTest, UpdateTimeNotChangedOnUpdateReject) {
InSequence s;
startSubscription({"cluster0", "cluster1"});
EXPECT_TRUE(statsAre(1, 0, 0, 0, 0, 0, 0));
deliverConfigUpdate({"cluster0", "cluster2"}, "0", false);
EXPECT_TRUE(statsAre(2, 0, 1, 0, 0, 0, 0));
}

TEST_F(GrpcSubscriptionImplTest, UpdateTimeChangedOnUpdateSuccess) {
InSequence s;
startSubscription({"cluster0", "cluster1"});
EXPECT_TRUE(statsAre(1, 0, 0, 0, 0, 0, 0));
deliverConfigUpdate({"cluster0", "cluster2"}, "0", true);
EXPECT_TRUE(statsAre(2, 1, 0, 0, 0, TEST_TIME_MILLIS, 7148434200721666028));

// Advance the simulated time and verify that a trivial update (no change) also changes the update
// time.
simTime().setSystemTime(SystemTime(std::chrono::milliseconds(TEST_TIME_MILLIS + 1)));
deliverConfigUpdate({"cluster0", "cluster2"}, "0", true);
EXPECT_TRUE(statsAre(2, 2, 0, 0, 0, TEST_TIME_MILLIS + 1, 7148434200721666028));
}

} // namespace
Expand Down
Loading

0 comments on commit 139f926

Please sign in to comment.