From 3ad5b4f7026fd104ddf1bd381183b05b32dd4984 Mon Sep 17 00:00:00 2001 From: jean-christophe81 <98889244+jean-christophe81@users.noreply.github.com> Date: Fri, 20 Dec 2024 16:57:09 +0100 Subject: [PATCH] enh(agent): new agent check_health (#1944) (#1970) * enh(agent): new agent check_health (#1944) * add agent check_health * fix installer test REFS: MON-156263 --- .github/scripts/agent_installer_test.ps1 | 2 +- agent/CMakeLists.txt | 1 + agent/doc/agent-doc.md | 6 +- agent/inc/com/centreon/agent/check.hh | 51 ++- agent/inc/com/centreon/agent/check_exec.hh | 6 +- agent/inc/com/centreon/agent/check_health.hh | 63 ++++ agent/inc/com/centreon/agent/config.hh | 28 ++ agent/inc/com/centreon/agent/drive_size.hh | 3 +- .../com/centreon/agent/native_check_base.hh | 3 +- .../centreon/agent/native_check_cpu_base.hh | 3 +- agent/inc/com/centreon/agent/scheduler.hh | 10 +- agent/inc/com/centreon/agent/version.hh.in | 2 + .../inc/com/centreon/agent/check_cpu.hh | 3 +- agent/native_linux/src/check_cpu.cc | 6 +- .../inc/com/centreon/agent/check_cpu.hh | 3 +- .../inc/com/centreon/agent/check_memory.hh | 3 +- .../inc/com/centreon/agent/check_service.hh | 3 +- .../inc/com/centreon/agent/check_uptime.hh | 3 +- agent/native_windows/src/check_cpu.cc | 6 +- agent/native_windows/src/check_memory.cc | 6 +- agent/native_windows/src/check_service.cc | 6 +- agent/native_windows/src/check_uptime.cc | 9 +- agent/precomp_inc/precomp.hh | 8 + agent/src/bireactor.cc | 6 +- agent/src/check.cc | 59 ++- agent/src/check_exec.cc | 11 +- agent/src/check_health.cc | 299 +++++++++++++++ agent/src/config.cc | 3 +- agent/src/config_win.cc | 3 +- agent/src/drive_size.cc | 11 +- agent/src/main.cc | 45 +-- agent/src/main_win.cc | 47 +-- agent/src/native_check_base.cc | 6 +- agent/src/native_check_cpu_base.cc | 6 +- agent/src/scheduler.cc | 36 +- agent/src/streaming_client.cc | 2 +- agent/test/CMakeLists.txt | 1 + agent/test/check_exec_test.cc | 37 +- agent/test/check_health_test.cc | 339 ++++++++++++++++++ 
agent/test/check_linux_cpu_test.cc | 25 +- agent/test/check_test.cc | 9 +- agent/test/check_uptime_test.cc | 27 +- agent/test/check_windows_cpu_test.cc | 18 +- agent/test/check_windows_memory_test.cc | 4 +- agent/test/check_windows_service_test.cc | 33 +- agent/test/drive_size_test.cc | 27 +- agent/test/scheduler_test.cc | 45 ++- tests/broker-engine/cma.robot | 68 ++++ 48 files changed, 1214 insertions(+), 187 deletions(-) create mode 100644 agent/inc/com/centreon/agent/check_health.hh create mode 100644 agent/src/check_health.cc create mode 100644 agent/test/check_health_test.cc diff --git a/.github/scripts/agent_installer_test.ps1 b/.github/scripts/agent_installer_test.ps1 index 814b455b609..e631c681e52 100644 --- a/.github/scripts/agent_installer_test.ps1 +++ b/.github/scripts/agent_installer_test.ps1 @@ -18,7 +18,7 @@ # This script test CMA installer in silent mode -Set-PSDebug -Trace 2 +#Set-PSDebug -Trace 2 function f_start_process([string]$sProcess, [string]$sArgs, [ref]$pOutPut) { <# diff --git a/agent/CMakeLists.txt b/agent/CMakeLists.txt index ddb71e5461b..fcb871ef427 100644 --- a/agent/CMakeLists.txt +++ b/agent/CMakeLists.txt @@ -114,6 +114,7 @@ set( SRC_COMMON ${SRC_DIR}/check.cc ${SRC_DIR}/check_exec.cc ${SRC_DIR}/drive_size.cc + ${SRC_DIR}/check_health.cc ${SRC_DIR}/opentelemetry/proto/collector/metrics/v1/metrics_service.grpc.pb.cc ${SRC_DIR}/opentelemetry/proto/collector/metrics/v1/metrics_service.pb.cc ${SRC_DIR}/opentelemetry/proto/metrics/v1/metrics.pb.cc diff --git a/agent/doc/agent-doc.md b/agent/doc/agent-doc.md index 7d051131b16..062b962f3f5 100644 --- a/agent/doc/agent-doc.md +++ b/agent/doc/agent-doc.md @@ -123,4 +123,8 @@ So it works like that: * check_drive_size post query in drive_size_thread queue * drive_size_thread call os_fs_stats * drive_size_thread post result in io_context -* io_context calls check_drive_size::_completion_handler \ No newline at end of file +* io_context calls check_drive_size::_completion_handler + +### 
check_health +This little check sends agent's statistics to the poller. In order to do that, each check shares a common checks_statistics object. +This object is created by scheduler each time agent receives config from poller. This object contains last check interval and last check duration of each command. The first time it's executed, it can send unknown state if there is no other yet executed checks. \ No newline at end of file diff --git a/agent/inc/com/centreon/agent/check.hh b/agent/inc/com/centreon/agent/check.hh index e10da69bbc1..e4c1511b7ec 100644 --- a/agent/inc/com/centreon/agent/check.hh +++ b/agent/inc/com/centreon/agent/check.hh @@ -30,6 +30,44 @@ using engine_to_agent_request_ptr = using time_point = std::chrono::system_clock::time_point; using duration = std::chrono::system_clock::duration; +class checks_statistics { + struct check_stat { + std::string cmd_name; + duration last_check_interval; + duration last_check_duration; + }; + + using statistic_container = multi_index::multi_index_container< + check_stat, + multi_index::indexed_by< + multi_index::hashed_unique< + BOOST_MULTI_INDEX_MEMBER(check_stat, std::string, cmd_name)>, + boost::multi_index::ordered_non_unique, + boost::multi_index::ordered_non_unique>>; + + statistic_container _stats; + + public: + using pointer = std::shared_ptr; + + void add_interval_stat(const std::string& cmd_name, + const duration& check_interval); + + void add_duration_stat(const std::string& cmd_name, + const duration& check_interval); + + const auto& get_ordered_by_interval() const { return _stats.get<1>(); } + const auto& get_ordered_by_duration() const { return _stats.get<2>(); } + + size_t size() const { return _stats.size(); } +}; + /** * @brief nagios status values * @@ -90,6 +128,8 @@ class time_step { time_point value() const { return _start_point + _step_index * _step; } uint64_t get_step_index() const { return _step_index; } + + duration get_step() const { return _step; } }; /** @@ -130,6 +170,10 @@ 
class check : public std::enable_shared_from_this { unsigned _running_check_index = 0; completion_handler _completion_handler; + // statistics used by check_health + time_point _last_start; + checks_statistics::pointer _stat; + protected: std::shared_ptr _io_context; std::shared_ptr _logger; @@ -159,7 +203,8 @@ class check : public std::enable_shared_from_this { const std::string& command_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - completion_handler&& handler); + completion_handler&& handler, + const checks_statistics::pointer& stat); virtual ~check() = default; @@ -178,6 +223,8 @@ class check : public std::enable_shared_from_this { time_point get_start_expected() const { return _start_expected.value(); } + const time_step & get_raw_start_expected() const { return _start_expected; } + const std::string& get_service() const { return _service; } const std::string& get_command_name() const { return _command_name; } @@ -201,6 +248,8 @@ class check : public std::enable_shared_from_this { static std::optional get_bool(const std::string& cmd_name, const char* field_name, const rapidjson::Value& val); + + const checks_statistics& get_stats() const { return *_stat; } }; } // namespace com::centreon::agent diff --git a/agent/inc/com/centreon/agent/check_exec.hh b/agent/inc/com/centreon/agent/check_exec.hh index 49cdc2c04d2..37b932c1d6f 100644 --- a/agent/inc/com/centreon/agent/check_exec.hh +++ b/agent/inc/com/centreon/agent/check_exec.hh @@ -97,7 +97,8 @@ class check_exec : public check { const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); static std::shared_ptr load( const std::shared_ptr& io_context, @@ -108,7 +109,8 @@ class check_exec : public check { const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - 
check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); void start_check(const duration& timeout) override; diff --git a/agent/inc/com/centreon/agent/check_health.hh b/agent/inc/com/centreon/agent/check_health.hh new file mode 100644 index 00000000000..d62dafd3392 --- /dev/null +++ b/agent/inc/com/centreon/agent/check_health.hh @@ -0,0 +1,63 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#ifndef CENTREON_AGENT_HEALTH_CHECK_HH +#define CENTREON_AGENT_HEALTH_CHECK_HH + +#include "check.hh" + +namespace com::centreon::agent { + +class check_health : public check { + unsigned _warning_check_interval; + unsigned _critical_check_interval; + unsigned _warning_check_duration; + unsigned _critical_check_duration; + + std::string _info_output; + + // we use this timer to delay measure in order to have some checks yet done + // when we will compute the first statistics + asio::system_timer _measure_timer; + + void _measure_timer_handler(const boost::system::error_code& err, + unsigned start_check_index); + + public: + check_health(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point first_start_expected, + duration check_interval, + const std::string& serv, + const std::string& cmd_name, + const std::string& cmd_line, + const rapidjson::Value& args, + const engine_to_agent_request_ptr& cnf, + check::completion_handler&& handler, + const checks_statistics::pointer& stat); + + static void help(std::ostream& help_stream); + + void start_check(const duration& timeout) override; + + e_status compute(std::string* output, std::list* perfs); +}; + +} // namespace com::centreon::agent + +#endif // CENTREON_AGENT_HEALTH_CHECK_HH diff --git a/agent/inc/com/centreon/agent/config.hh b/agent/inc/com/centreon/agent/config.hh index 6808041f316..0a7669ccfb1 100644 --- a/agent/inc/com/centreon/agent/config.hh +++ b/agent/inc/com/centreon/agent/config.hh @@ -18,6 +18,7 @@ #ifndef CENTREON_AGENT_CONFIG_HH #define CENTREON_AGENT_CONFIG_HH +#include #include "com/centreon/common/grpc/grpc_config.hh" namespace com::centreon::agent { @@ -45,9 +46,36 @@ class config { bool _reverse_connection; unsigned _second_max_reconnect_backoff; + static std::unique_ptr _global_conf; + public: + static const config& load(const std::string& path) { + _global_conf = std::make_unique(path); + return 
*_global_conf; + } + + /** + * @brief used only for UT + * + * @param reverse_connection + * @return const config& + */ + static const config& load(bool reverse_connection) { + _global_conf = std::make_unique(reverse_connection); + return *_global_conf; + } + + static const config& instance() { return *_global_conf; } + config(const std::string& path); + /** + * @brief used only for UT + * + * @param reverse_connection + */ + config(bool reverse_connection) : _reverse_connection(reverse_connection) {} + const std::string& get_endpoint() const { return _endpoint; } spdlog::level::level_enum get_log_level() const { return _log_level; }; log_type get_log_type() const { return _log_type; } diff --git a/agent/inc/com/centreon/agent/drive_size.hh b/agent/inc/com/centreon/agent/drive_size.hh index 8b33cf10c74..94c4d3d4609 100644 --- a/agent/inc/com/centreon/agent/drive_size.hh +++ b/agent/inc/com/centreon/agent/drive_size.hh @@ -255,7 +255,8 @@ class check_drive_size : public check { const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); virtual ~check_drive_size() = default; diff --git a/agent/inc/com/centreon/agent/native_check_base.hh b/agent/inc/com/centreon/agent/native_check_base.hh index a5c1d48ab8d..158cad781d1 100644 --- a/agent/inc/com/centreon/agent/native_check_base.hh +++ b/agent/inc/com/centreon/agent/native_check_base.hh @@ -135,7 +135,8 @@ class native_check_base : public check { const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); std::shared_ptr> shared_from_this() { return std::static_pointer_cast>( diff --git a/agent/inc/com/centreon/agent/native_check_cpu_base.hh b/agent/inc/com/centreon/agent/native_check_cpu_base.hh index 
32131bb30d8..0460cbf008c 100644 --- a/agent/inc/com/centreon/agent/native_check_cpu_base.hh +++ b/agent/inc/com/centreon/agent/native_check_cpu_base.hh @@ -221,7 +221,8 @@ class native_check_cpu : public check { const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); virtual ~native_check_cpu() = default; diff --git a/agent/inc/com/centreon/agent/scheduler.hh b/agent/inc/com/centreon/agent/scheduler.hh index 6e18e473581..623b31bb617 100644 --- a/agent/inc/com/centreon/agent/scheduler.hh +++ b/agent/inc/com/centreon/agent/scheduler.hh @@ -42,7 +42,8 @@ class scheduler : public std::enable_shared_from_this { const std::string& /*cmd_name*/, const std::string& /*cmd_line*/, const engine_to_agent_request_ptr& /*engine to agent request*/, - check::completion_handler&&)>; + check::completion_handler&&, + const checks_statistics::pointer& /*stat*/)>; private: using check_queue = @@ -164,7 +165,8 @@ class scheduler : public std::enable_shared_from_this { const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& conf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); engine_to_agent_request_ptr get_last_message_to_agent() const { return _conf; @@ -187,10 +189,10 @@ scheduler::scheduler( const std::shared_ptr& config, sender&& met_sender, chck_builder&& builder) - : _metric_sender(met_sender), - _io_context(io_context), + : _io_context(io_context), _logger(logger), _supervised_host(supervised_host), + _metric_sender(met_sender), _send_timer(*io_context), _check_timer(*io_context), _check_builder(builder), diff --git a/agent/inc/com/centreon/agent/version.hh.in b/agent/inc/com/centreon/agent/version.hh.in index f4c2d2e0136..205199267dd 100644 --- a/agent/inc/com/centreon/agent/version.hh.in +++ 
b/agent/inc/com/centreon/agent/version.hh.in @@ -25,4 +25,6 @@ constexpr unsigned CENTREON_AGENT_VERSION_MAJOR = @COLLECT_MAJOR@; constexpr unsigned CENTREON_AGENT_VERSION_MINOR = @COLLECT_MINOR@.0; constexpr unsigned CENTREON_AGENT_VERSION_PATCH = @COLLECT_PATCH@.0; +#define CENTREON_AGENT_VERSION "@COLLECT_MAJOR@.@COLLECT_MINOR@.@COLLECT_PATCH@" + #endif // !CCE_VERSION_HH diff --git a/agent/native_linux/inc/com/centreon/agent/check_cpu.hh b/agent/native_linux/inc/com/centreon/agent/check_cpu.hh index 34750e2edad..9481f61fa0a 100644 --- a/agent/native_linux/inc/com/centreon/agent/check_cpu.hh +++ b/agent/native_linux/inc/com/centreon/agent/check_cpu.hh @@ -86,7 +86,8 @@ class check_cpu const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); static void help(std::ostream& help_stream); diff --git a/agent/native_linux/src/check_cpu.cc b/agent/native_linux/src/check_cpu.cc index 8de1a025756..1959d1acd2f 100644 --- a/agent/native_linux/src/check_cpu.cc +++ b/agent/native_linux/src/check_cpu.cc @@ -183,7 +183,8 @@ check_cpu::check_cpu(const std::shared_ptr& io_context, const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : native_check_cpu( io_context, logger, @@ -194,7 +195,8 @@ check_cpu::check_cpu(const std::shared_ptr& io_context, cmd_line, args, cnf, - std::move(handler)) + std::move(handler), + stat) { com::centreon::common::rapidjson_helper arg(args); diff --git a/agent/native_windows/inc/com/centreon/agent/check_cpu.hh b/agent/native_windows/inc/com/centreon/agent/check_cpu.hh index 806a6cfca7b..f1d8421293d 100644 --- a/agent/native_windows/inc/com/centreon/agent/check_cpu.hh +++ 
b/agent/native_windows/inc/com/centreon/agent/check_cpu.hh @@ -132,7 +132,8 @@ class check_cpu const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); ~check_cpu(); diff --git a/agent/native_windows/inc/com/centreon/agent/check_memory.hh b/agent/native_windows/inc/com/centreon/agent/check_memory.hh index f5b9c6aaaba..76a11b928a2 100644 --- a/agent/native_windows/inc/com/centreon/agent/check_memory.hh +++ b/agent/native_windows/inc/com/centreon/agent/check_memory.hh @@ -82,7 +82,8 @@ class check_memory : public native_check_base< const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); std::shared_ptr> diff --git a/agent/native_windows/inc/com/centreon/agent/check_service.hh b/agent/native_windows/inc/com/centreon/agent/check_service.hh index 4625ab0e906..d35de66a8f3 100644 --- a/agent/native_windows/inc/com/centreon/agent/check_service.hh +++ b/agent/native_windows/inc/com/centreon/agent/check_service.hh @@ -177,7 +177,8 @@ class check_service const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); std::shared_ptr> diff --git a/agent/native_windows/inc/com/centreon/agent/check_uptime.hh b/agent/native_windows/inc/com/centreon/agent/check_uptime.hh index 93748c73017..3a43d32f1c1 100644 --- a/agent/native_windows/inc/com/centreon/agent/check_uptime.hh +++ b/agent/native_windows/inc/com/centreon/agent/check_uptime.hh @@ -41,7 +41,8 @@ class check_uptime : public check { const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - 
check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); static void help(std::ostream& help_stream); diff --git a/agent/native_windows/src/check_cpu.cc b/agent/native_windows/src/check_cpu.cc index b3dd6d60ab5..96ccc641ae3 100644 --- a/agent/native_windows/src/check_cpu.cc +++ b/agent/native_windows/src/check_cpu.cc @@ -394,7 +394,8 @@ check_cpu::check_cpu(const std::shared_ptr& io_context, const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : native_check_cpu( io_context, logger, @@ -405,7 +406,8 @@ check_cpu::check_cpu(const std::shared_ptr& io_context, cmd_line, args, cnf, - std::move(handler)) + std::move(handler), + stat) { try { diff --git a/agent/native_windows/src/check_memory.cc b/agent/native_windows/src/check_memory.cc index 3f46188d43c..ec94ba4c4da 100644 --- a/agent/native_windows/src/check_memory.cc +++ b/agent/native_windows/src/check_memory.cc @@ -376,7 +376,8 @@ check_memory::check_memory(const std::shared_ptr& io_context, const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : native_check_base(io_context, logger, first_start_expected, @@ -386,7 +387,8 @@ check_memory::check_memory(const std::shared_ptr& io_context, cmd_line, args, cnf, - std::move(handler)) { + std::move(handler), + stat) { _no_percent_unit = "B"; if (args.IsObject()) { for (auto member_iter = args.MemberBegin(); member_iter != args.MemberEnd(); diff --git a/agent/native_windows/src/check_service.cc b/agent/native_windows/src/check_service.cc index 931bd49c255..f62679ab656 100644 --- a/agent/native_windows/src/check_service.cc +++ b/agent/native_windows/src/check_service.cc @@ -531,7 +531,8 
@@ check_service::check_service( const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : native_check_base(io_context, logger, first_start_expected, @@ -541,7 +542,8 @@ check_service::check_service( cmd_line, args, cnf, - std::move(handler)), + std::move(handler), + stat), _filter(args), _enumerator(_enumerator_constructor()) { if (!args.IsObject()) { diff --git a/agent/native_windows/src/check_uptime.cc b/agent/native_windows/src/check_uptime.cc index 6aa7dd83e58..31256ac3e26 100644 --- a/agent/native_windows/src/check_uptime.cc +++ b/agent/native_windows/src/check_uptime.cc @@ -53,7 +53,8 @@ check_uptime::check_uptime(const std::shared_ptr& io_context, const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, first_start_expected, @@ -62,7 +63,8 @@ check_uptime::check_uptime(const std::shared_ptr& io_context, cmd_name, cmd_line, cnf, - std::move(handler)), + std::move(handler), + stat), _second_warning_threshold(0), _second_critical_threshold(0) { com::centreon::common::rapidjson_helper arg(args); @@ -91,6 +93,9 @@ check_uptime::check_uptime(const std::shared_ptr& io_context, * @param timeout unused */ void check_uptime::start_check([[maybe_unused]] const duration& timeout) { + if (!_start_check(timeout)) { + return; + } std::string output; common::perfdata perf; e_status status = compute(GetTickCount64(), &output, &perf); diff --git a/agent/precomp_inc/precomp.hh b/agent/precomp_inc/precomp.hh index df066d6fe5a..e53dd163dca 100644 --- a/agent/precomp_inc/precomp.hh +++ b/agent/precomp_inc/precomp.hh @@ -50,9 +50,17 @@ namespace asio = boost::asio; #include #include +#include +#include +#include +#include #include 
#include #include +#include "com/centreon/exceptions/msg_fmt.hh" + +namespace multi_index = boost::multi_index; + #endif diff --git a/agent/src/bireactor.cc b/agent/src/bireactor.cc index e26346be55c..712d81c903f 100644 --- a/agent/src/bireactor.cc +++ b/agent/src/bireactor.cc @@ -42,11 +42,11 @@ bireactor::bireactor( const std::string_view& class_name, const std::string& peer) : _write_pending(false), - _alive(true), _class_name(class_name), _peer(peer), _io_context(io_context), - _logger(logger) { + _logger(logger), + _alive(true) { SPDLOG_LOGGER_DEBUG(_logger, "create {} this={:p} peer:{}", _class_name, static_cast(this), _peer); } @@ -204,4 +204,4 @@ template class bireactor< template class bireactor< ::grpc::ServerBidiReactor>; -} // namespace com::centreon::agent \ No newline at end of file +} // namespace com::centreon::agent diff --git a/agent/src/check.cc b/agent/src/check.cc index fa6c28da6ed..a730ad4a4c4 100644 --- a/agent/src/check.cc +++ b/agent/src/check.cc @@ -16,12 +16,46 @@ * For more information : contact@centreon.com */ -#include "com/centreon/exceptions/msg_fmt.hh" - #include "check.hh" using namespace com::centreon::agent; +/** + * @brief update check interval of a check + * + * @param cmd_name name of command (entered by user in centreon UI) + * @param last_check_interval + */ +void checks_statistics::add_interval_stat(const std::string& cmd_name, + const duration& last_check_interval) { + auto it = _stats.find(cmd_name); + if (it == _stats.end()) { + _stats.insert({cmd_name, last_check_interval, {}}); + } else { + _stats.get<0>().modify(it, [last_check_interval](check_stat& it) { + it.last_check_interval = last_check_interval; + }); + } +} + +/** + * @brief update check duration of a check + * + * @param cmd_name name of command (entered by user in centreon UI) + * @param last_check_duration + */ +void checks_statistics::add_duration_stat(const std::string& cmd_name, + const duration& last_check_duration) { + auto it = _stats.find(cmd_name); 
+ if (it == _stats.end()) { + _stats.insert({cmd_name, {}, last_check_duration}); + } else { + _stats.get<0>().modify(it, [last_check_duration](check_stat& it) { + it.last_check_duration = last_check_duration; + }); + } +} + const std::array check::status_label = { "OK: ", "WARNING: ", "CRITICAL: ", "UNKNOWN: "}; @@ -47,16 +81,18 @@ check::check(const std::shared_ptr& io_context, const std::string& command_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - completion_handler&& handler) + completion_handler&& handler, + const checks_statistics::pointer& stat) : _start_expected(first_start_expected, check_interval), _service(serv), _command_name(command_name), _command_line(cmd_line), _conf(cnf), - _io_context(io_context), - _logger(logger), _time_out_timer(*io_context), - _completion_handler(handler) {} + _completion_handler(handler), + _stat(stat), + _io_context(io_context), + _logger(logger) {} /** * @brief start timeout timer and init some flags used by timeout and completion @@ -87,6 +123,15 @@ bool check::_start_check(const duration& timeout) { _running_check = true; _start_timeout_timer(timeout); SPDLOG_LOGGER_TRACE(_logger, "start check for service {}", _service); + + time_point now = std::chrono::system_clock::now(); + + if (_last_start.time_since_epoch().count() != 0) { + _stat->add_interval_stat(_command_name, now - _last_start); + } + + _last_start = now; + return true; } @@ -148,6 +193,8 @@ void check::on_completion( _time_out_timer.cancel(); _running_check = false; ++_running_check_index; + _stat->add_duration_stat(_command_name, + std::chrono::system_clock::now() - _last_start); _completion_handler(shared_from_this(), status, perfdata, outputs); } } diff --git a/agent/src/check_exec.cc b/agent/src/check_exec.cc index 281a4eaf9e0..27a1250f9aa 100644 --- a/agent/src/check_exec.cc +++ b/agent/src/check_exec.cc @@ -122,7 +122,8 @@ check_exec::check_exec(const std::shared_ptr& io_context, const std::string& cmd_name, const 
std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, first_start_expected, @@ -131,7 +132,8 @@ check_exec::check_exec(const std::shared_ptr& io_context, cmd_name, cmd_line, cnf, - std::move(handler)) {} + std::move(handler), + stat) {} /** * @brief create and initialize a check_exec object (don't use constructor) @@ -158,10 +160,11 @@ std::shared_ptr check_exec::load( const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) { + check::completion_handler&& handler, + const checks_statistics::pointer& stat) { std::shared_ptr ret = std::make_shared( io_context, logger, first_start_expected, check_interval, serv, cmd_name, - cmd_line, cnf, std::move(handler)); + cmd_line, cnf, std::move(handler), stat); ret->_init(); return ret; } diff --git a/agent/src/check_health.cc b/agent/src/check_health.cc new file mode 100644 index 00000000000..2e9668acb21 --- /dev/null +++ b/agent/src/check_health.cc @@ -0,0 +1,299 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#include "check_health.hh" +#include +#include "com/centreon/common/rapidjson_helper.hh" +#include "config.hh" +#include "version.hh" + +using namespace com::centreon::agent; + +/** + * @brief Construct a new check_health object + * + * @param io_context + * @param logger + * @param first_start_expected + * @param check_interval + * @param serv + * @param cmd_name + * @param cmd_line + * @param args + * @param cnf + * @param handler + */ +check_health::check_health(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point first_start_expected, + duration check_interval, + const std::string& serv, + const std::string& cmd_name, + const std::string& cmd_line, + const rapidjson::Value& args, + const engine_to_agent_request_ptr& cnf, + check::completion_handler&& handler, + const checks_statistics::pointer& stat) + : check(io_context, + logger, + first_start_expected, + check_interval, + serv, + cmd_name, + cmd_line, + cnf, + std::move(handler), + stat), + _measure_timer(*io_context) { + com::centreon::common::rapidjson_helper arg(args); + try { + if (args.IsObject()) { + _warning_check_interval = arg.get_unsigned("warning-interval", 0); + _critical_check_interval = arg.get_unsigned("critical-interval", 0); + _warning_check_duration = arg.get_unsigned("warning-runtime", 0); + _critical_check_duration = arg.get_unsigned("critical-runtime", 0); + } + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(_logger, "check_health, fail to parse arguments: {}", + e.what()); + throw; + } + + if (config::instance().use_reverse_connection()) { + _info_output = "Version: " CENTREON_AGENT_VERSION + " - Connection mode: Poller initiated - Current " + "configuration: {} checks - Average runtime: {}s"; + } else { + _info_output = "Version: " CENTREON_AGENT_VERSION + " - Connection mode: Agent initiated - Current " + "configuration: {} checks - Average runtime: {}s"; + } +} + +/** + * @brief start a 
timer to do the job + * + * @param timeout unused + */ +void check_health::start_check([[maybe_unused]] const duration& timeout) { + if (!_start_check(timeout)) { + return; + } + + // we wait a little in order to have statistics check_interval/2 + _measure_timer.expires_from_now(get_raw_start_expected().get_step() / 2); + _measure_timer.async_wait( + [me = shared_from_this(), start_check_index = _get_running_check_index()]( + const boost::system::error_code& err) mutable { + std::static_pointer_cast(me)->_measure_timer_handler( + err, start_check_index); + }); +} + +/** + * @brief timer handler that do the job + * + * @param err set if canceled + * @param start_check_index used by on_completion + */ +void check_health::_measure_timer_handler(const boost::system::error_code& err, + unsigned start_check_index) { + if (err) { + return; + } + std::string output; + std::list perf; + e_status status = compute(&output, &perf); + + on_completion(start_check_index, status, perf, {output}); +} + +/** + * @brief calculate status, output and perfdata from statistics + * + * @param ms_uptime + * @param output + * @param perfs + * @return e_status + */ +e_status check_health::compute(std::string* output, + std::list* perf) { + e_status ret = e_status::ok; + + const checks_statistics& stats = get_stats(); + + if (stats.size() == 0) { + *output = "UNKNOWN: No check yet performed"; + return e_status::unknown; + } + + absl::flat_hash_set written_to_output; + + unsigned average_runtime = 0; + for (const auto& stat : stats.get_ordered_by_duration()) { + average_runtime += std::chrono::duration_cast( + stat.last_check_duration) + .count(); + } + + auto append_state_to_output = [&](e_status status, std::string* temp_output, + const auto& iter) { + if (written_to_output.insert(iter->cmd_name).second) { + if (temp_output->empty()) { + *temp_output = status_label[status]; + } else { + temp_output->push_back(','); + temp_output->push_back(' '); + } + if (status > ret) { + ret = status; + } 
+ absl::StrAppend(temp_output, iter->cmd_name, " runtime:", + std::chrono::duration_cast( + iter->last_check_duration) + .count(), + "s interval:", + std::chrono::duration_cast( + iter->last_check_interval) + .count(), + "s"); + } + }; + + std::string critical_output; + if (_critical_check_duration > 0) { + auto critical_duration = std::chrono::seconds(_critical_check_duration); + for (auto iter = stats.get_ordered_by_duration().rbegin(); + iter != stats.get_ordered_by_duration().rend() && + iter->last_check_duration > critical_duration; + ++iter) { + append_state_to_output(e_status::critical, &critical_output, iter); + } + } + + if (_critical_check_interval > 0) { + auto critical_interval = std::chrono::seconds(_critical_check_interval); + for (auto iter = stats.get_ordered_by_interval().rbegin(); + iter != stats.get_ordered_by_interval().rend() && + iter->last_check_interval > critical_interval; + ++iter) { + append_state_to_output(e_status::critical, &critical_output, iter); + } + } + + std::string warning_output; + if (_warning_check_duration) { + auto warning_duration = std::chrono::seconds(_warning_check_duration); + for (auto iter = stats.get_ordered_by_duration().rbegin(); + iter != stats.get_ordered_by_duration().rend() && + iter->last_check_duration > warning_duration; + ++iter) { + append_state_to_output(e_status::warning, &warning_output, iter); + } + } + + if (_warning_check_interval) { + auto warning_interval = std::chrono::seconds(_warning_check_interval); + for (auto iter = stats.get_ordered_by_interval().rbegin(); + iter != stats.get_ordered_by_interval().rend() && + iter->last_check_interval > warning_interval; + ++iter) { + append_state_to_output(e_status::warning, &warning_output, iter); + } + } + + unsigned max_check_interval = + std::chrono::duration_cast( + stats.get_ordered_by_interval().rbegin()->last_check_interval) + .count(); + unsigned max_check_duration = + std::chrono::duration_cast( + 
stats.get_ordered_by_duration().rbegin()->last_check_duration) + .count(); + + auto& interval_perf = perf->emplace_back(); + interval_perf.name("interval"); + interval_perf.unit("s"); + interval_perf.value(max_check_interval); + if (_warning_check_interval > 0) { + interval_perf.warning_low(0); + interval_perf.warning(_warning_check_interval); + } + if (_critical_check_interval > 0) { + interval_perf.critical_low(0); + interval_perf.critical(_critical_check_interval); + } + + auto& duration_perf = perf->emplace_back(); + duration_perf.name("runtime"); + duration_perf.unit("s"); + duration_perf.value(max_check_duration); + if (_warning_check_duration > 0) { + duration_perf.warning_low(0); + duration_perf.warning(_warning_check_duration); + } + if (_critical_check_duration > 0) { + duration_perf.critical_low(0); + duration_perf.critical(_critical_check_duration); + } + + if (ret != e_status::ok) { + if (!critical_output.empty()) { + output->append(critical_output); + if (!warning_output.empty()) { + *output += " - "; + output->append(warning_output); + } + } else if (!warning_output.empty()) { + output->append(warning_output); + } + *output += " - "; + } else { + *output = "OK: "; + } + fmt::format_to(std::back_inserter(*output), _info_output, get_stats().size(), + average_runtime / get_stats().size()); + + return ret; +} + +void check_health::help(std::ostream& help_stream) { + help_stream << R"( +- health params: + - warning-interval (s): warning if a check interval is greater than this value + - critical-interval (s): critical if a check interval is greater than this value + - warning-runtime (s): warning if a check duration is greater than this value + - critical-runtime (s): critical if a check duration is greater than this value + An example of configuration: + { + "check": "health", + "args": { + "warning-runtime": 30, + "critical-runtime": 50, + "warning-interval": 60, + "critical-interval": "90" + } + } + Examples of output: + CRITICAL: command2 runtime:25s 
interval:15s - WARNING: command1 runtime:20s interval:10s - Version: 24.11.0 - Connection mode: Poller initiated - Current configuration: 2 checks - Average runtime: 22s + Metrics: + runtime + interval + +)"; +} diff --git a/agent/src/config.cc b/agent/src/config.cc index 47098e0628c..a6cb759e614 100644 --- a/agent/src/config.cc +++ b/agent/src/config.cc @@ -19,7 +19,6 @@ #include #include "com/centreon/common/rapidjson_helper.hh" -#include "com/centreon/exceptions/msg_fmt.hh" #include "config.hh" using namespace com::centreon::agent; @@ -103,6 +102,8 @@ const std::string_view config::config_schema(R"( )"); +std::unique_ptr config::_global_conf; + config::config(const std::string& path) { static common::json_validator validator(config_schema); rapidjson::Document file_content_d; diff --git a/agent/src/config_win.cc b/agent/src/config_win.cc index 64d822b04dc..8abe509d395 100644 --- a/agent/src/config_win.cc +++ b/agent/src/config_win.cc @@ -18,11 +18,12 @@ #include -#include "com/centreon/exceptions/msg_fmt.hh" #include "config.hh" using namespace com::centreon::agent; +std::unique_ptr config::_global_conf; + /** * @brief Construct a new config::config object * diff --git a/agent/src/drive_size.cc b/agent/src/drive_size.cc index 6f27507d6e2..f2e58b14a1d 100644 --- a/agent/src/drive_size.cc +++ b/agent/src/drive_size.cc @@ -17,10 +17,7 @@ */ #include "drive_size.hh" -#include "check.hh" -#include "com/centreon/common/perfdata.hh" #include "com/centreon/common/rapidjson_helper.hh" -#include "com/centreon/exceptions/msg_fmt.hh" using namespace com::centreon::agent; @@ -336,7 +333,8 @@ check_drive_size::check_drive_size( const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, first_start_expected, @@ -345,7 +343,8 @@ check_drive_size::check_drive_size( cmd_name, cmd_line, cnf, - 
std::move(handler)), + std::move(handler), + stat), _filter(std::make_shared(args)), _prct_threshold(false), _free_threshold(false), @@ -590,7 +589,7 @@ void check_drive_size::thread_kill() { void check_drive_size::help(std::ostream& help_stream) { help_stream << R"( -- storage params:" +- storage params: unit (default %): unit of threshold. If different from % threshold are in bytes free (default used): true: threshold is applied on free space and service become warning if free sapce is lower than threshold false: threshold is applied on used space and service become warning if used space is higher than threshold diff --git a/agent/src/main.cc b/agent/src/main.cc index 8617d55f9f6..aac284607ea 100644 --- a/agent/src/main.cc +++ b/agent/src/main.cc @@ -21,6 +21,8 @@ #include #include "check_cpu.hh" +#include "check_health.hh" + #include "config.hh" #include "drive_size.hh" #include "streaming_client.hh" @@ -107,12 +109,14 @@ int main(int argc, char* argv[]) { argv[0], config::config_schema); std::cout << std::endl << "Native checks options:" << std::endl; check_cpu::help(std::cout); + check_health::help(std::cout); return 1; } - std::unique_ptr conf; try { - conf = std::make_unique(argv[1]); + // argv[1] must be converted to std::string explicitly so that the + // compiler selects the string overload of config::load + config::load(std::string(argv[1])); } catch (const std::exception& e) { SPDLOG_ERROR("fail to parse config file {}: {}", argv[1], e.what()); return 1; @@ -126,20 +130,21 @@ int main(int argc, char* argv[]) { const std::string logger_name = "centreon-monitoring-agent"; - if (conf->get_log_type() == config::to_file) { + const config& conf = config::instance(); + if (conf.get_log_type() == config::to_file) { try { - if (!conf->get_log_file().empty()) { - if (conf->get_log_files_max_size() > 0 && - conf->get_log_files_max_number() > 0) { g_logger =
spdlog::rotating_logger_mt( - logger_name, conf->get_log_file(), - conf->get_log_files_max_size() * 0x100000, - conf->get_log_files_max_number()); + logger_name, conf.get_log_file(), + conf.get_log_files_max_size() * 0x100000, + conf.get_log_files_max_number()); } else { SPDLOG_INFO( "no log-max-file-size option or no log-max-files option provided " "=> logs will not be rotated by centagent"); - g_logger = spdlog::basic_logger_mt(logger_name, conf->get_log_file()); + g_logger = spdlog::basic_logger_mt(logger_name, conf.get_log_file()); } } else { SPDLOG_ERROR( @@ -147,14 +152,14 @@ int main(int argc, char* argv[]) { g_logger = spdlog::stdout_color_mt(logger_name); } } catch (const std::exception& e) { - SPDLOG_CRITICAL("Can't log to {}: {}", conf->get_log_file(), e.what()); + SPDLOG_CRITICAL("Can't log to {}: {}", conf.get_log_file(), e.what()); return 2; } } else { g_logger = spdlog::stdout_color_mt(logger_name); } - g_logger->set_level(conf->get_log_level()); + g_logger->set_level(conf.get_log_level()); g_logger->flush_on(spdlog::level::warn); @@ -174,23 +179,23 @@ int main(int argc, char* argv[]) { _signals.async_wait(signal_handler); grpc_conf = std::make_shared( - conf->get_endpoint(), conf->use_encryption(), - read_file(conf->get_public_cert_file()), - read_file(conf->get_private_key_file()), - read_file(conf->get_ca_certificate_file()), conf->get_ca_name(), true, - 30, conf->get_second_max_reconnect_backoff()); + conf.get_endpoint(), conf.use_encryption(), + read_file(conf.get_public_cert_file()), + read_file(conf.get_private_key_file()), + read_file(conf.get_ca_certificate_file()), conf.get_ca_name(), true, 30, + conf.get_second_max_reconnect_backoff()); } catch (const std::exception& e) { SPDLOG_CRITICAL("fail to parse input params: {}", e.what()); return -1; } - if (conf->use_reverse_connection()) { + if (conf.use_reverse_connection()) { _streaming_server = streaming_server::load(g_io_context, g_logger, - grpc_conf, conf->get_host()); + grpc_conf, 
conf.get_host()); } else { _streaming_client = streaming_client::load(g_io_context, g_logger, - grpc_conf, conf->get_host()); + grpc_conf, conf.get_host()); } try { diff --git a/agent/src/main_win.cc b/agent/src/main_win.cc index 099edcdeee1..09128e057b4 100644 --- a/agent/src/main_win.cc +++ b/agent/src/main_win.cc @@ -18,6 +18,7 @@ #include #include "check_cpu.hh" +#include "check_health.hh" #include "check_memory.hh" #include "check_service.hh" #include "check_uptime.hh" @@ -125,6 +126,7 @@ void show_help() { check_uptime::help(std::cout); check_drive_size::help(std::cout); check_service::help(std::cout); + check_health::help(std::cout); } /** @@ -135,11 +137,10 @@ void show_help() { * @return int exit status returned to command line (0 success) */ int _main(bool service_start) { - const char* registry_path = "SOFTWARE\\Centreon\\" SERVICE_NAME; + std::string registry_path = "SOFTWARE\\Centreon\\" SERVICE_NAME; - std::unique_ptr conf; try { - conf = std::make_unique(registry_path); + config::load(registry_path); } catch (const std::exception& e) { SPDLOG_ERROR("fail to read conf from registry {}: {}", registry_path, e.what()); @@ -163,37 +164,39 @@ int _main(bool service_start) { g_logger = std::make_shared("", sink); }; + const config& conf = config::instance(); + try { - if (conf->get_log_type() == config::to_file) { - if (!conf->get_log_file().empty()) { - if (conf->get_log_files_max_size() > 0 && - conf->get_log_files_max_number() > 0) { + if (conf.get_log_type() == config::to_file) { + if (!conf.get_log_file().empty()) { + if (conf.get_log_files_max_size() > 0 && + conf.get_log_files_max_number() > 0) { g_logger = spdlog::rotating_logger_mt( - logger_name, conf->get_log_file(), - conf->get_log_files_max_size() * 0x100000, - conf->get_log_files_max_number()); + logger_name, conf.get_log_file(), + conf.get_log_files_max_size() * 0x100000, + conf.get_log_files_max_number()); } else { SPDLOG_INFO( "no log-max-file-size option or no log-max-files option provided 
" "=> logs will not be rotated by centagent"); - g_logger = spdlog::basic_logger_mt(logger_name, conf->get_log_file()); + g_logger = spdlog::basic_logger_mt(logger_name, conf.get_log_file()); } } else { SPDLOG_ERROR( "log-type=file needs the option log-file => log to event log"); create_event_logger(); } - } else if (conf->get_log_type() == config::to_stdout) { + } else if (conf.get_log_type() == config::to_stdout) { g_logger = spdlog::stdout_color_mt(logger_name); } else { create_event_logger(); } } catch (const std::exception& e) { - SPDLOG_CRITICAL("Can't log to {}: {}", conf->get_log_file(), e.what()); + SPDLOG_CRITICAL("Can't log to {}: {}", conf.get_log_file(), e.what()); return 2; } - g_logger->set_level(conf->get_log_level()); + g_logger->set_level(conf.get_log_level()); g_logger->flush_on(spdlog::level::warn); @@ -206,23 +209,23 @@ int _main(bool service_start) { _signals.async_wait(signal_handler); grpc_conf = std::make_shared( - conf->get_endpoint(), conf->use_encryption(), - read_file(conf->get_public_cert_file()), - read_file(conf->get_private_key_file()), - read_file(conf->get_ca_certificate_file()), conf->get_ca_name(), true, - 30, conf->get_second_max_reconnect_backoff()); + conf.get_endpoint(), conf.use_encryption(), + read_file(conf.get_public_cert_file()), + read_file(conf.get_private_key_file()), + read_file(conf.get_ca_certificate_file()), conf.get_ca_name(), true, 30, + conf.get_second_max_reconnect_backoff()); } catch (const std::exception& e) { SPDLOG_CRITICAL("fail to parse input params: {}", e.what()); return -1; } - if (conf->use_reverse_connection()) { + if (conf.use_reverse_connection()) { _streaming_server = streaming_server::load(g_io_context, g_logger, - grpc_conf, conf->get_host()); + grpc_conf, conf.get_host()); } else { _streaming_client = streaming_client::load(g_io_context, g_logger, - grpc_conf, conf->get_host()); + grpc_conf, conf.get_host()); } try { diff --git a/agent/src/native_check_base.cc b/agent/src/native_check_base.cc 
index 6f005c02f57..593f89b4069 100644 --- a/agent/src/native_check_base.cc +++ b/agent/src/native_check_base.cc @@ -94,7 +94,8 @@ native_check_base::native_check_base( const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, first_start_expected, @@ -103,7 +104,8 @@ native_check_base::native_check_base( cmd_name, cmd_line, cnf, - std::move(handler)) {} + std::move(handler), + stat) {} /** * @brief start a measure diff --git a/agent/src/native_check_cpu_base.cc b/agent/src/native_check_cpu_base.cc index d646c0fda2d..01f04e50023 100644 --- a/agent/src/native_check_cpu_base.cc +++ b/agent/src/native_check_cpu_base.cc @@ -228,7 +228,8 @@ native_check_cpu::native_check_cpu( const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, first_start_expected, @@ -237,7 +238,8 @@ native_check_cpu::native_check_cpu( cmd_name, cmd_line, cnf, - std::move(handler)), + std::move(handler), + stat), _nb_core(std::thread::hardware_concurrency()), _cpu_detailed(false), diff --git a/agent/src/scheduler.cc b/agent/src/scheduler.cc index 8718a148183..08741b12a2c 100644 --- a/agent/src/scheduler.cc +++ b/agent/src/scheduler.cc @@ -17,7 +17,10 @@ */ #include "scheduler.hh" +#include +#include "check.hh" #include "check_cpu.hh" +#include "check_health.hh" #ifdef _WIN32 #include "check_memory.hh" #include "check_service.hh" @@ -174,6 +177,9 @@ void scheduler::update(const engine_to_agent_request_ptr& conf) { conf->config().check_interval()); if (nb_check > 0) { + // reset stats so that statistics of deleted checks are not kept + checks_statistics::pointer stat = std::make_shared(); + duration time_between_check =
std::chrono::microseconds(conf->config().check_interval() * 1000000) / nb_check; @@ -201,7 +207,8 @@ void scheduler::update(const engine_to_agent_request_ptr& conf) { const std::list& perfdata, const std::list& outputs) { me->_check_handler(check, status, perfdata, outputs); - }); + }, + stat); last_inserted_iter = _waiting_check_queue.emplace_hint( last_inserted_iter, check_to_schedule); next += time_between_check; @@ -303,10 +310,10 @@ void scheduler::stop() { * @param outputs */ void scheduler::_store_result_in_metrics( - const check::pointer& check, - unsigned status, - const std::list& perfdata, - const std::list& outputs) { + [[maybe_unused]] const check::pointer& check, + [[maybe_unused]] unsigned status, + [[maybe_unused]] const std::list& perfdata, + [[maybe_unused]] const std::list& outputs) { // auto scope_metrics = // get_scope_metrics(check->get_host(), check->get_service()); // unsigned now = std::chrono::duration_cast( @@ -544,7 +551,8 @@ std::shared_ptr scheduler::default_check_builder( const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& conf, - check::completion_handler&& handler) { + check::completion_handler&& handler, + const checks_statistics::pointer& stat) { using namespace std::literals; // test native checks where cmd_lin is a json try { @@ -562,24 +570,28 @@ std::shared_ptr scheduler::default_check_builder( if (check_type == "cpu_percentage"sv) { return std::make_shared( io_context, logger, first_start_expected, check_interval, service, - cmd_name, cmd_line, *args, conf, std::move(handler)); + cmd_name, cmd_line, *args, conf, std::move(handler), stat); + } else if (check_type == "health"sv) { + return std::make_shared( + io_context, logger, first_start_expected, check_interval, service, + cmd_name, cmd_line, *args, conf, std::move(handler), stat); #ifdef _WIN32 } else if (check_type == "uptime"sv) { return std::make_shared( io_context, logger, first_start_expected, check_interval, service, - 
cmd_name, cmd_line, *args, conf, std::move(handler)); + cmd_name, cmd_line, *args, conf, std::move(handler), stat); } else if (check_type == "storage"sv) { return std::make_shared( io_context, logger, first_start_expected, check_interval, service, - cmd_name, cmd_line, *args, conf, std::move(handler)); + cmd_name, cmd_line, *args, conf, std::move(handler), stat); } else if (check_type == "memory"sv) { return std::make_shared( io_context, logger, first_start_expected, check_interval, service, - cmd_name, cmd_line, *args, conf, std::move(handler)); + cmd_name, cmd_line, *args, conf, std::move(handler), stat); } else if (check_type == "service"sv) { return std::make_shared( io_context, logger, first_start_expected, check_interval, service, - cmd_name, cmd_line, *args, conf, std::move(handler)); + cmd_name, cmd_line, *args, conf, std::move(handler), stat); #endif } else { throw exceptions::msg_fmt("command {}, unknown native check:{}", cmd_name, @@ -588,6 +600,6 @@ std::shared_ptr scheduler::default_check_builder( } catch (const std::exception&) { return check_exec::load(io_context, logger, first_start_expected, check_interval, service, cmd_name, cmd_line, conf, - std::move(handler)); + std::move(handler), stat); } } diff --git a/agent/src/streaming_client.cc b/agent/src/streaming_client.cc index ab38cc67717..d93b7d93a65 100644 --- a/agent/src/streaming_client.cc +++ b/agent/src/streaming_client.cc @@ -191,7 +191,7 @@ void streaming_client::_send(const std::shared_ptr& request) { * @param request */ void streaming_client::on_incomming_request( - const std::shared_ptr& caller, + const std::shared_ptr& caller [[maybe_unused]], const std::shared_ptr& request) { // incoming request is used in main thread _io_context->post([request, sched = _sched]() { sched->update(request); }); diff --git a/agent/test/CMakeLists.txt b/agent/test/CMakeLists.txt index 6d5152ee6ec..6150262e440 100644 --- a/agent/test/CMakeLists.txt +++ b/agent/test/CMakeLists.txt @@ -20,6 +20,7 @@ set( 
SRC_COMMON check_test.cc check_exec_test.cc drive_size_test.cc + check_health_test.cc scheduler_test.cc test_main.cc ) diff --git a/agent/test/check_exec_test.cc b/agent/test/check_exec_test.cc index 60f49bc77eb..23966c702b4 100644 --- a/agent/test/check_exec_test.cc +++ b/agent/test/check_exec_test.cc @@ -17,6 +17,8 @@ */ #include +#include +#include "check.hh" #include "check_exec.hh" @@ -47,9 +49,11 @@ TEST(check_exec_test, echo) { std::shared_ptr check = check_exec::load( g_io_context, spdlog::default_logger(), {}, {}, serv, cmd_name, command_line, engine_to_agent_request_ptr(), - [&](const std::shared_ptr& caller, + [&]([[maybe_unused]] const std::shared_ptr& + caller, int statuss, - const std::list& perfdata, + [[maybe_unused]] const std::list& + perfdata, const std::list& output) { { std::lock_guard l(mut); @@ -57,7 +61,8 @@ TEST(check_exec_test, echo) { outputs = output; } cond.notify_one(); - }); + }, + std::make_shared()); check->start_check(std::chrono::seconds(1)); std::unique_lock l(mut); @@ -75,14 +80,17 @@ TEST(check_exec_test, timeout) { std::shared_ptr check = check_exec::load( g_io_context, spdlog::default_logger(), {}, {}, serv, cmd_name, command_line, engine_to_agent_request_ptr(), - [&](const std::shared_ptr& caller, + [&]([[maybe_unused]] const std::shared_ptr& + caller, int statuss, - const std::list& perfdata, + [[maybe_unused]] const std::list& + perfdata, const std::list& output) { status = statuss; outputs = output; cond.notify_one(); - }); + }, + std::make_shared()); check->start_check(std::chrono::seconds(1)); int pid = check->get_pid(); @@ -119,9 +127,11 @@ TEST(check_exec_test, bad_command) { std::shared_ptr check = check_exec::load( g_io_context, spdlog::default_logger(), {}, {}, serv, cmd_name, command_line, engine_to_agent_request_ptr(), - [&](const std::shared_ptr& caller, + [&]([[maybe_unused]] const std::shared_ptr& + caller, int statuss, - const std::list& perfdata, + [[maybe_unused]] const std::list& + perfdata, const 
std::list& output) { { std::lock_guard l(mut); @@ -131,7 +141,8 @@ TEST(check_exec_test, bad_command) { SPDLOG_INFO("end of {}", command_line); std::this_thread::sleep_for(std::chrono::milliseconds(50)); cond.notify_one(); - }); + }, + std::make_shared()); check->start_check(std::chrono::seconds(1)); std::unique_lock l(mut); @@ -156,14 +167,16 @@ TEST(check_exec_test, recurse_not_lock) { g_io_context, spdlog::default_logger(), {}, {}, serv, cmd_name, command_line, engine_to_agent_request_ptr(), [&](const std::shared_ptr& caller, int, - const std::list& perfdata, - const std::list& output) { + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& output) { if (!cpt) { ++cpt; caller->start_check(std::chrono::seconds(1)); } else cond.notify_one(); - }); + }, + std::make_shared()); check->start_check(std::chrono::seconds(1)); std::mutex mut; diff --git a/agent/test/check_health_test.cc b/agent/test/check_health_test.cc new file mode 100644 index 00000000000..339241d8ab9 --- /dev/null +++ b/agent/test/check_health_test.cc @@ -0,0 +1,339 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#include +#include +#include "check.hh" +#include "com/centreon/common/rapidjson_helper.hh" + +#include "check_health.hh" +#include "config.hh" +#include "version.hh" + +extern std::shared_ptr g_io_context; + +using namespace com::centreon::agent; +using namespace std::string_literals; +using namespace com::centreon::common::literals; +using namespace std::chrono_literals; + +TEST(check_health_test, no_threshold_no_reverse) { + config::load(false); + + rapidjson::Document check_args = + R"({ "warning-interval" : "", "critical-interval" : ""})"_json; + + auto stats = std::make_shared(); + + stats->add_interval_stat("command1", 10s); + stats->add_duration_stat("command1", 20s); + stats->add_interval_stat("command2", 15s); + stats->add_duration_stat("command2", 25s); + + check_health checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv"s, "cmd_name"s, + "cmd_line"s, check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}, + stats); + + std::string output; + std::list perfs; + e_status ret = checker.compute(&output, &perfs); + EXPECT_EQ(ret, e_status::ok); + EXPECT_EQ(output, "OK: Version: " CENTREON_AGENT_VERSION + " - Connection mode: Agent initiated - Current " + "configuration: 2 checks - Average runtime: 22s"); + EXPECT_EQ(perfs.size(), 2); + for (const auto& perf : perfs) { + EXPECT_EQ(perf.unit(), "s"); + EXPECT_TRUE(std::isnan(perf.warning_low())); + EXPECT_TRUE(std::isnan(perf.warning())); + EXPECT_TRUE(std::isnan(perf.critical_low())); + EXPECT_TRUE(std::isnan(perf.critical())); + if (perf.name() == "runtime") { + EXPECT_EQ(perf.value(), 25); + } else if (perf.name() == "interval") { + EXPECT_EQ(perf.value(), 15); + } else { + FAIL() << "Unexpected perfdata name: " << perf.name(); + } + } +} + +TEST(check_health_test, no_threshold_reverse) { + 
config::load(true); + + rapidjson::Document check_args = + R"({ "warning-interval" : "", "critical-interval" : ""})"_json; + + auto stats = std::make_shared(); + + stats->add_interval_stat("command1", 10s); + stats->add_duration_stat("command1", 20s); + stats->add_interval_stat("command2", 15s); + stats->add_duration_stat("command2", 25s); + + check_health checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv"s, "cmd_name"s, + "cmd_line"s, check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}, + stats); + + std::string output; + std::list perfs; + e_status ret = checker.compute(&output, &perfs); + EXPECT_EQ(ret, e_status::ok); + EXPECT_EQ(output, "OK: Version: " CENTREON_AGENT_VERSION + " - Connection mode: Poller initiated - Current " + "configuration: 2 checks - Average runtime: 22s"); + EXPECT_EQ(perfs.size(), 2); + for (const auto& perf : perfs) { + EXPECT_EQ(perf.unit(), "s"); + EXPECT_TRUE(std::isnan(perf.warning_low())); + EXPECT_TRUE(std::isnan(perf.warning())); + EXPECT_TRUE(std::isnan(perf.critical_low())); + EXPECT_TRUE(std::isnan(perf.critical())); + if (perf.name() == "runtime") { + EXPECT_EQ(perf.value(), 25); + } else if (perf.name() == "interval") { + EXPECT_EQ(perf.value(), 15); + } else { + FAIL() << "Unexpected perfdata name: " << perf.name(); + } + } +} + +TEST(check_health_test, threshold_1) { + config::load(true); + + rapidjson::Document check_args = + R"({ "warning-interval" : "9", "critical-interval" : "14"})"_json; + + auto stats = std::make_shared(); + + stats->add_interval_stat("command1", 10s); + stats->add_duration_stat("command1", 20s); + stats->add_interval_stat("command2", 15s); + stats->add_duration_stat("command2", 25s); + + check_health checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv"s, "cmd_name"s, + "cmd_line"s, check_args, nullptr, + []([[maybe_unused]] 
const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}, + stats); + + std::string output; + std::list perfs; + e_status ret = checker.compute(&output, &perfs); + EXPECT_EQ(ret, e_status::critical); + EXPECT_EQ(output, + "CRITICAL: command2 runtime:25s interval:15s - WARNING: command1 " + "runtime:20s interval:10s - Version: " CENTREON_AGENT_VERSION + " - Connection mode: Poller initiated - Current configuration: 2 " + "checks - Average runtime: 22s"); + EXPECT_EQ(perfs.size(), 2); + for (const auto& perf : perfs) { + EXPECT_EQ(perf.unit(), "s"); + if (perf.name() == "runtime") { + EXPECT_TRUE(std::isnan(perf.warning_low())); + EXPECT_TRUE(std::isnan(perf.warning())); + EXPECT_TRUE(std::isnan(perf.critical_low())); + EXPECT_TRUE(std::isnan(perf.critical())); + EXPECT_EQ(perf.value(), 25); + } else if (perf.name() == "interval") { + EXPECT_EQ(perf.value(), 15); + EXPECT_EQ(perf.warning_low(), 0); + EXPECT_EQ(perf.warning(), 9); + EXPECT_EQ(perf.critical_low(), 0); + EXPECT_EQ(perf.critical(), 14); + } else { + FAIL() << "Unexpected perfdata name: " << perf.name(); + } + } +} + +TEST(check_health_test, threshold_2) { + config::load(true); + + rapidjson::Document check_args = + R"({ "warning-interval" : "9", "critical-interval" : "14", "warning-runtime": 19, "critical-runtime":24})"_json; + + auto stats = std::make_shared(); + + stats->add_interval_stat("command1", 10s); + stats->add_duration_stat("command1", 20s); + stats->add_interval_stat("command2", 15s); + stats->add_duration_stat("command2", 25s); + + check_health checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv"s, "cmd_name"s, + "cmd_line"s, check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}, + stats); + + std::string output; + std::list perfs; + 
e_status ret = checker.compute(&output, &perfs); + EXPECT_EQ(ret, e_status::critical); + EXPECT_EQ(output, + "CRITICAL: command2 runtime:25s interval:15s - WARNING: command1 " + "runtime:20s interval:10s - Version: " CENTREON_AGENT_VERSION + " - Connection mode: Poller initiated - Current configuration: 2 " + "checks - Average runtime: 22s"); + EXPECT_EQ(perfs.size(), 2); + for (const auto& perf : perfs) { + EXPECT_EQ(perf.unit(), "s"); + if (perf.name() == "runtime") { + EXPECT_EQ(perf.value(), 25); + EXPECT_EQ(perf.warning_low(), 0); + EXPECT_EQ(perf.warning(), 19); + EXPECT_EQ(perf.critical_low(), 0); + EXPECT_EQ(perf.critical(), 24); + } else if (perf.name() == "interval") { + EXPECT_EQ(perf.value(), 15); + EXPECT_EQ(perf.warning_low(), 0); + EXPECT_EQ(perf.warning(), 9); + EXPECT_EQ(perf.critical_low(), 0); + EXPECT_EQ(perf.critical(), 14); + } else { + FAIL() << "Unexpected perfdata name: " << perf.name(); + } + } +} + +TEST(check_health_test, threshold_3) { + config::load(true); + + rapidjson::Document check_args = + R"({ "warning-interval" : "", "critical-interval" : "14", "warning-runtime": 19})"_json; + + auto stats = std::make_shared(); + + stats->add_interval_stat("command1", 10s); + stats->add_duration_stat("command1", 20s); + stats->add_interval_stat("command2", 15s); + stats->add_duration_stat("command2", 25s); + + check_health checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv"s, "cmd_name"s, + "cmd_line"s, check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}, + stats); + + std::string output; + std::list perfs; + e_status ret = checker.compute(&output, &perfs); + EXPECT_EQ(ret, e_status::critical); + EXPECT_EQ(output, + "CRITICAL: command2 runtime:25s interval:15s - WARNING: command1 " + "runtime:20s interval:10s - Version: " CENTREON_AGENT_VERSION + " - Connection mode: Poller initiated - 
Current configuration: 2 " + "checks - Average runtime: 22s"); + EXPECT_EQ(perfs.size(), 2); + for (const auto& perf : perfs) { + EXPECT_EQ(perf.unit(), "s"); + if (perf.name() == "runtime") { + EXPECT_EQ(perf.value(), 25); + EXPECT_EQ(perf.warning_low(), 0); + EXPECT_EQ(perf.warning(), 19); + EXPECT_TRUE(std::isnan(perf.critical_low())); + EXPECT_TRUE(std::isnan(perf.critical())); + } else if (perf.name() == "interval") { + EXPECT_EQ(perf.value(), 15); + EXPECT_TRUE(std::isnan(perf.warning_low())); + EXPECT_TRUE(std::isnan(perf.warning())); + EXPECT_EQ(perf.critical_low(), 0); + EXPECT_EQ(perf.critical(), 14); + } else { + FAIL() << "Unexpected perfdata name: " << perf.name(); + } + } +} + +TEST(check_health_test, threshold_4) { + config::load(true); + + rapidjson::Document check_args = + R"({ "warning-interval" : "", "critical-interval" : "16", "warning-runtime": 19})"_json; + + auto stats = std::make_shared(); + + stats->add_interval_stat("command1", 10s); + stats->add_duration_stat("command1", 20s); + stats->add_interval_stat("command2", 15s); + stats->add_duration_stat("command2", 25s); + + check_health checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv"s, "cmd_name"s, + "cmd_line"s, check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}, + stats); + + std::string output; + std::list perfs; + e_status ret = checker.compute(&output, &perfs); + EXPECT_EQ(ret, e_status::warning); + EXPECT_EQ(output, + "WARNING: command2 runtime:25s interval:15s, command1 runtime:20s " + "interval:10s - Version: " CENTREON_AGENT_VERSION + " - Connection mode: Poller initiated - Current configuration: 2 " + "checks - Average runtime: 22s"); + EXPECT_EQ(perfs.size(), 2); + for (const auto& perf : perfs) { + EXPECT_EQ(perf.unit(), "s"); + if (perf.name() == "runtime") { + EXPECT_EQ(perf.value(), 25); + 
EXPECT_EQ(perf.warning_low(), 0); + EXPECT_EQ(perf.warning(), 19); + EXPECT_TRUE(std::isnan(perf.critical_low())); + EXPECT_TRUE(std::isnan(perf.critical())); + } else if (perf.name() == "interval") { + EXPECT_EQ(perf.value(), 15); + EXPECT_TRUE(std::isnan(perf.warning_low())); + EXPECT_TRUE(std::isnan(perf.warning())); + EXPECT_EQ(perf.critical_low(), 0); + EXPECT_EQ(perf.critical(), 16); + } else { + FAIL() << "Unexpected perfdata name: " << perf.name(); + } + } +} diff --git a/agent/test/check_linux_cpu_test.cc b/agent/test/check_linux_cpu_test.cc index 37bb7495786..f407a0089c4 100644 --- a/agent/test/check_linux_cpu_test.cc +++ b/agent/test/check_linux_cpu_test.cc @@ -157,7 +157,8 @@ TEST(proc_stat_file_test, no_threshold) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); @@ -166,8 +167,6 @@ TEST(proc_stat_file_test, no_threshold) { ASSERT_EQ(perfs.size(), 5); - constexpr float nan_to_cmp = NAN; - for (const auto& perf : perfs) { ASSERT_TRUE(std::isnan(perf.critical_low())); ASSERT_TRUE(std::isnan(perf.critical())); @@ -222,8 +221,6 @@ TEST(proc_stat_file_test, no_threshold_detailed) { std::string output; std::list perfs; - static const char* conf_doc = R"({"cpu-detailed":true})"; - using namespace com::centreon::common::literals; rapidjson::Document check_args = R"({"cpu-detailed":"true"})"_json; @@ -234,7 +231,8 @@ TEST(proc_stat_file_test, no_threshold_detailed) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); @@ -366,7 +364,8 @@ TEST(proc_stat_file_test, threshold_nodetailed) { [[maybe_unused]] int 
status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); @@ -447,7 +446,8 @@ TEST(proc_stat_file_test, threshold_nodetailed2) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); @@ -506,7 +506,8 @@ TEST(proc_stat_file_test, threshold_detailed) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); @@ -580,7 +581,8 @@ TEST(proc_stat_file_test, threshold_detailed2) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); @@ -654,7 +656,8 @@ TEST(proc_stat_file_test, threshold_detailed3) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); diff --git a/agent/test/check_test.cc b/agent/test/check_test.cc index 71ec5999f0e..ca81bdb37e4 100644 --- a/agent/test/check_test.cc +++ b/agent/test/check_test.cc @@ -60,7 +60,8 @@ class dummy_check : public check { command_name, command_line, nullptr, - handler), + handler, + std::make_shared()), _command_duration(command_duration), 
_command_timer(*g_io_context) {} }; @@ -80,7 +81,8 @@ TEST(check_test, timeout) { serv, cmd_name, cmd_line, std::chrono::milliseconds(500), [&status, &output, &handler_call_cpt, &cond]( const std::shared_ptr&, unsigned statuss, - const std::list& perfdata, + [[maybe_unused]] const std::list& + perfdata, const std::list& outputs) { status = statuss; if (outputs.size() == 1) { @@ -117,7 +119,8 @@ TEST(check_test, no_timeout) { serv, cmd_name, cmd_line, std::chrono::milliseconds(100), [&status, &output, &handler_call_cpt, &cond]( const std::shared_ptr&, unsigned statuss, - const std::list& perfdata, + [[maybe_unused]] const std::list& + perfdata, const std::list& outputs) { status = statuss; if (outputs.size() == 1) { diff --git a/agent/test/check_uptime_test.cc b/agent/test/check_uptime_test.cc index df851bf88d1..d08756a387e 100644 --- a/agent/test/check_uptime_test.cc +++ b/agent/test/check_uptime_test.cc @@ -39,7 +39,8 @@ TEST(native_check_uptime, ok) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -69,7 +70,8 @@ TEST(native_check_uptime, ok_m) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -99,7 +101,8 @@ TEST(native_check_uptime, ok_h) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -129,7 +132,8 @@ TEST(native_check_uptime, ok_d) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& 
outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -159,7 +163,8 @@ TEST(native_check_uptime, ok_w) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -189,7 +194,8 @@ TEST(native_check_uptime, warning) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -219,7 +225,8 @@ TEST(native_check_uptime, warning_bis) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -249,7 +256,8 @@ TEST(native_check_uptime, critical) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -278,7 +286,8 @@ TEST(native_check_uptime, critical_bis) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; diff --git a/agent/test/check_windows_cpu_test.cc b/agent/test/check_windows_cpu_test.cc index 67b41cd7abf..cce9d371679 100644 --- a/agent/test/check_windows_cpu_test.cc +++ b/agent/test/check_windows_cpu_test.cc @@ -83,7 +83,8 @@ TEST(native_check_cpu_windows, output_no_threshold) { 
[[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); checker.compute(first, second, &output, &perfs); ASSERT_EQ(output, "OK: CPU(s) average usage is 50.00%"); @@ -145,7 +146,8 @@ TEST(native_check_cpu_windows, output_no_threshold_detailed) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); checker.compute(first, second, &output, &perfs); ASSERT_EQ(output, "OK: CPU(s) average usage is 50.00%"); @@ -240,7 +242,8 @@ TEST(native_check_cpu_windows, output_threshold) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); checker.compute(first, second, &output, &perfs); ASSERT_EQ( @@ -313,7 +316,8 @@ TEST(native_check_cpu_windows, output_threshold_detailed) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); checker.compute(first, second, &output, &perfs); ASSERT_EQ( @@ -463,7 +467,8 @@ TEST(native_check_cpu_windows, compare_kernel_dph) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); rapidjson::Document pdh_check_args = R"({"use-nt-query-system-information":"false" })"_json; @@ -475,7 +480,8 @@ TEST(native_check_cpu_windows, compare_kernel_dph) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); auto first_nt = 
nt_checker.get_cpu_time_snapshot(true); auto first_pdh = pdh_checker.get_cpu_time_snapshot(true); diff --git a/agent/test/check_windows_memory_test.cc b/agent/test/check_windows_memory_test.cc index fe0fecd5545..cd5ca009d7f 100644 --- a/agent/test/check_windows_memory_test.cc +++ b/agent/test/check_windows_memory_test.cc @@ -20,7 +20,6 @@ #include -#include "com/centreon/common/perfdata.hh" #include "com/centreon/common/rapidjson_helper.hh" #include "check_memory.hh" @@ -51,7 +50,8 @@ class test_check : public check_memory { [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}) {} + const std::list& outputs) {}, + std::make_shared()) {} std::shared_ptr> diff --git a/agent/test/check_windows_service_test.cc b/agent/test/check_windows_service_test.cc index dea07374f7f..65facd72938 100644 --- a/agent/test/check_windows_service_test.cc +++ b/agent/test/check_windows_service_test.cc @@ -131,7 +131,8 @@ TEST(check_service, service_no_threshold_all_running) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -199,7 +200,8 @@ TEST(check_service, service_no_threshold_one_by_state) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -264,7 +266,8 @@ TEST(check_service, service_filter_exclude_all_service) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -327,7 +330,8 @@ TEST(check_service, service_filter_allow_some_service) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& 
caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -396,7 +400,8 @@ TEST(check_service, service_filter_exclude_some_service) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -467,7 +472,8 @@ TEST(check_service, service_filter_allow_some_service_warning_running) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -538,7 +544,8 @@ TEST(check_service, service_filter_allow_some_service_warning_stopped) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -608,7 +615,8 @@ TEST(check_service, service_filter_allow_some_service_critical_state) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -678,7 +686,8 @@ TEST(check_service, service_filter_start_auto_true) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -747,7 +756,8 @@ TEST(check_service, service_filter_start_auto_false) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + 
std::make_shared()); auto snap = test_check.measure(); @@ -818,7 +828,8 @@ TEST(check_service, "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); diff --git a/agent/test/drive_size_test.cc b/agent/test/drive_size_test.cc index ddb0c9c2690..f9757f990c0 100644 --- a/agent/test/drive_size_test.cc +++ b/agent/test/drive_size_test.cc @@ -130,7 +130,8 @@ TEST_F(drive_size_test, test_fs_filter1) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -197,7 +198,8 @@ TEST_F(drive_size_test, test_fs_filter_percent) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -264,7 +266,8 @@ TEST_F(drive_size_test, test_fs_filter2) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -319,7 +322,8 @@ TEST_F(drive_size_test, test_fs_filter_percent_2) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -383,7 +387,8 @@ TEST_F(drive_size_test, test_fs_filter_percent_3) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -441,7 +446,8 @@ TEST_F(drive_size_test, test_fs_filter_percent_4) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); { @@ -506,7 +512,8 @@ TEST_F(drive_size_test, test_fs_filter_percent_5) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); 
- }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -566,7 +573,8 @@ TEST_F(drive_size_test, test_fs_filter_percent_6) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -626,7 +634,8 @@ TEST_F(drive_size_test, test_fs_filter_free_percent) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); diff --git a/agent/test/scheduler_test.cc b/agent/test/scheduler_test.cc index 63354e85246..b2bd15bc81b 100644 --- a/agent/test/scheduler_test.cc +++ b/agent/test/scheduler_test.cc @@ -45,7 +45,8 @@ class tempo_check : public check { const engine_to_agent_request_ptr& cnf, int command_exit_status, duration completion_delay, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, exp, @@ -54,7 +55,8 @@ class tempo_check : public check { cmd_name, cmd_line, cnf, - std::move(handler)), + std::move(handler), + stat), _completion_timer(*io_context), _command_exit_status(command_exit_status), _completion_delay(completion_delay) {} @@ -72,7 +74,8 @@ class tempo_check : public check { _completion_timer.async_wait([me = shared_from_this(), this, check_running_index = _get_running_check_index()]( - const boost::system::error_code& err) { + [[maybe_unused]] const boost::system:: + error_code& err) { SPDLOG_TRACE("end of completion timer for serv {}", get_service()); me->on_completion( check_running_index, _command_exit_status, @@ -145,7 +148,9 @@ TEST_F(scheduler_test, no_config) { duration /* check interval */, const std::string& /*service*/, const std::string& /*cmd_name*/, const std::string& /*cmd_line*/, const engine_to_agent_request_ptr& /*engine to agent request*/, - check::completion_handler&&) { return std::shared_ptr(); }); + 
check::completion_handler&&, const checks_statistics::pointer&) { + return std::shared_ptr(); + }); std::weak_ptr weak_shed(sched); sched.reset(); @@ -188,11 +193,12 @@ TEST_F(scheduler_test, correct_schedule) { const std::string& service, const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, - check::completion_handler&& handler) { + check::completion_handler&& handler, + const checks_statistics::pointer& stat) { return std::make_shared( io_context, logger, start_expected, check_interval, service, cmd_name, cmd_line, engine_to_agent_request, 0, - std::chrono::milliseconds(50), std::move(handler)); + std::chrono::milliseconds(50), std::move(handler), stat); }); std::this_thread::sleep_for(std::chrono::milliseconds(10100)); @@ -261,11 +267,12 @@ TEST_F(scheduler_test, time_out) { const std::string& service, const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, - check::completion_handler&& handler) { + check::completion_handler&& handler, + const checks_statistics::pointer& stat) { return std::make_shared( io_context, logger, start_expected, check_interval, service, cmd_name, cmd_line, engine_to_agent_request, 0, - std::chrono::milliseconds(1500), std::move(handler)); + std::chrono::milliseconds(1500), std::move(handler), stat); }); std::unique_lock l(m); export_cond.wait(l); @@ -301,7 +308,6 @@ TEST_F(scheduler_test, time_out) { TEST_F(scheduler_test, correct_output_examplar) { std::shared_ptr exported_request; std::condition_variable export_cond; - time_point now = std::chrono::system_clock::now(); std::shared_ptr sched = scheduler::load( g_io_context, spdlog::default_logger(), "my_host", create_conf(2, 1, 2, 10, 1), @@ -315,11 +321,12 @@ TEST_F(scheduler_test, correct_output_examplar) { const std::string& service, const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, - 
check::completion_handler&& handler) { + check::completion_handler&& handler, + const checks_statistics::pointer& stat) { return std::make_shared( io_context, logger, start_expected, check_interval, service, cmd_name, cmd_line, engine_to_agent_request, 0, - std::chrono::milliseconds(10), std::move(handler)); + std::chrono::milliseconds(10), std::move(handler), stat); }); std::mutex m; std::unique_lock l(m); @@ -398,7 +405,8 @@ class concurent_check : public check { const engine_to_agent_request_ptr& cnf, int command_exit_status, duration completion_delay, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, exp, @@ -407,7 +415,8 @@ class concurent_check : public check { cmd_name, cmd_line, cnf, - std::move(handler)), + std::move(handler), + stat), _completion_timer(*io_context), _command_exit_status(command_exit_status), _completion_delay(completion_delay) {} @@ -424,7 +433,8 @@ class concurent_check : public check { _completion_timer.async_wait([me = shared_from_this(), this, check_running_index = _get_running_check_index()]( - const boost::system::error_code& err) { + [[maybe_unused]] const boost::system:: + error_code& err) { active_checks.erase(this); checked.insert(this); SPDLOG_TRACE("end of completion timer for serv {}", get_service()); @@ -448,14 +458,15 @@ TEST_F(scheduler_test, max_concurent) { std::shared_ptr sched = scheduler::load( g_io_context, spdlog::default_logger(), "my_host", create_conf(200, 10, 1, 10, 1), - [&](const std::shared_ptr& req) {}, + [&]([[maybe_unused]] const std::shared_ptr& req) {}, [](const std::shared_ptr& io_context, const std::shared_ptr& logger, time_point start_expected, duration check_interval, const std::string& service, const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, - check::completion_handler&& handler) { + check::completion_handler&& handler, + const 
checks_statistics::pointer& stat) { return std::make_shared( io_context, logger, start_expected, check_interval, service, cmd_name, cmd_line, engine_to_agent_request, 0, @@ -463,7 +474,7 @@ TEST_F(scheduler_test, max_concurent) { 10) /*the - 10 is for some delay in test execution from start expected*/ , - std::move(handler)); + std::move(handler), stat); }); // to many tests to be completed in eleven second diff --git a/tests/broker-engine/cma.robot b/tests/broker-engine/cma.robot index a7209a9852b..9678c3538d1 100644 --- a/tests/broker-engine/cma.robot +++ b/tests/broker-engine/cma.robot @@ -675,6 +675,74 @@ BEOTEL_CENTREON_AGENT_CHECK_NATIVE_SERVICE Should Be True ${result} resources table not updated +BEOTEL_CENTREON_AGENT_CHECK_HEALTH + [Documentation] agent check health and we expect to get it in check result + [Tags] broker engine opentelemetry MON-147934 + Ctn Config Engine ${1} ${2} ${2} + Ctn Add Otl ServerModule + ... 0 + ... {"otel_server":{"host": "0.0.0.0","port": 4317},"max_length_grpc_log":0,"centreon_agent":{"check_interval":10, "export_period":15}} + Ctn Config Add Otl Connector + ... 0 + ... OTEL connector + ... 
opentelemetry --processor=centreon_agent --extractor=attributes --host_path=resource_metrics.resource.attributes.host.name --service_path=resource_metrics.resource.attributes.service.name + Ctn Engine Config Replace Value In Services ${0} service_1 check_command cpu_check + Ctn Engine Config Replace Value In Services ${0} service_2 check_command health_check + Ctn Set Services Passive 0 service_[1-2] + + + Ctn Engine Config Add Command ${0} cpu_check {"check": "cpu_percentage"} OTEL connector + Ctn Engine Config Add Command ${0} health_check {"check": "health"} OTEL connector + Ctn Engine Config Add Command ${0} health_check_warning {"check": "health", "args":{"warning-interval": "5"} } OTEL connector + Ctn Engine Config Add Command ${0} health_check_critical {"check": "health", "args":{"warning-interval": "5", "critical-interval": "6"} } OTEL connector + + Ctn Engine Config Set Value 0 log_level_checks trace + + Ctn Clear Db metrics + + Ctn Config Broker central + Ctn Config Broker module + Ctn Config Broker rrd + Ctn Config Centreon Agent + Ctn Broker Config Log central sql trace + + Ctn Config BBDO3 1 + Ctn Clear Retention + + ${start} Ctn Get Round Current Date + Ctn Start Broker + Ctn Start Engine + Ctn Start Agent + + # Let's wait for the otel server start + Ctn Wait For Otel Server To Be Ready ${start} + + Log To Console service_1 and service_2 must be ok + ${result} Ctn Check Service Resource Status With Timeout host_1 service_1 0 120 HARD + Should Be True ${result} resources table not updated for service_1 + + ${result} Ctn Check Service Resource Status With Timeout host_1 service_2 0 60 HARD + Should Be True ${result} resources table not updated for service_2 + + ${metrics_list} Create List cpu.utilization.percentage 0#core.cpu.utilization.percentage + ${result} Ctn Compare Metrics Of Service 1 ${metrics_list} 30 + Should Be True ${result} cpu metrics not updated + + ${metrics_list} Create List runtime interval + ${result} Ctn Compare Metrics Of Service 2 
${metrics_list} 30 + Should Be True ${result} health metrics not updated + + Log To Console service_2 must be warning + Ctn Engine Config Replace Value In Services ${0} service_2 check_command health_check_warning + Ctn Reload Engine + ${result} Ctn Check Service Resource Status With Timeout host_1 service_2 1 60 ANY + Should Be True ${result} resources table not updated for service_2 + + Log To Console service_2 must be critical + Ctn Engine Config Replace Value In Services ${0} service_2 check_command health_check_critical + Ctn Reload Engine + ${result} Ctn Check Service Resource Status With Timeout host_1 service_2 2 60 ANY + Should Be True ${result} resources table not updated for service_2 *** Keywords ***