Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(system_monitor): move top command execution to a timer #948

Merged
merged 4 commits into from
May 27, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,6 @@ class ProcessMonitor : public rclcpp::Node
*/
explicit ProcessMonitor(const rclcpp::NodeOptions & options);

/**
* @brief Update the diagnostic state
*/
void update();

protected:
using DiagStatus = diagnostic_msgs::msg::DiagnosticStatus;

Expand Down Expand Up @@ -110,6 +105,11 @@ class ProcessMonitor : public rclcpp::Node
std::vector<std::shared_ptr<DiagTask>> * tasks, const std::string & message,
const std::string & error_command, const std::string & content);

/**
* @brief timer callback to execute top command
*/
void onTimer();

diagnostic_updater::Updater updater_; //!< @brief Updater class which advertises to /diagnostics

char hostname_[HOST_NAME_MAX + 1]; //!< @brief host name
Expand All @@ -118,7 +118,11 @@ class ProcessMonitor : public rclcpp::Node
std::vector<std::shared_ptr<DiagTask>>
load_tasks_; //!< @brief list of diagnostics tasks for high load procs
std::vector<std::shared_ptr<DiagTask>>
memory_tasks_; //!< @brief list of diagnostics tasks for high memory procs
memory_tasks_; //!< @brief list of diagnostics tasks for high memory procs
rclcpp::TimerBase::SharedPtr timer_; //!< @brief timer to execute top command
std::string top_output_; //!< @brief output from top command
bool is_top_error_; //!< @brief flag if an error occurs
double elapsed_ms_; //!< @brief Execution time of top command
};

#endif // SYSTEM_MONITOR__PROCESS_MONITOR__PROCESS_MONITOR_HPP_
1 change: 1 addition & 0 deletions system/system_monitor/package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
<depend>rclcpp</depend>
<depend>rclcpp_components</depend>
<depend>std_msgs</depend>
<depend>tier4_autoware_utils</depend>
<depend>tier4_external_api_msgs</depend>

<exec_depend>chrony</exec_depend>
Expand Down
62 changes: 41 additions & 21 deletions system/system_monitor/src/process_monitor/process_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

#include "system_monitor/system_monitor_utility.hpp"

#include <tier4_autoware_utils/system/stop_watch.hpp>

#include <fmt/format.h>

#include <memory>
Expand All @@ -33,6 +35,8 @@ ProcessMonitor::ProcessMonitor(const rclcpp::NodeOptions & options)
updater_(this),
num_of_procs_(declare_parameter<int>("num_of_procs", 5))
{
using namespace std::literals::chrono_literals;

int index;

gethostname(hostname_, sizeof(hostname_));
Expand All @@ -50,34 +54,23 @@ ProcessMonitor::ProcessMonitor(const rclcpp::NodeOptions & options)
memory_tasks_.push_back(task);
updater_.add(*task);
}
}

void ProcessMonitor::update() { updater_.force_update(); }
// Start timer to execute top command
timer_ = rclcpp::create_timer(this, get_clock(), 1s, std::bind(&ProcessMonitor::onTimer, this));
ito-san marked this conversation as resolved.
Show resolved Hide resolved
}

void ProcessMonitor::monitorProcesses(diagnostic_updater::DiagnosticStatusWrapper & stat)
{
// Remember start time to measure elapsed time
const auto t_start = SystemMonitorUtility::startMeasurement();
std::string str = top_output_;

bp::ipstream is_err;
bp::ipstream is_out;
std::ostringstream os;

// Get processes
bp::child c("top -bn1 -o %CPU -w 128", bp::std_out > is_out, bp::std_err > is_err);
c.wait();
if (c.exit_code() != 0) {
is_err >> os.rdbuf();
if (is_top_error_) {
stat.summary(DiagStatus::ERROR, "top error");
stat.add("top", os.str().c_str());
setErrorContent(&load_tasks_, "top error", "top", os.str().c_str());
setErrorContent(&memory_tasks_, "top error", "top", os.str().c_str());
stat.add("top", str);
setErrorContent(&load_tasks_, "top error", "top", str);
setErrorContent(&memory_tasks_, "top error", "top", str);
return;
}

is_out >> os.rdbuf();
std::string str = os.str();

// Get task summary
getTasksSummary(stat, str);
// Remove header
Expand All @@ -89,8 +82,7 @@ void ProcessMonitor::monitorProcesses(diagnostic_updater::DiagnosticStatusWrappe
// Get high memory processes
getHighMemoryProcesses(str);

// Measure elapsed time since start time and report
SystemMonitorUtility::stopMeasurement(t_start, stat);
stat.addf("execution time", "%f ms", elapsed_ms_);
}

void ProcessMonitor::getTasksSummary(
Expand Down Expand Up @@ -310,5 +302,33 @@ void ProcessMonitor::setErrorContent(
}
}

void ProcessMonitor::onTimer()
{
is_top_error_ = false;

// Start to measure elapsed time
tier4_autoware_utils::StopWatch<std::chrono::milliseconds> stop_watch;
stop_watch.tic("execution_time");

bp::ipstream is_err;
bp::ipstream is_out;
std::ostringstream os;

// Get processes
bp::child c("top -bn1 -o %CPU -w 128", bp::std_out > is_out, bp::std_err > is_err);
c.wait();
if (c.exit_code() != 0) {
is_top_error_ = true;
is_err >> os.rdbuf();
top_output_ = os.str();
return;
}

is_out >> os.rdbuf();
top_output_ = os.str();

elapsed_ms_ = stop_watch.toc("execution_time");
}

#include <rclcpp_components/register_node_macro.hpp>
RCLCPP_COMPONENTS_REGISTER_NODE(ProcessMonitor)