| // Copyright 2024 The ChromiumOS Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "heartd/daemon/heartbeat_tracker.h" |
| |
| #include <algorithm> |
| #include <utility> |
| |
| #include <base/time/time.h> |
| #include <mojo/public/cpp/bindings/pending_receiver.h> |
| |
| #include "heartd/daemon/utils/mojo_output.h" |
| #include "heartd/mojom/heartd.mojom.h" |
| |
| namespace heartd { |
| |
| namespace { |
| |
| namespace mojom = ::ash::heartd::mojom; |
| |
| } // namespace |
| |
| HeartbeatTracker::HeartbeatTracker( |
| mojom::ServiceName name, mojo::PendingReceiver<mojom::Pacemaker> receiver) |
| : name_(name), |
| receiver_(this, std::move(receiver)), |
| last_dryrun_response_(mojom::HeartbeatResponse::kSuccess) { |
| receiver_.set_disconnect_handler(base::BindOnce( |
| &HeartbeatTracker::OnPacemakerDisconnect, base::Unretained(this))); |
| last_touch_time_ = base::Time().Now(); |
| } |
| |
| HeartbeatTracker::~HeartbeatTracker() = default; |
| |
| void HeartbeatTracker::SendHeartbeat(SendHeartbeatCallback callback) { |
| last_touch_time_ = base::Time().Now(); |
| std::move(callback).Run(last_dryrun_response_); |
| } |
| |
| void HeartbeatTracker::StopMonitor(StopMonitorCallback callback) { |
| LOG(INFO) << "Stop monitoring heartbeat for service: " << ToStr(name_); |
| stop_monitor_ = true; |
| std::move(callback).Run(); |
| } |
| |
| bool HeartbeatTracker::IsPacemakerBound() { |
| return receiver_.is_bound(); |
| } |
| |
| bool HeartbeatTracker::IsStopMonitor() { |
| return stop_monitor_; |
| } |
| |
| void HeartbeatTracker::RebindPacemaker( |
| mojo::PendingReceiver<mojom::Pacemaker> receiver) { |
| CHECK(!IsPacemakerBound()) |
| << "Failed to rebind pacemaker for service: " << ToStr(name_); |
| stop_monitor_ = false; |
| receiver_.Bind(std::move(receiver)); |
| receiver_.set_disconnect_handler(base::BindOnce( |
| &HeartbeatTracker::OnPacemakerDisconnect, base::Unretained(this))); |
| } |
| |
| void HeartbeatTracker::OnPacemakerDisconnect() { |
| // We don't need to increase the |failure_count_| here because once the |
| // pacemaker is disconnected, |last_touch_time_| won't change anymore so |
| // |failure_count_| will be increased in |VerifyTimeGap| periodically. |
| receiver_.reset(); |
| } |
| |
| void HeartbeatTracker::SetupArgument( |
| mojom::HeartbeatServiceArgumentPtr argument) { |
| base::TimeDelta threshold = |
| base::Seconds(argument->verification_window_seconds); |
| verification_window_ = std::max(verification_window_, threshold); |
| actions_ = std::move(argument->actions); |
| } |
| |
| uint8_t HeartbeatTracker::GetFailureCount() { |
| return failure_count_; |
| } |
| |
| base::TimeDelta HeartbeatTracker::GetVerificationWindow() { |
| return verification_window_; |
| } |
| |
| void HeartbeatTracker::SetLastDryRunResponse( |
| mojom::HeartbeatResponse response) { |
| last_dryrun_response_ = response; |
| } |
| |
| bool HeartbeatTracker::VerifyTimeGap(const base::Time& current_time) { |
| auto gap = current_time - last_touch_time_; |
| // The `verification_window_` is always larger than the heartbeat frequency, |
| // so it's likely that we think client is alive while the mojo connection has |
| // dropped. It's not a big problem because the `failure_count_` will always |
| // increase in later verification. However, checking the mojo connection helps |
| // to catch the issue earlier, it's a nice to have. |
| if (gap > verification_window_ || !IsPacemakerBound()) { |
| ++failure_count_; |
| LOG(INFO) << "Service [" << ToStr(name_) << "] failure count increase: " |
| << static_cast<int>(failure_count_); |
| return false; |
| } |
| |
| failure_count_ = 0; |
| return true; |
| } |
| |
| std::vector<mojom::ActionType> HeartbeatTracker::GetActions() { |
| std::vector<mojom::ActionType> result; |
| for (const auto& action : actions_) { |
| result.push_back(action->action); |
| } |
| return result; |
| } |
| |
| std::vector<mojom::ActionType> HeartbeatTracker::GetFailureCountActions() { |
| std::vector<mojom::ActionType> result; |
| for (const auto& action : actions_) { |
| if (failure_count_ == action->failure_count) { |
| result.push_back(action->action); |
| } else if (failure_count_ > action->failure_count && |
| (action->action == mojom::ActionType::kNormalReboot || |
| action->action == mojom::ActionType::kForceReboot)) { |
| // It's possible that the reboot action is skipped due to the threshold |
| // setting. So even if the failure count is not exactly the same as the |
| // configuration, we should still report the action if it's reboot action. |
| result.push_back(action->action); |
| } |
| } |
| |
| return result; |
| } |
| |
| } // namespace heartd |