| // Copyright 2018 The ChromiumOS Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "shill/network/connection_diagnostics.h" |
| |
| #include <memory> |
| #include <string> |
| #include <string_view> |
| #include <utility> |
| |
| #include <base/check.h> |
| #include <base/functional/bind.h> |
| #include <base/logging.h> |
| #include <base/strings/stringprintf.h> |
| #include <net-base/http_url.h> |
| |
| #include "shill/dns_client.h" |
| #include "shill/error.h" |
| #include "shill/event_dispatcher.h" |
| #include "shill/logging.h" |
| #include "shill/manager.h" |
| #include "shill/metrics.h" |
| #include "shill/network/icmp_session.h" |
| |
| namespace { |
| // These strings are dependent on ConnectionDiagnostics::Type. Any changes to |
| // this array should be synced with ConnectionDiagnostics::Type. |
| const char* const kEventNames[] = {"Portal detection", "Ping DNS servers", |
| "DNS resolution", "Ping (target web server)", |
| "Ping (gateway)"}; |
| // These strings are dependent on ConnectionDiagnostics::Phase. Any changes to |
| // this array should be synced with ConnectionDiagnostics::Phase. |
| const char* const kPhaseNames[] = {"Start", "End", "End (Content)", "End (DNS)", |
| "End (HTTP/CXN)"}; |
| // These strings are dependent on ConnectionDiagnostics::Result. Any changes to |
| // this array should be synced with ConnectionDiagnostics::Result. |
| const char* const kResultNames[] = {"Success", "Failure", "Timeout"}; |
| |
| } // namespace |
| |
| namespace shill { |
| |
| namespace Logging { |
| static auto kModuleLogScope = ScopeLogger::kWiFi; |
| } // namespace Logging |
| |
| const char ConnectionDiagnostics::kIssueIPCollision[] = |
| "IP collision detected. Another host on the local network has been " |
| "assigned the same IP address."; |
| const char ConnectionDiagnostics::kIssueRouting[] = "Routing problem detected."; |
| const char ConnectionDiagnostics::kIssueHTTP[] = |
| "Target hostname is pingable. Connectivity problems might be caused by a " |
| "firewall, a web proxy, or a captive portal"; |
| const char ConnectionDiagnostics::kIssueDNSServerMisconfig[] = |
| "DNS servers responding to DNS queries, but sending invalid responses. " |
| "DNS servers might be misconfigured."; |
| const char ConnectionDiagnostics::kIssueDNSServerNoResponse[] = |
| "At least one DNS server is pingable, but is not responding to DNS " |
| "requests. DNS server issue detected."; |
| const char ConnectionDiagnostics::kIssueNoDNSServersConfigured[] = |
| "No DNS servers have been configured for this connection -- either the " |
| "DHCP server or user configuration is invalid."; |
| const char ConnectionDiagnostics::kIssueDNSServersInvalid[] = |
| "All configured DNS server addresses are invalid."; |
| const char ConnectionDiagnostics::kIssueNone[] = |
| "No connection issue detected."; |
| const char ConnectionDiagnostics::kIssueGatewayUpstream[] = |
| "We can find a route to the target web server at a remote IP address, " |
| "and the local gateway is pingable. Gateway issue or upstream " |
| "connectivity problem detected."; |
| const char ConnectionDiagnostics::kIssueGatewayNotResponding[] = |
| "This gateway appears to be on the local network, but is not responding to " |
| "pings."; |
| const char ConnectionDiagnostics::kIssueServerNotResponding[] = |
| "This web server appears to be on the local network, but is not responding " |
| "to pings."; |
| const char ConnectionDiagnostics::kIssueGatewayArpFailed[] = |
| "No ARP entry for the gateway. Either the gateway does not exist on the " |
| "local network, or there are link layer issues."; |
| const char ConnectionDiagnostics::kIssueServerArpFailed[] = |
| "No ARP entry for the web server. Either the web server does not exist on " |
| "the local network, or there are link layer issues."; |
| const char ConnectionDiagnostics::kIssueInternalError[] = |
| "The connection diagnostics encountered an internal failure."; |
| const char ConnectionDiagnostics::kIssueGatewayNoNeighborEntry[] = |
| "No neighbor table entry for the gateway. Either the gateway does not " |
| "exist on the local network, or there are link layer issues."; |
| const char ConnectionDiagnostics::kIssueServerNoNeighborEntry[] = |
| "No neighbor table entry for the web server. Either the web server does " |
| "not exist on the local network, or there are link layer issues."; |
| const char ConnectionDiagnostics::kIssueGatewayNeighborEntryNotConnected[] = |
| "Neighbor table entry for the gateway is not in a connected state. Either " |
| "the web server does not exist on the local network, or there are link " |
| "layer issues."; |
| const char ConnectionDiagnostics::kIssueServerNeighborEntryNotConnected[] = |
| "Neighbor table entry for the web server is not in a connected state. " |
| "Either the web server does not exist on the local network, or there are " |
| "link layer issues."; |
| const int ConnectionDiagnostics::kMaxDNSRetries = 2; |
| |
| ConnectionDiagnostics::ConnectionDiagnostics( |
| std::string_view iface_name, |
| int iface_index, |
| const net_base::IPAddress& ip_address, |
| const net_base::IPAddress& gateway, |
| const std::vector<net_base::IPAddress>& dns_list, |
| EventDispatcher* dispatcher, |
| Metrics* metrics, |
| ResultCallback result_callback) |
| : dispatcher_(dispatcher), |
| metrics_(metrics), |
| iface_name_(iface_name), |
| iface_index_(iface_index), |
| ip_address_(ip_address), |
| gateway_(gateway), |
| dns_list_(dns_list), |
| icmp_session_(new IcmpSession(dispatcher_)), |
| num_dns_attempts_(0), |
| running_(false), |
| result_callback_(std::move(result_callback)), |
| weak_ptr_factory_(this) { |
| dns_client_ = std::make_unique<DnsClient>( |
| ip_address_.GetFamily(), iface_name, DnsClient::kDnsTimeout, dispatcher_, |
| base::BindRepeating(&ConnectionDiagnostics::OnDNSResolutionComplete, |
| weak_ptr_factory_.GetWeakPtr())); |
| for (size_t i = 0; i < dns_list_.size(); i++) { |
| id_to_pending_dns_server_icmp_session_[i] = |
| std::make_unique<IcmpSession>(dispatcher_); |
| } |
| } |
| |
| ConnectionDiagnostics::~ConnectionDiagnostics() { |
| Stop(); |
| } |
| |
| bool ConnectionDiagnostics::Start(const net_base::HttpUrl& url) { |
| if (running()) { |
| LOG(ERROR) << iface_name_ << ": Diagnostics already started"; |
| return false; |
| } |
| |
| LOG(INFO) << iface_name_ << ": Starting diagnostics for " << url.ToString(); |
| target_url_ = url; |
| running_ = true; |
| // Ping DNS servers to make sure at least one is reachable before resolving |
| // the hostname of |target_url_|; |
| dispatcher_->PostTask(FROM_HERE, |
| base::BindOnce(&ConnectionDiagnostics::PingDNSServers, |
| weak_ptr_factory_.GetWeakPtr())); |
| return true; |
| } |
| |
| void ConnectionDiagnostics::Stop() { |
| running_ = false; |
| num_dns_attempts_ = 0; |
| diagnostic_events_.clear(); |
| dns_client_.reset(); |
| icmp_session_->Stop(); |
| id_to_pending_dns_server_icmp_session_.clear(); |
| target_url_ = std::nullopt; |
| } |
| |
| // static |
| std::string ConnectionDiagnostics::EventToString(const Event& event) { |
| auto message = base::StringPrintf( |
| "Event: %-26sPhase: %-17sResult: %-10s", kEventNames[event.type], |
| kPhaseNames[event.phase], kResultNames[event.result]); |
| if (!event.message.empty()) { |
| message.append("Msg: " + event.message); |
| } |
| return message; |
| } |
| |
| void ConnectionDiagnostics::AddEvent(Type type, Phase phase, Result result) { |
| AddEventWithMessage(type, phase, result, ""); |
| } |
| |
| void ConnectionDiagnostics::AddEventWithMessage(Type type, |
| Phase phase, |
| Result result, |
| const std::string& message) { |
| diagnostic_events_.push_back(Event(type, phase, result, message)); |
| } |
| |
| void ConnectionDiagnostics::ReportResultAndStop(const std::string& issue) { |
| metrics_->NotifyConnectionDiagnosticsIssue(issue); |
| for (size_t i = 0; i < diagnostic_events_.size(); ++i) { |
| LOG(INFO) << iface_name_ << ": Diagnostics event #" << i << ": " |
| << EventToString(diagnostic_events_[i]); |
| } |
| LOG(INFO) << iface_name_ << ": Connection diagnostics result: " << issue; |
| if (!result_callback_.is_null()) { |
| std::move(result_callback_).Run(issue, diagnostic_events_); |
| } |
| Stop(); |
| } |
| |
| void ConnectionDiagnostics::ResolveTargetServerIPAddress( |
| const std::vector<std::string>& dns_list) { |
| Error e; |
| if (!dns_client_->Start(dns_list, target_url_->host(), &e)) { |
| LOG(ERROR) << iface_name_ << ": could not start DNS on -- " << e.message(); |
| AddEventWithMessage(kTypeResolveTargetServerIP, kPhaseStart, kResultFailure, |
| e.message()); |
| ReportResultAndStop(kIssueInternalError); |
| return; |
| } |
| |
| AddEventWithMessage(kTypeResolveTargetServerIP, kPhaseStart, kResultSuccess, |
| base::StringPrintf("Attempt #%d", num_dns_attempts_)); |
| SLOG(2) << __func__ << ": looking up " << target_url_->host() << " (attempt " |
| << num_dns_attempts_ << ")"; |
| ++num_dns_attempts_; |
| } |
| |
| void ConnectionDiagnostics::PingDNSServers() { |
| if (dns_list_.empty()) { |
| LOG(ERROR) << iface_name_ << ": no DNS servers for network connection on " |
| << iface_name_; |
| AddEventWithMessage(kTypePingDNSServers, kPhaseStart, kResultFailure, |
| "No DNS servers for this connection"); |
| ReportResultAndStop(kIssueNoDNSServersConfigured); |
| return; |
| } |
| |
| pingable_dns_servers_.clear(); |
| size_t num_invalid_dns_server_addr = 0; |
| size_t num_failed_icmp_session_start = 0; |
| for (size_t i = 0; i < dns_list_.size(); ++i) { |
| // If we encounter any errors starting ping for any DNS server, carry on |
| // attempting to ping the other DNS servers rather than failing. We only |
| // need to successfully ping a single DNS server to decide whether or not |
| // DNS servers can be reached. |
| const auto& dns_server_ip_addr = dns_list_[i]; |
| auto session_iter = id_to_pending_dns_server_icmp_session_.find(i); |
| if (session_iter == id_to_pending_dns_server_icmp_session_.end()) |
| continue; |
| |
| if (!session_iter->second->Start( |
| dns_server_ip_addr, iface_index_, iface_name_, |
| base::BindOnce(&ConnectionDiagnostics::OnPingDNSServerComplete, |
| weak_ptr_factory_.GetWeakPtr(), i))) { |
| LOG(ERROR) << iface_name_ << "Failed to initiate ping for DNS server at " |
| << dns_server_ip_addr; |
| ++num_failed_icmp_session_start; |
| id_to_pending_dns_server_icmp_session_.erase(i); |
| continue; |
| } |
| |
| SLOG(2) << __func__ << ": pinging DNS server at " << dns_server_ip_addr; |
| } |
| |
| if (id_to_pending_dns_server_icmp_session_.empty()) { |
| AddEventWithMessage( |
| kTypePingDNSServers, kPhaseStart, kResultFailure, |
| "Could not start ping for any of the given DNS servers"); |
| if (num_invalid_dns_server_addr == dns_list_.size()) { |
| ReportResultAndStop(kIssueDNSServersInvalid); |
| } else if (num_failed_icmp_session_start == dns_list_.size()) { |
| ReportResultAndStop(kIssueInternalError); |
| } |
| } else { |
| AddEvent(kTypePingDNSServers, kPhaseStart, kResultSuccess); |
| } |
| } |
| |
| void ConnectionDiagnostics::PingHost(const net_base::IPAddress& address) { |
| SLOG(2) << __func__; |
| |
| const Type event_type = |
| (address == gateway_) ? kTypePingGateway : kTypePingTargetServer; |
| if (!icmp_session_->Start( |
| address, iface_index_, iface_name_, |
| base::BindOnce(&ConnectionDiagnostics::OnPingHostComplete, |
| weak_ptr_factory_.GetWeakPtr(), event_type, |
| address))) { |
| LOG(ERROR) << iface_name_ << ": failed to start ICMP session with " |
| << address.ToString(); |
| AddEventWithMessage( |
| event_type, kPhaseStart, kResultFailure, |
| "Failed to start ICMP session with " + address.ToString()); |
| ReportResultAndStop(kIssueInternalError); |
| return; |
| } |
| |
| AddEventWithMessage(event_type, kPhaseStart, kResultSuccess, |
| "Pinging " + address.ToString()); |
| } |
| |
| void ConnectionDiagnostics::OnPingDNSServerComplete( |
| int dns_server_index, const std::vector<base::TimeDelta>& result) { |
| SLOG(2) << __func__ << "(DNS server index " << dns_server_index << ")"; |
| |
| if (!id_to_pending_dns_server_icmp_session_.erase(dns_server_index)) { |
| // This should not happen, since we expect exactly one callback for each |
| // IcmpSession started with a unique |dns_server_index| value in |
| // ConnectionDiagnostics::PingDNSServers. However, if this does happen for |
| // any reason, |id_to_pending_dns_server_icmp_session_| might never become |
| // empty, and we might never move to the next step after pinging DNS |
| // servers. Stop diagnostics immediately to prevent this from happening. |
| LOG(ERROR) << iface_name_ |
| << ": no matching pending DNS server ICMP session found"; |
| ReportResultAndStop(kIssueInternalError); |
| return; |
| } |
| |
| if (IcmpSession::AnyRepliesReceived(result)) { |
| pingable_dns_servers_.push_back(dns_list_[dns_server_index].ToString()); |
| } |
| if (!id_to_pending_dns_server_icmp_session_.empty()) { |
| SLOG(2) << __func__ << ": not yet finished pinging all DNS servers"; |
| return; |
| } |
| |
| if (pingable_dns_servers_.empty()) { |
| AddEventWithMessage( |
| kTypePingDNSServers, kPhaseEnd, kResultFailure, |
| "No DNS servers responded to pings. Pinging the gateway at " + |
| gateway_.ToString()); |
| // If no DNS servers can be pinged, try to ping the gateway. |
| dispatcher_->PostTask( |
| FROM_HERE, base::BindOnce(&ConnectionDiagnostics::PingHost, |
| weak_ptr_factory_.GetWeakPtr(), gateway_)); |
| return; |
| } |
| |
| if (pingable_dns_servers_.size() != dns_list_.size()) { |
| AddEventWithMessage(kTypePingDNSServers, kPhaseEnd, kResultSuccess, |
| "Pinged some, but not all, DNS servers successfully"); |
| } else { |
| AddEventWithMessage(kTypePingDNSServers, kPhaseEnd, kResultSuccess, |
| "Pinged all DNS servers successfully"); |
| } |
| |
| if (num_dns_attempts_ < kMaxDNSRetries) { |
| dispatcher_->PostTask( |
| FROM_HERE, |
| base::BindOnce(&ConnectionDiagnostics::ResolveTargetServerIPAddress, |
| weak_ptr_factory_.GetWeakPtr(), pingable_dns_servers_)); |
| } else { |
| SLOG(2) << __func__ << ": max DNS resolution attempts reached"; |
| ReportResultAndStop(kIssueDNSServerNoResponse); |
| } |
| } |
| |
| void ConnectionDiagnostics::OnDNSResolutionComplete( |
| const base::expected<net_base::IPAddress, Error>& address) { |
| SLOG(2) << __func__; |
| |
| if (address.has_value()) { |
| AddEventWithMessage(kTypeResolveTargetServerIP, kPhaseEnd, kResultSuccess, |
| "Target address is " + address->ToString()); |
| dispatcher_->PostTask( |
| FROM_HERE, base::BindOnce(&ConnectionDiagnostics::PingHost, |
| weak_ptr_factory_.GetWeakPtr(), *address)); |
| } else if (address.error().type() == Error::kOperationTimeout) { |
| AddEventWithMessage( |
| kTypeResolveTargetServerIP, kPhaseEnd, kResultTimeout, |
| "DNS resolution timed out: " + address.error().message()); |
| dispatcher_->PostTask(FROM_HERE, |
| base::BindOnce(&ConnectionDiagnostics::PingDNSServers, |
| weak_ptr_factory_.GetWeakPtr())); |
| } else { |
| AddEventWithMessage(kTypeResolveTargetServerIP, kPhaseEnd, kResultFailure, |
| "DNS resolution failed: " + address.error().message()); |
| ReportResultAndStop(kIssueDNSServerMisconfig); |
| } |
| } |
| |
| void ConnectionDiagnostics::OnPingHostComplete( |
| Type ping_event_type, |
| const net_base::IPAddress& address_pinged, |
| const std::vector<base::TimeDelta>& result) { |
| SLOG(2) << __func__; |
| |
| auto message = base::StringPrintf("Destination: %s, Latencies: ", |
| address_pinged.ToString().c_str()); |
| for (const auto& latency : result) { |
| if (latency.is_zero()) { |
| message.append("NA "); |
| } else { |
| message.append(base::StringPrintf("%4.2fms ", latency.InMillisecondsF())); |
| } |
| } |
| |
| Result result_type = |
| IcmpSession::AnyRepliesReceived(result) ? kResultSuccess : kResultFailure; |
| if (IcmpSession::IsPacketLossPercentageGreaterThan(result, 50)) { |
| LOG(WARNING) << iface_name_ << ": high packet loss when pinging " |
| << address_pinged.ToString(); |
| } |
| AddEventWithMessage(ping_event_type, kPhaseEnd, result_type, message); |
| if (result_type == kResultSuccess) { |
| // If pinging the target web server succeeded, we have found a HTTP issue or |
| // broken portal. Otherwise, if pinging the gateway succeeded, we have found |
| // an upstream connectivity problem or gateway issue. |
| ReportResultAndStop(ping_event_type == kTypePingGateway |
| ? kIssueGatewayUpstream |
| : kIssueHTTP); |
| } else if (result_type == kResultFailure && |
| ping_event_type == kTypePingTargetServer) { |
| // If pinging the target web server fails, try pinging the gateway. |
| dispatcher_->PostTask( |
| FROM_HERE, base::BindOnce(&ConnectionDiagnostics::PingHost, |
| weak_ptr_factory_.GetWeakPtr(), gateway_)); |
| } else if (result_type == kResultFailure && |
| ping_event_type == kTypePingGateway) { |
| ReportResultAndStop(kIssueGatewayUpstream); |
| } else { |
| LOG(WARNING) << iface_name_ << ": " << __func__ |
| << " received unexpected event type " << ping_event_type |
| << " while pinging " << address_pinged.ToString(); |
| ReportResultAndStop(kIssueInternalError); |
| } |
| } |
| |
| bool ConnectionDiagnostics::DoesPreviousEventMatch(Type type, |
| Phase phase, |
| Result result, |
| size_t num_events_ago) { |
| int event_index = diagnostic_events_.size() - 1 - num_events_ago; |
| if (event_index < 0) { |
| LOG(ERROR) << iface_name_ << ": requested event " << num_events_ago |
| << " before the last event, but we only have " |
| << diagnostic_events_.size() << " logged"; |
| return false; |
| } |
| |
| return (diagnostic_events_[event_index].type == type && |
| diagnostic_events_[event_index].phase == phase && |
| diagnostic_events_[event_index].result == result); |
| } |
| |
| std::unique_ptr<ConnectionDiagnostics> ConnectionDiagnosticsFactory::Create( |
| std::string_view iface_name, |
| int iface_index, |
| const net_base::IPAddress& ip_address, |
| const net_base::IPAddress& gateway, |
| const std::vector<net_base::IPAddress>& dns_list, |
| EventDispatcher* dispatcher, |
| Metrics* metrics, |
| ConnectionDiagnostics::ResultCallback result_callback) { |
| return std::make_unique<ConnectionDiagnostics>( |
| iface_name, iface_index, ip_address, gateway, dns_list, dispatcher, |
| metrics, std::move(result_callback)); |
| } |
| |
| } // namespace shill |