blob: 2a3231ee1c8379f036840c4874a925ab5096c4a6 [file] [log] [blame]
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "vm_tools/concierge/termina_vm.h"
#include <arpa/inet.h>
#include <linux/capability.h>
#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>
#include <utility>
#include <base/bind.h>
#include <base/files/file.h>
#include <base/files/file_util.h>
#include <base/files/scoped_file.h>
#include <base/guid.h>
#include <base/logging.h>
#include <base/memory/ptr_util.h>
#include <base/strings/string_number_conversions.h>
#include <base/strings/stringprintf.h>
#include <base/sys_info.h>
#include <base/time/time.h>
#include <google/protobuf/repeated_field.h>
#include <grpcpp/grpcpp.h>
#include "vm_tools/common/constants.h"
#include "vm_tools/concierge/tap_device_builder.h"
#include "vm_tools/concierge/vm_util.h"
using std::string;
namespace vm_tools {
namespace concierge {
namespace {
// File-local constants used by TerminaVm.

// File name of the crosvm control socket. GetVmSocketPath() appends it to the
// VM's runtime directory.
constexpr char kCrosvmSocket[] = "crosvm.sock";
// Path to the logger(1) binary that crosvm's stdout is piped into.
constexpr char kLoggerBin[] = "/usr/bin/logger";
// Path to the wayland socket passed to crosvm via --wayland-sock.
constexpr char kWaylandSocket[] = "/run/chrome/wayland-0";
// How long to wait, in seconds, before timing out on the Shutdown RPC.
constexpr int64_t kShutdownTimeoutSeconds = 30;
// How long to wait, in seconds, before timing out on the StartTermina RPC.
// The GetKernelVersion RPC issued just before it uses the same deadline.
constexpr int64_t kStartTerminaTimeoutSeconds = 150;
// How long to wait, in seconds, before timing out on all other RPCs.
constexpr int64_t kDefaultTimeoutSeconds = 10;
// How long to wait for the crosvm child process to exit at each step of
// Shutdown().
constexpr base::TimeDelta kChildExitTimeout = base::TimeDelta::FromSeconds(10);
// Offset within the VM subnet of the gateway/host address.
constexpr size_t kHostAddressOffset = 0;
// Offset within the VM subnet of the client/guest address.
constexpr size_t kGuestAddressOffset = 1;
// The CPU cgroup that all Termina crosvm processes are placed in.
constexpr char kTerminaCpuCgroup[] = "/sys/fs/cgroup/cpu/vms/termina";
} // namespace
// Constructs a TerminaVm that has not been started yet.
//
// |subnet| must be non-null and |runtime_dir| must name an existing directory;
// both are enforced with CHECK. The object takes ownership of |runtime_dir|
// through |runtime_dir_|. Every by-value argument is a sink and is moved into
// its member; |stateful_device| used to be copied.
TerminaVm::TerminaVm(
    arc_networkd::MacAddress mac_addr,
    std::unique_ptr<arc_networkd::Subnet> subnet,
    uint32_t vsock_cid,
    std::unique_ptr<SeneschalServerProxy> seneschal_server_proxy,
    base::FilePath runtime_dir,
    std::string stateful_device,
    VmFeatures features)
    : mac_addr_(std::move(mac_addr)),
      subnet_(std::move(subnet)),
      vsock_cid_(vsock_cid),
      seneschal_server_proxy_(std::move(seneschal_server_proxy)),
      features_(features),
      stateful_device_(std::move(stateful_device)) {
  CHECK(subnet_);
  CHECK(base::DirectoryExists(runtime_dir));

  // Take ownership of the runtime directory.
  CHECK(runtime_dir_.Set(runtime_dir));
}
// Stops the VM when the object is destroyed. The result of Shutdown() is
// deliberately discarded.
TerminaVm::~TerminaVm() {
  static_cast<void>(Shutdown());
}
// Factory: constructs a TerminaVm and immediately starts it with the given
// kernel, root file system and extra disks. Returns the running VM, or a null
// pointer when Start() fails (the partially set up object is destroyed).
std::unique_ptr<TerminaVm> TerminaVm::Create(
    base::FilePath kernel,
    base::FilePath rootfs,
    std::vector<TerminaVm::Disk> disks,
    arc_networkd::MacAddress mac_addr,
    std::unique_ptr<arc_networkd::Subnet> subnet,
    uint32_t vsock_cid,
    std::unique_ptr<SeneschalServerProxy> seneschal_server_proxy,
    base::FilePath runtime_dir,
    std::string stateful_device,
    VmFeatures features) {
  std::unique_ptr<TerminaVm> instance(new TerminaVm(
      std::move(mac_addr), std::move(subnet), vsock_cid,
      std::move(seneschal_server_proxy), std::move(runtime_dir),
      std::move(stateful_device), features));

  const bool started = instance->Start(std::move(kernel), std::move(rootfs),
                                       std::move(disks));
  if (started) {
    return instance;
  }

  // |instance| is destroyed on the way out.
  return nullptr;
}
std::string TerminaVm::GetVmSocketPath() const {
return runtime_dir_.GetPath().Append(kCrosvmSocket).value();
}
// Launches the crosvm process for this VM and wires up its logging.
//
// Steps, in order: build the TAP device, assemble the crosvm command line,
// start crosvm with its stdout redirected to a pipe, start a logger(1) process
// that reads from that pipe, and create the gRPC stub used to talk to maitre'd
// inside the guest.
//
// |kernel| is the kernel image path (last crosvm argument), |rootfs| is passed
// via --root, and each entry of |disks| is added as --rwdisk or --disk.
//
// Returns false if the TAP device cannot be built or crosvm fails to start.
// A logger start failure is only logged; the function still returns true.
bool TerminaVm::Start(base::FilePath kernel,
base::FilePath rootfs,
std::vector<TerminaVm::Disk> disks) {
// Set up the tap device.
base::ScopedFD tap_fd =
BuildTapDevice(mac_addr_, GatewayAddress(), Netmask(), true /*vnet_hdr*/);
if (!tap_fd.is_valid()) {
LOG(ERROR) << "Unable to build and configure TAP device";
return false;
}
// Build up the process arguments.
// NOTE(review): --tap-fd passes the raw descriptor number, so crosvm
// presumably inherits |tap_fd|; it stays open until this function returns.
// clang-format off
std::vector<string> args = {
kCrosvmBin, "run",
"--cpus", std::to_string(base::SysInfo::NumberOfProcessors()),
"--mem", GetVmMemoryMiB(),
"--root", rootfs.value(),
"--tap-fd", std::to_string(tap_fd.get()),
"--cid", std::to_string(vsock_cid_),
"--socket", GetVmSocketPath(),
"--wayland-sock", kWaylandSocket,
"--cras-audio",
"--params", "snd_intel8x0.inside_vm=1 snd_intel8x0.ac97_clock=48000",
};
// clang-format on
// Optional flags. USE_CROSVM_WL_DMABUF is defined outside this file
// (presumably a build-time flag); the other two come from |features_|.
if (USE_CROSVM_WL_DMABUF)
args.emplace_back("--wayland-dmabuf");
if (features_.gpu)
args.emplace_back("--gpu");
if (features_.software_tpm)
args.emplace_back("--software-tpm");
// Add any extra disks.
for (const auto& disk : disks) {
if (disk.writable) {
args.emplace_back("--rwdisk");
} else {
args.emplace_back("--disk");
}
args.emplace_back(disk.path.value());
}
// Finally list the path to the kernel.
args.emplace_back(kernel.value());
// Put everything into the brillo::ProcessImpl.
for (string& arg : args) {
process_.AddArg(std::move(arg));
}
// Change the process group before exec so that crosvm sending SIGKILL to the
// whole process group doesn't kill us as well. The function also changes the
// cpu cgroup for Termina crosvm processes.
process_.SetPreExecCallback(base::Bind(
&SetUpCrosvmProcess, base::FilePath(kTerminaCpuCgroup).Append("tasks")));
// Redirect STDOUT to a pipe.
process_.RedirectUsingPipe(STDOUT_FILENO, false /* is_input */);
if (!process_.Start()) {
LOG(ERROR) << "Failed to start VM process";
return false;
}
// Set up the kernel logger process.
// Set up the logger arguments.
std::vector<string> logger_args = {
kLoggerBin,
// Host syslog daemon requires priority to be set.
"-p",
"auth.info",
"--skip-empty",
// Tag each line with the VM's vsock cid.
"--tag",
base::StringPrintf("VM(%u)", vsock_cid_),
};
for (string& arg : logger_args) {
logger_process_.AddArg(std::move(arg));
}
// Bind crosvm's output pipe to the logger's input pipe.
logger_process_.BindFd(process_.GetPipe(STDOUT_FILENO), STDIN_FILENO);
// If the logger process fails to start, log the failure and carry on; the VM
// itself is already running.
if (!logger_process_.Start()) {
LOG(ERROR) << "Failed to start the logger process for VM " << vsock_cid_;
}
// Create a stub for talking to the maitre'd instance inside the VM.
stub_ = std::make_unique<vm_tools::Maitred::Stub>(grpc::CreateChannel(
base::StringPrintf("vsock:%u:%u", vsock_cid_, vm_tools::kMaitredPort),
grpc::InsecureChannelCredentials()));
return true;
}
bool TerminaVm::Shutdown() {
// Do a sanity check here to make sure the process is still around. It may
// have crashed and we don't want to be waiting around for an RPC response
// that's never going to come. kill with a signal value of 0 is explicitly
// documented as a way to check for the existence of a process.
if (!CheckProcessExists(process_.pid())) {
// The process is already gone.
process_.Release();
return true;
}
grpc::ClientContext ctx;
ctx.set_deadline(gpr_time_add(
gpr_now(GPR_CLOCK_MONOTONIC),
gpr_time_from_seconds(kShutdownTimeoutSeconds, GPR_TIMESPAN)));
vm_tools::EmptyMessage empty;
grpc::Status status = stub_->Shutdown(&ctx, empty, &empty);
// brillo::ProcessImpl doesn't provide a timed wait function and while the
// Shutdown RPC may have been successful we can't really trust crosvm to
// actually exit. This may result in an untimed wait() blocking indefinitely.
// Instead, do a timed wait here and only return success if the process
// _actually_ exited as reported by the kernel, which is really the only
// thing we can trust here.
if (status.ok() && WaitForChild(process_.pid(), kChildExitTimeout)) {
process_.Release();
return true;
}
LOG(WARNING) << "Shutdown RPC failed for VM " << vsock_cid_ << " with error "
<< "code " << status.error_code() << ": "
<< status.error_message();
// Try to shut it down via the crosvm socket.
RunCrosvmCommand("stop");
// We can't actually trust the exit codes that crosvm gives us so just see if
// it exited.
if (WaitForChild(process_.pid(), kChildExitTimeout)) {
process_.Release();
return true;
}
LOG(WARNING) << "Failed to stop VM " << vsock_cid_ << " via crosvm socket";
// Kill the process with SIGTERM.
if (process_.Kill(SIGTERM, kChildExitTimeout.InSeconds())) {
return true;
}
LOG(WARNING) << "Failed to kill VM " << vsock_cid_ << " with SIGTERM";
// Kill it with fire.
if (process_.Kill(SIGKILL, kChildExitTimeout.InSeconds())) {
return true;
}
LOG(ERROR) << "Failed to kill VM " << vsock_cid_ << " with SIGKILL";
return false;
}
bool TerminaVm::ConfigureNetwork(const std::vector<string>& nameservers,
const std::vector<string>& search_domains) {
LOG(INFO) << "Configuring network for VM " << vsock_cid_;
vm_tools::NetworkConfigRequest request;
vm_tools::EmptyMessage response;
vm_tools::IPv4Config* config = request.mutable_ipv4_config();
config->set_address(IPv4Address());
config->set_gateway(GatewayAddress());
config->set_netmask(Netmask());
grpc::ClientContext ctx;
ctx.set_deadline(gpr_time_add(
gpr_now(GPR_CLOCK_MONOTONIC),
gpr_time_from_seconds(kDefaultTimeoutSeconds, GPR_TIMESPAN)));
grpc::Status status = stub_->ConfigureNetwork(&ctx, request, &response);
if (!status.ok()) {
LOG(ERROR) << "Failed to configure network for VM " << vsock_cid_ << ": "
<< status.error_message();
return false;
}
// TODO(smbarber): check return value here once all VMs have SetResolvConfig.
// Ignore the return value here for now. If the guest VM doesn't yet
// implement the SetResolvConfig RPC, it's not a failure.
SetResolvConfig(nameservers, search_domains);
return true;
}
// Sends |command| to this VM's crosvm control socket by delegating to the
// free function of the same name.
void TerminaVm::RunCrosvmCommand(string command) {
  string socket_path = GetVmSocketPath();
  vm_tools::concierge::RunCrosvmCommand(std::move(command),
                                        std::move(socket_path));
}
// Asks maitre'd to mount |source| on |target| inside the VM with the given
// file system type, mount flags and options. Returns true only if the RPC
// succeeded and the guest reported error 0.
bool TerminaVm::Mount(string source,
                      string target,
                      string fstype,
                      uint64_t mountflags,
                      string options) {
  LOG(INFO) << "Mounting " << source << " on " << target << " inside VM "
            << vsock_cid_;

  // The string arguments are handed over to the request; from here on read
  // them back from |request|.
  vm_tools::MountRequest request;
  request.set_source(std::move(source));
  request.set_target(std::move(target));
  request.set_fstype(std::move(fstype));
  request.set_mountflags(mountflags);
  request.set_options(std::move(options));

  grpc::ClientContext ctx;
  ctx.set_deadline(gpr_time_add(
      gpr_now(GPR_CLOCK_MONOTONIC),
      gpr_time_from_seconds(kDefaultTimeoutSeconds, GPR_TIMESPAN)));

  vm_tools::MountResponse response;
  const grpc::Status rpc_status = stub_->Mount(&ctx, request, &response);
  if (rpc_status.ok() && response.error() == 0) {
    return true;
  }

  // Report the guest's errno when the RPC went through, otherwise the
  // transport error.
  const std::string reason = rpc_status.ok()
                                 ? std::string(strerror(response.error()))
                                 : rpc_status.error_message();
  LOG(ERROR) << "Failed to mount " << request.source() << " on "
             << request.target() << " inside VM " << vsock_cid_ << ": "
             << reason;
  return false;
}
bool TerminaVm::StartTermina(std::string lxd_subnet,
std::string stateful_device,
std::string* out_error) {
// We record the kernel version early to ensure that no container has
// been started and the VM can still be trusted.
RecordKernelVersionForEnterpriseReporting();
vm_tools::StartTerminaRequest request;
vm_tools::StartTerminaResponse response;
request.set_tremplin_ipv4_address(GatewayAddress());
request.mutable_lxd_ipv4_subnet()->swap(lxd_subnet);
request.mutable_stateful_device()->swap(stateful_device);
grpc::ClientContext ctx;
ctx.set_deadline(gpr_time_add(
gpr_now(GPR_CLOCK_MONOTONIC),
gpr_time_from_seconds(kStartTerminaTimeoutSeconds, GPR_TIMESPAN)));
grpc::Status status = stub_->StartTermina(&ctx, request, &response);
if (!status.ok()) {
LOG(ERROR) << "Failed to start Termina: " << status.error_message();
out_error->assign(status.error_message());
return false;
}
return true;
}
void TerminaVm::RecordKernelVersionForEnterpriseReporting() {
grpc::ClientContext ctx_get_kernel_version;
ctx_get_kernel_version.set_deadline(gpr_time_add(
gpr_now(GPR_CLOCK_MONOTONIC),
gpr_time_from_seconds(kStartTerminaTimeoutSeconds, GPR_TIMESPAN)));
vm_tools::EmptyMessage empty;
vm_tools::GetKernelVersionResponse grpc_response;
grpc::Status get_kernel_version_status =
stub_->GetKernelVersion(&ctx_get_kernel_version, empty, &grpc_response);
if (!get_kernel_version_status.ok()) {
LOG(WARNING) << "Failed to retrieve kernel version for VM " << vsock_cid_
<< ": " << get_kernel_version_status.error_message();
} else {
kernel_version_ =
grpc_response.kernel_release() + " " + grpc_response.kernel_version();
}
}
// Forwards a USB attach request for this VM to the free helper of the same
// name, addressing the VM by its crosvm control socket. The helper's result
// is returned unchanged.
bool TerminaVm::AttachUsbDevice(uint8_t bus,
                                uint8_t addr,
                                uint16_t vid,
                                uint16_t pid,
                                int fd,
                                UsbControlResponse* response) {
  const bool attached = vm_tools::concierge::AttachUsbDevice(
      GetVmSocketPath(), bus, addr, vid, pid, fd, response);
  return attached;
}
// Forwards a USB detach request for |port| to the free helper of the same
// name, addressing the VM by its crosvm control socket.
bool TerminaVm::DetachUsbDevice(uint8_t port, UsbControlResponse* response) {
  const bool detached = vm_tools::concierge::DetachUsbDevice(
      GetVmSocketPath(), port, response);
  return detached;
}
// Forwards a USB list request to the free helper of the same name, which
// receives |device| as its output argument.
bool TerminaVm::ListUsbDevice(std::vector<UsbDevice>* device) {
  const bool listed =
      vm_tools::concierge::ListUsbDevice(GetVmSocketPath(), device);
  return listed;
}
void TerminaVm::HandleSuspendImminent() {
RunCrosvmCommand("suspend");
}
void TerminaVm::HandleSuspendDone() {
RunCrosvmCommand("resume");
}
bool TerminaVm::Mount9P(uint32_t port, string target) {
LOG(INFO) << "Mounting 9P file system from port " << port << " on " << target;
vm_tools::Mount9PRequest request;
vm_tools::MountResponse response;
request.set_port(port);
request.set_target(std::move(target));
grpc::ClientContext ctx;
ctx.set_deadline(gpr_time_add(
gpr_now(GPR_CLOCK_MONOTONIC),
gpr_time_from_seconds(kDefaultTimeoutSeconds, GPR_TIMESPAN)));
grpc::Status status = stub_->Mount9P(&ctx, request, &response);
if (!status.ok() || response.error() != 0) {
LOG(ERROR) << "Failed to mount 9P server on " << request.target()
<< " inside VM " << vsock_cid_ << ": "
<< (status.ok() ? strerror(response.error())
: status.error_message());
return false;
}
return true;
}
bool TerminaVm::SetResolvConfig(const std::vector<string>& nameservers,
const std::vector<string>& search_domains) {
LOG(INFO) << "Setting resolv config for VM " << vsock_cid_;
vm_tools::SetResolvConfigRequest request;
vm_tools::EmptyMessage response;
vm_tools::ResolvConfig* resolv_config = request.mutable_resolv_config();
google::protobuf::RepeatedPtrField<string> request_nameservers(
nameservers.begin(), nameservers.end());
resolv_config->mutable_nameservers()->Swap(&request_nameservers);
google::protobuf::RepeatedPtrField<string> request_search_domains(
search_domains.begin(), search_domains.end());
resolv_config->mutable_search_domains()->Swap(&request_search_domains);
grpc::ClientContext ctx;
ctx.set_deadline(gpr_time_add(
gpr_now(GPR_CLOCK_MONOTONIC),
gpr_time_from_seconds(kDefaultTimeoutSeconds, GPR_TIMESPAN)));
grpc::Status status = stub_->SetResolvConfig(&ctx, request, &response);
if (!status.ok()) {
LOG(ERROR) << "Failed to set resolv config for VM " << vsock_cid_ << ": "
<< status.error_message();
return false;
}
return true;
}
// Sends the host's current wall-clock time to the guest through the SetTime
// RPC. On failure the RPC error message is stored in |failure_reason| (which
// must be non-null) and false is returned.
bool TerminaVm::SetTime(string* failure_reason) {
  DCHECK(failure_reason);

  const struct timeval host_time = base::Time::Now().ToTimeVal();

  vm_tools::SetTimeRequest request;
  google::protobuf::Timestamp* stamp = request.mutable_time();
  stamp->set_seconds(host_time.tv_sec);
  // timeval carries microseconds; Timestamp wants nanoseconds.
  stamp->set_nanos(host_time.tv_usec * 1000);

  grpc::ClientContext ctx;
  ctx.set_deadline(gpr_time_add(
      gpr_now(GPR_CLOCK_MONOTONIC),
      gpr_time_from_seconds(kDefaultTimeoutSeconds, GPR_TIMESPAN)));

  vm_tools::EmptyMessage response;
  const grpc::Status rpc_status = stub_->SetTime(&ctx, request, &response);
  if (rpc_status.ok()) {
    return true;
  }

  LOG(ERROR) << "Failed to set guest time on VM " << vsock_cid_ << ":"
             << rpc_status.error_message();
  *failure_reason = rpc_status.error_message();
  return false;
}
// Fills |response| with the cached guest kernel version. If no version was
// recorded, |response| is marked unsuccessful with a failure reason instead.
// The return value mirrors the success field.
bool TerminaVm::GetVmEnterpriseReportingInfo(
    GetVmEnterpriseReportingInfoResponse* response) {
  LOG(INFO) << "Get enterprise reporting info";

  const bool have_version = !kernel_version_.empty();
  response->set_success(have_version);
  if (have_version) {
    response->set_vm_kernel_version(kernel_version_);
  } else {
    response->set_failure_reason(
        "Kernel version could not be recorded at startup.");
  }
  return have_version;
}
// static
// Applies the CPU share setting for |cpu_restriction_state| to the Termina
// cgroup: 1024 shares in the foreground, 64 in the background. Any other
// state trips NOTREACHED() and otherwise falls back to the foreground value.
bool TerminaVm::SetVmCpuRestriction(CpuRestrictionState cpu_restriction_state) {
  // TODO(sonnyrao): Adjust |cpu_shares|.
  int cpu_shares = 1024;
  if (cpu_restriction_state == CPU_RESTRICTION_BACKGROUND) {
    cpu_shares = 64;
  } else if (cpu_restriction_state != CPU_RESTRICTION_FOREGROUND) {
    NOTREACHED();
  }

  const base::FilePath cgroup_dir(kTerminaCpuCgroup);
  return UpdateCpuShares(cgroup_dir, cpu_shares);
}
// Takes ownership of |subnet| as the container subnet, replacing any subnet
// held before.
void TerminaVm::SetContainerSubnet(
    std::unique_ptr<arc_networkd::Subnet> subnet) {
  container_subnet_.reset(subnet.release());
}
// Returns the gateway/host address: the VM subnet's address at
// |kHostAddressOffset|.
uint32_t TerminaVm::GatewayAddress() const {
  const uint32_t host_address = subnet_->AddressAtOffset(kHostAddressOffset);
  return host_address;
}
// Returns the guest address: the VM subnet's address at
// |kGuestAddressOffset|.
uint32_t TerminaVm::IPv4Address() const {
  const uint32_t guest_address = subnet_->AddressAtOffset(kGuestAddressOffset);
  return guest_address;
}
// Returns the netmask of the VM subnet.
uint32_t TerminaVm::Netmask() const {
  const uint32_t mask = subnet_->Netmask();
  return mask;
}
// Returns the container subnet's netmask, or INADDR_ANY when no container
// subnet has been set.
uint32_t TerminaVm::ContainerNetmask() const {
  if (!container_subnet_) {
    return INADDR_ANY;
  }
  return container_subnet_->Netmask();
}
// Returns the container subnet's prefix length, or 0 when no container subnet
// has been set.
size_t TerminaVm::ContainerPrefixLength() const {
  if (!container_subnet_) {
    return 0;
  }
  return container_subnet_->PrefixLength();
}
// Returns the container subnet's address at offset 0, or INADDR_ANY when no
// container subnet has been set.
uint32_t TerminaVm::ContainerSubnet() const {
  if (!container_subnet_) {
    return INADDR_ANY;
  }
  return container_subnet_->AddressAtOffset(0);
}
// Builds a snapshot of this VM: guest IPv4 address, crosvm pid, vsock cid,
// seneschal server handle, and a status of RUNNING once tremplin has started
// (STARTING before that).
VmInterface::Info TerminaVm::GetInfo() {
  VmInterface::Info info{};
  info.ipv4_address = IPv4Address();
  info.pid = pid();
  info.cid = cid();
  info.seneschal_server_handle = seneschal_server_handle();
  if (IsTremplinStarted()) {
    info.status = VmInterface::Status::RUNNING;
  } else {
    info.status = VmInterface::Status::STARTING;
  }
  return info;
}
// Test-only setter for the cached kernel version. |kernel_version| is a
// by-value sink, so it is moved into the member rather than copied again.
void TerminaVm::set_kernel_version_for_testing(std::string kernel_version) {
  kernel_version_ = std::move(kernel_version);
}
// Test-only setter that installs |stub| as the maitre'd stub, replacing any
// stub held before.
void TerminaVm::set_stub_for_testing(
    std::unique_ptr<vm_tools::Maitred::Stub> stub) {
  stub_.reset(stub.release());
}
// Test-only factory. Builds a TerminaVm without starting crosvm: no seneschal
// proxy, all optional features disabled, and the kernel version and maitre'd
// stub injected directly. |kernel_version| is moved into the setter instead
// of being copied.
std::unique_ptr<TerminaVm> TerminaVm::CreateForTesting(
    arc_networkd::MacAddress mac_addr,
    std::unique_ptr<arc_networkd::Subnet> subnet,
    uint32_t vsock_cid,
    base::FilePath runtime_dir,
    std::string stateful_device,
    std::string kernel_version,
    std::unique_ptr<vm_tools::Maitred::Stub> stub) {
  VmFeatures features{
      .gpu = false,
      .software_tpm = false,
  };

  auto vm = base::WrapUnique(new TerminaVm(
      std::move(mac_addr), std::move(subnet), vsock_cid, nullptr,
      std::move(runtime_dir), std::move(stateful_device), features));
  vm->set_kernel_version_for_testing(std::move(kernel_version));
  vm->set_stub_for_testing(std::move(stub));
  return vm;
}
} // namespace concierge
} // namespace vm_tools