// Copyright 2019 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "vm_tools/concierge/vm_util.h"
#include <errno.h>
#include <fcntl.h>
#include <libudev.h>
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <sys/signalfd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <algorithm>
#include <csignal>
#include <limits>
#include <map>
#include <memory>
#include <optional>
#include <ostream>
#include <string>
#include <utility>
#include <vector>
#include <absl/strings/str_split.h>
#include <base/base64.h>
#include <base/containers/fixed_flat_map.h>
#include <base/files/file_path.h>
#include <base/files/file_util.h>
#include <base/files/scoped_file.h>
#include <base/format_macros.h>
#include <base/json/json_reader.h>
#include <base/logging.h>
#include <base/memory/ptr_util.h>
#include <base/posix/eintr_wrapper.h>
#include <base/strings/safe_sprintf.h>
#include <base/strings/strcat.h>
#include <base/strings/string_number_conversions.h>
#include <base/strings/string_util.h>
#include <base/strings/stringprintf.h>
#include <base/system/sys_info.h>
#include <brillo/files/file_util.h>
#include <brillo/process/process.h>
#include <chromeos-config/libcros_config/cros_config.h>
#include <metrics/structured_events.h>
#include <vboot/crossystem.h>
#include <vm_applications/apps.pb.h>
#include <vm_concierge/concierge_service.pb.h>
#include "vm_tools/concierge/crosvm_control.h"
namespace vm_tools::concierge {
const char kCrosvmBin[] = "/usr/bin/crosvm";
namespace {
constexpr char kFontsSharedDir[] = "/usr/share/fonts";
constexpr char kFontsSharedDirTag[] = "fonts";
// The maximum CPU capacity is defined in include/linux/sched.h as
// SCHED_CAPACITY_SCALE, which is "1 << 10".
constexpr int kMaxCapacity = 1024;
constexpr char kSchedulerTunePath[] = "/scheduler-tune";
constexpr char kBoostTopAppProperty[] = "boost-top-app";
constexpr char kBoostArcVmProperty[] = "boost-arcvm";
// Path to the CPU information directories.
constexpr char kCpuInfosPath[] = "/sys/devices/system/cpu/";
} // namespace
namespace internal {
std::string GetDevConfPath(apps::VmType type) {
return base::StrCat({
"/usr/local/vms/etc/",
base::ToLowerASCII(apps::VmType_Name(type)),
"_dev.conf",
});
}
int64_t GetVmMemoryMiBInternal(int64_t sys_memory_mb, bool is_32bit) {
int64_t vm_memory_mb;
if (sys_memory_mb >= 4096) {
// On devices with >=4GB RAM, reserve 1GB for other processes.
vm_memory_mb = sys_memory_mb - kHostReservedNumMiB;
} else {
vm_memory_mb = sys_memory_mb / 4 * 3;
}
// Limit guest memory size to avoid running out of host process address space.
//
// A 32-bit process has 4GB address space, and some parts are not usable for
// various reasons including address space layout randomization (ASLR).
// In 32-bit crosvm address space, only ~3370MB is usable:
// - 256MB is not usable because of executable load bias ASLR.
  // - 4MB is used for the crosvm executable.
  // - 32MB is not usable because of heap ASLR.
// - 16MB is used for mapped shared libraries.
// - 256MB is not usable because of mmap base address ASLR.
// - 132MB is used for gaps in the memory layout.
// - 30MB is used for other allocations.
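  // In total: 4096 - (256 + 4 + 32 + 16 + 256 + 132 + 30) = 3370MB usable.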
//
// 3328 is chosen because it's a rounded number (i.e. 3328 % 256 == 0).
// TODO(hashimoto): Remove this once crosvm becomes 64-bit on ARM.
static constexpr int64_t k32bitVmMemoryMaxMb = 3328;
if (is_32bit) {
vm_memory_mb = std::min(vm_memory_mb, k32bitVmMemoryMaxMb);
}
return vm_memory_mb;
}
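// For example, a device with 2048MB of RAM gets 2048 / 4 * 3 = 1536MB for the
// VM, while a device with 8192MB reserves kHostReservedNumMiB for the host
// and gives the rest to the VM (modulo the 32-bit cap above).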
} // namespace internal
int64_t GetVmMemoryMiB() {
return internal::GetVmMemoryMiBInternal(
base::SysInfo::AmountOfPhysicalMemoryMB(), sizeof(uintptr_t) == 4);
}
std::optional<int32_t> ReadFileToInt32(const base::FilePath& filename) {
std::string str;
int int_val;
if (base::ReadFileToString(filename, &str) &&
base::StringToInt(
base::TrimWhitespaceASCII(str, base::TrimPositions::TRIM_TRAILING),
&int_val)) {
return std::optional<int32_t>(int_val);
}
return std::nullopt;
}
std::optional<int32_t> GetCpuPackageId(int32_t cpu,
const base::FilePath& cpu_info_path) {
base::FilePath topology_path(
base::StringPrintf("cpu%d/topology/physical_package_id", cpu));
topology_path = cpu_info_path.Append(topology_path);
return ReadFileToInt32(topology_path);
}
std::optional<int32_t> GetCpuCapacity(int32_t cpu,
const base::FilePath& cpu_info_path) {
base::FilePath cpu_capacity_path(
base::StringPrintf("cpu%d/cpu_capacity", cpu));
cpu_capacity_path = cpu_info_path.Append(cpu_capacity_path);
return ReadFileToInt32(cpu_capacity_path);
}
std::optional<int32_t> GetCpuMaxFrequency(int32_t cpu) {
base::FilePath cpu_max_freq_path(base::StringPrintf(
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu));
return ReadFileToInt32(cpu_max_freq_path);
}
std::optional<std::string> GetCpuAffinityFromClusters(
const std::vector<std::vector<std::string>>& cpu_clusters,
const std::map<int32_t, std::vector<std::string>>& cpu_capacity_groups) {
if (cpu_clusters.size() > 1) {
// If more than one CPU cluster exists, generate CPU affinity groups based
// on clusters. Each CPU from a given cluster will be pinned to the full
// set of cores of that cluster, allowing some scheduling flexibility
// while still ensuring vCPUs can only run on physical cores from the same
// package.
std::vector<std::string> cpu_affinities;
for (const auto& cluster : cpu_clusters) {
auto cpu_list = base::JoinString(cluster, ",");
for (const auto& cpu : cluster) {
cpu_affinities.push_back(
base::StringPrintf("%s=%s", cpu.c_str(), cpu_list.c_str()));
}
}
return base::JoinString(cpu_affinities, ":");
} else if (cpu_capacity_groups.size() > 1) {
// If only one cluster exists, group CPUs by capacity if there are at least
// two distinct CPU capacity groups.
std::vector<std::string> cpu_affinities;
for (const auto& group : cpu_capacity_groups) {
auto cpu_list = base::JoinString(group.second, ",");
for (const auto& cpu : group.second) {
cpu_affinities.push_back(
base::StringPrintf("%s=%s", cpu.c_str(), cpu_list.c_str()));
}
}
return base::JoinString(cpu_affinities, ":");
} else {
return std::nullopt;
}
}
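// For example, given clusters {"0", "1"} and {"2", "3"}, the function above
// returns "0=0,1:1=0,1:2=2,3:3=2,3", i.e. each vCPU is pinned to every CPU of
// its own cluster.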
bool SetUpCrosvmProcess(const base::FilePath& cpu_cgroup) {
  // Note: This function is meant to be called after forking a process for
  // crosvm but before execve(). Since Concierge is multi-threaded, this
  // function should not call any functions that are not async-signal-safe
  // (see man signal-safety). In particular, don't call malloc/new or any
  // functions or constructors that may allocate heap memory; doing so may
  // deadlock trying to lock a mutex that has already been locked by one of
  // the parent's threads.
  // Set up the CPU cgroup. Note that FilePath::value() returns a const
  // reference to a std::string without allocating a new object, and c_str()
  // doesn't copy in C++11 and later.
const int fd =
HANDLE_EINTR(open(cpu_cgroup.value().c_str(), O_WRONLY | O_CLOEXEC));
if (fd < 0) {
// TODO(yusukes): Do logging here in an async safe way.
return false;
}
char pid_str[32];
const size_t len = base::strings::SafeSPrintf(pid_str, "%d", getpid());
const ssize_t written = HANDLE_EINTR(write(fd, pid_str, len));
close(fd);
if (written != len) {
// TODO(yusukes): Do logging here in an async safe way.
return false;
}
// Set up process group ID.
return SetPgid();
}
bool SetPgid() {
// Note: This should only call async-signal-safe functions. Don't call
// malloc/new. See SetUpCrosvmProcess() for more details.
if (setpgid(0, 0) != 0) {
// TODO(yusukes): Do logging here in an async safe way.
return false;
}
return true;
}
bool WaitForChild(pid_t child, base::TimeDelta timeout) {
const base::Time deadline = base::Time::Now() + timeout;
while (true) {
pid_t ret = waitpid(child, nullptr, WNOHANG);
if (ret == child || (ret < 0 && errno == ECHILD)) {
// Either the child exited or it doesn't exist anymore.
return true;
}
// ret == 0 means that the child is still alive
if (ret < 0) {
PLOG(ERROR) << "Failed to wait for child process";
return false;
}
base::Time now = base::Time::Now();
if (deadline <= now) {
// Timed out.
return false;
}
usleep(100);
}
}
bool CheckProcessExists(pid_t pid) {
if (pid == 0)
return false;
// Try to reap child process in case it just exited.
waitpid(pid, nullptr, WNOHANG);
// kill() with a signal value of 0 is explicitly documented as a way to
// check for the existence of a process.
return kill(pid, 0) >= 0 || errno != ESRCH;
}
std::optional<BalloonStats> GetBalloonStats(
const std::string& socket_path, std::optional<base::TimeDelta> timeout) {
BalloonStats stats;
if (!CrosvmControl::Get()->BalloonStats(
socket_path, timeout, &stats.stats_ffi, &stats.balloon_actual)) {
LOG(ERROR) << "Failed to retrieve balloon stats";
return std::nullopt;
}
return stats;
}
std::optional<BalloonWorkingSet> GetBalloonWorkingSet(
const std::string& socket_path) {
BalloonWorkingSet ws;
if (!CrosvmControl::Get()->BalloonWorkingSet(socket_path, &ws.working_set_ffi,
&ws.balloon_actual)) {
LOG(ERROR) << "Failed to retrieve balloon working set";
return std::nullopt;
}
return ws;
}
udev_device* FindUdevDevice(const std::string& path) {
  udev* udev = udev_new();
  udev_enumerate* enumerate = udev_enumerate_new(udev);
  udev_enumerate_scan_devices(enumerate);
  struct udev_list_entry* entry = nullptr;
  udev_list_entry_foreach(entry, udev_enumerate_get_list_entry(enumerate)) {
    const char* syspath = udev_list_entry_get_name(entry);
    udev_device* device = udev_device_new_from_syspath(udev, syspath);
    const char* devnode = udev_device_get_devnode(device);
    if (devnode && !strcmp(devnode, path.c_str())) {
      // Keep the udev context and enumerator alive; the returned device
      // depends on the context.
      return device;
    }
    // Drop devices that don't match to avoid leaking references.
    udev_device_unref(device);
  }
  udev_enumerate_unref(enumerate);
  udev_unref(udev);
  return nullptr;
}
bool AttachNetDevice(const std::string& socket_path,
const std::string& tap_name,
uint8_t* out_bus) {
return CrosvmControl::Get()->NetAttach(socket_path, tap_name, out_bus);
}
bool DetachNetDevice(const std::string& socket_path, uint8_t bus) {
return CrosvmControl::Get()->NetDetach(socket_path, bus);
}
std::string GetStringUdevAttr(udev_device* device, std::string sysattr) {
const char* attr_char =
udev_device_get_sysattr_value(device, sysattr.c_str());
if (!attr_char) {
return {};
}
std::string attr(attr_char);
base::TrimWhitespaceASCII(attr, base::TRIM_ALL, &attr);
return attr;
}
int GetIntUdevAttr(udev_device* device, std::string sysattr) {
std::string attr = GetStringUdevAttr(device, sysattr);
if (attr.empty()) {
return -1;
}
int attr_int = 0;
base::HexStringToInt(attr, &attr_int);
return attr_int;
}
bool LogGuestUsbStructuredMetrics(int fd,
bool attach_success,
apps::VmType vm_type) {
if (vm_type != apps::VmType::TERMINA) {
// Only Crostini has been approved to collect metrics for now.
return false;
}
std::string device_path = "/proc/self/fd/" + std::to_string(fd);
char buf[50];
int namelen = readlink(device_path.c_str(), buf, sizeof(buf));
if (namelen < 0) {
LOG(ERROR) << "Unable to determine device path from proc fd, will not log "
"USB metrics";
return false;
}
std::string dev_path(buf, namelen);
udev_device* device = FindUdevDevice(dev_path);
if (!device) {
LOG(ERROR) << "Unable to find udev device for " << dev_path;
return false;
}
std::string vendor_name = GetStringUdevAttr(device, "manufacturer");
std::string product_name = GetStringUdevAttr(device, "product");
int vendor_id = GetIntUdevAttr(device, "idVendor");
int product_id = GetIntUdevAttr(device, "idProduct");
int device_class = GetIntUdevAttr(device, "bDeviceClass");
  // Limit string lengths to prevent a poorly behaved device from creating a
  // huge metrics packet.
int string_len_limit = 200;
vendor_name = vendor_name.substr(0, string_len_limit);
product_name = product_name.substr(0, string_len_limit);
std::vector<int64_t> interface_classes;
udev* udev = udev_device_get_udev(device);
udev_enumerate* enumerate = udev_enumerate_new(udev);
udev_enumerate_add_match_subsystem(enumerate, "usb");
udev_enumerate_add_match_parent(enumerate, device);
udev_enumerate_scan_devices(enumerate);
struct udev_list_entry* entry = nullptr;
udev_list_entry_foreach(entry, udev_enumerate_get_list_entry(enumerate)) {
const char* entry_path = udev_list_entry_get_name(entry);
// udev_enumerate_add_match_parent includes the parent entry, skip it.
if (!strcmp(udev_device_get_syspath(device), entry_path)) {
continue;
}
    udev_device* child = udev_device_new_from_syspath(udev, entry_path);
    const char* child_type = udev_device_get_devtype(child);
    if (!child_type || strcmp(child_type, "usb_interface") != 0) {
      // If this is not a usb_interface node then something is wrong, fail
      // safe.
      LOG(WARNING) << "Found a child '" << entry_path
                   << "' with unexpected type: "
                   << (child_type ? child_type : "(null)");
      udev_device_unref(child);
      continue;
    }
    // bInterfaceClass may be missing; guard against constructing std::string
    // from nullptr.
    const char* class_attr =
        udev_device_get_sysattr_value(child, "bInterfaceClass");
    std::string interface_class(class_attr ? class_attr : "");
    udev_device_unref(child);
    int64_t interface_class_int = 0;
    base::TrimWhitespaceASCII(interface_class, base::TRIM_ALL,
                              &interface_class);
    if (base::HexStringToInt64(interface_class, &interface_class_int)) {
      interface_classes.push_back(interface_class_int);
    }
  }
  udev_enumerate_unref(enumerate);
return metrics::structured::events::guest_usb_device::UsbDeviceInfo()
.SetVendorName(vendor_name)
.SetVendorId(vendor_id)
.SetProductName(product_name)
.SetProductId(product_id)
.SetDeviceClass(device_class)
.SetCrostiniConnectionSuccess(attach_success)
.SetInterfaceClass(std::move(interface_classes))
.SetGuestType(vm_type)
.Record();
}
bool AttachUsbDevice(const std::string& socket_path,
uint8_t bus,
uint8_t addr,
uint16_t vid,
uint16_t pid,
int fd,
uint8_t* out_port,
apps::VmType vm_type) {
std::string device_path = "/proc/self/fd/" + std::to_string(fd);
  fcntl(fd, F_SETFD, 0);  // Remove the CLOEXEC flag so the fd survives exec.
bool attach_success = CrosvmControl::Get()->UsbAttach(
socket_path, bus, addr, vid, pid, device_path, out_port);
LogGuestUsbStructuredMetrics(fd, attach_success, vm_type);
return attach_success;
}
bool AttachKey(const std::string& socket_path,
int hidraw_fd,
uint8_t* out_port,
apps::VmType vm_type) {
std::string hidraw_path = "/proc/self/fd/" + std::to_string(hidraw_fd);
  fcntl(hidraw_fd, F_SETFD, 0);  // Remove the CLOEXEC flag so the fd
                                 // survives exec.
return CrosvmControl::Get()->KeyAttach(socket_path, hidraw_path, out_port);
}
bool DetachUsbDevice(const std::string& socket_path, uint8_t port) {
return CrosvmControl::Get()->UsbDetach(socket_path, port);
}
bool ListUsbDevice(const std::string& socket_path,
std::vector<UsbDeviceEntry>* device) {
  // Allocate enough slots for the maximum number of USB devices.
  // This will never be more than 255.
const size_t max_usb_devices = CrosvmControl::Get()->MaxUsbDevices();
device->resize(max_usb_devices);
ssize_t dev_count = CrosvmControl::Get()->UsbList(socket_path, device->data(),
max_usb_devices);
if (dev_count < 0) {
return false;
}
device->resize(dev_count);
return true;
}
bool CrosvmDiskResize(const std::string& socket_path,
int disk_index,
uint64_t new_size) {
return CrosvmControl::Get()->ResizeDisk(socket_path, disk_index, new_size);
}
bool UpdateCpuShares(const base::FilePath& cpu_cgroup, int cpu_shares) {
const std::string cpu_shares_str = std::to_string(cpu_shares);
if (base::WriteFile(cpu_cgroup.Append("cpu.shares"), cpu_shares_str.c_str(),
cpu_shares_str.size()) != cpu_shares_str.size()) {
PLOG(ERROR) << "Failed to update " << cpu_cgroup.value() << " to "
<< cpu_shares;
return false;
}
return true;
}
// Limits the tasks in the cgroup to @percent of one CPU. Although the quota
// could in principle exceed 100% (up to 100% times the number of CPUs), for
// now we cap it at a percentage of a single CPU. @percent=-1 disables the
// quota.
bool UpdateCpuQuota(const base::FilePath& cpu_cgroup, int percent) {
LOG_ASSERT(percent <= 100 && (percent >= 0 || percent == -1));
// Set period to 100000us and quota to percent * 1000us.
const std::string cpu_period_str = std::to_string(100000);
const base::FilePath cfs_period_us = cpu_cgroup.Append("cpu.cfs_period_us");
if (base::WriteFile(cfs_period_us, cpu_period_str.c_str(),
cpu_period_str.size()) != cpu_period_str.size()) {
PLOG(ERROR) << "Failed to update " << cfs_period_us.value() << " to "
<< cpu_period_str;
return false;
}
int quota_int;
if (percent == -1)
quota_int = -1;
else
quota_int = percent * 1000;
const std::string cpu_quota_str = std::to_string(quota_int);
const base::FilePath cfs_quota_us = cpu_cgroup.Append("cpu.cfs_quota_us");
if (base::WriteFile(cfs_quota_us, cpu_quota_str.c_str(),
cpu_quota_str.size()) != cpu_quota_str.size()) {
PLOG(ERROR) << "Failed to update " << cfs_quota_us.value() << " to "
<< cpu_quota_str;
return false;
}
return true;
}
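// For example, UpdateCpuQuota(cgroup, 50) writes cpu.cfs_period_us=100000 and
// cpu.cfs_quota_us=50000, limiting the cgroup to at most half of one CPU per
// scheduling period.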
bool UpdateCpuLatencySensitive(const base::FilePath& cpu_cgroup, bool enable) {
std::string enable_str = std::to_string(static_cast<int>(enable));
auto latency_sensitive = cpu_cgroup.Append("cpu.uclamp.latency_sensitive");
if (base::WriteFile(latency_sensitive, enable_str.c_str(),
enable_str.size()) != enable_str.size()) {
PLOG(ERROR) << "Failed to update " << latency_sensitive.value() << " to "
<< enable_str;
return false;
}
return true;
}
bool UpdateCpuUclampMin(const base::FilePath& cpu_cgroup, double percent) {
LOG_ASSERT(percent <= 100.0 && percent >= 0.0);
std::string uclamp_min_str = std::to_string(percent);
auto uclamp_min = cpu_cgroup.Append("cpu.uclamp.min");
if (base::WriteFile(uclamp_min, uclamp_min_str.c_str(),
uclamp_min_str.size()) != uclamp_min_str.size()) {
PLOG(ERROR) << "Failed to update " << uclamp_min.value() << " to "
<< uclamp_min_str;
return false;
}
return true;
}
// Converts a file path into an fd-based path (/proc/self/fd/N).
// Opens the file and appends the resulting SafeFD to |fd_storage| to keep the
// fd alive. Returns an error message, or an empty string on success.
std::string ConvertToFdBasedPath(brillo::SafeFD& parent_fd,
base::FilePath* in_out_path,
int flags,
std::vector<brillo::SafeFD>& fd_storage) {
static auto procSelfFd = base::FilePath("/proc/self/fd");
if (procSelfFd.IsParent(*in_out_path)) {
if (!base::PathExists(*in_out_path)) {
return "Path does not exist";
}
} else {
auto disk_fd = parent_fd.OpenExistingFile(*in_out_path, flags);
if (brillo::SafeFD::IsError(disk_fd.second)) {
LOG(ERROR) << "Could not open file: " << static_cast<int>(disk_fd.second);
return "Could not open file";
}
*in_out_path = base::FilePath(kProcFileDescriptorsPath)
.Append(base::NumberToString(disk_fd.first.get()));
fd_storage.push_back(std::move(disk_fd.first));
}
return "";
}
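// Parses custom crosvm parameters from a dev config file. Each non-empty,
// non-comment line is one directive:
//   --key=value         appended to the crosvm `run` arguments.
//   ^--key=value        prepended to the crosvm `run` arguments.
//   !prefix             removes existing `run` arguments starting with prefix.
//   prerun:--key=value  added to the pre-`run` argument list.
//   precrosvm:arg       added before the crosvm invocation itself.
//   KEY=VALUE           stored as a special parameter (see
//                       ObtainSpecialParameter below).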
CustomParametersForDev::CustomParametersForDev(const std::string& data) {
std::vector<std::string_view> lines = base::SplitStringPiece(
data, "\n", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
for (auto& line : lines) {
if (line.empty() || line[0] == '#')
continue;
    // Split the line at the first '=' sign; --key=value and KEY=VALUE
    // parameters both use '=' as the separator. value will be an empty
    // string in the case of a bare '--key'.
std::pair<std::string_view, std::string_view> param_pair =
absl::StrSplit(std::string_view(line), absl::MaxSplits('=', 1));
std::string_view key =
base::TrimWhitespaceASCII(param_pair.first, base::TRIM_ALL);
std::string_view value =
base::TrimWhitespaceASCII(param_pair.second, base::TRIM_ALL);
    // Handle prerun: parameters, which are placed before the `run` subcommand.
if (line.substr(0, 7) == "prerun:") {
prerun_params_.emplace_back(key.substr(7), std::move(value));
continue;
}
// Add params before crosvm invocation.
if (line.substr(0, 10) == "precrosvm:") {
precrosvm_params_.emplace_back(line.substr(10));
continue;
}
switch (line[0]) {
case '!':
// Line contains a prefix key. Remove all args with this prefix.
run_prefix_to_remove_.emplace_back(line.substr(1));
break;
case '^':
// Parameter to be prepended before run, expected to be ^--key=value
// format.
run_params_to_prepend_.emplace_back(key.substr(1), std::move(value));
break;
case '-':
// Parameter expected to be --key=value format.
run_params_to_add_.emplace_back(std::move(key), std::move(value));
break;
default:
// KEY=VALUE pair.
special_parameters_[std::string(key)].emplace_back(std::move(value));
break;
}
}
initialized_ = true;
}
void CustomParametersForDev::Apply(base::StringPairs& args) {
if (!initialized_)
return;
for (const auto& prefix : run_prefix_to_remove_) {
std::erase_if(args, [&prefix](const auto& pair) {
return base::StartsWith(pair.first, prefix);
});
}
for (const auto& param : run_params_to_prepend_) {
args.emplace(args.begin(), param.first, param.second);
}
for (const auto& param : run_params_to_add_) {
args.emplace_back(param.first, param.second);
}
}
void CustomParametersForDev::AppendPrerunParams(
base::StringPairs& pre_run_args) const {
for (const auto& param : prerun_params_) {
pre_run_args.emplace_back(param.first, param.second);
}
}
std::optional<const std::string> CustomParametersForDev::ObtainSpecialParameter(
const std::string& key) const {
if (!initialized_)
return std::nullopt;
if (auto it = special_parameters_.find(key);
it != special_parameters_.end()) {
DCHECK(it->second.size());
    return it->second.back();
} else {
return std::nullopt;
}
}
std::vector<std::string> CustomParametersForDev::ObtainSpecialParameters(
const std::string& key) const {
if (!initialized_)
return {};
if (auto it = special_parameters_.find(key);
it != special_parameters_.end()) {
DCHECK(it->second.size());
return it->second;
} else {
return {};
}
}
std::unique_ptr<CustomParametersForDev> MaybeLoadCustomParametersForDev(
apps::VmType type, bool use_dev_conf) {
const bool is_dev_mode = (VbGetSystemPropertyInt("cros_debug") == 1);
// Load any custom parameters from the development configuration file if the
// feature is turned on (default) and path exists (dev mode only).
if (!is_dev_mode || !use_dev_conf) {
return nullptr;
}
// Path to the development configuration file (only visible in dev mode).
const base::FilePath dev_conf(internal::GetDevConfPath(type));
if (!base::PathExists(dev_conf)) {
return nullptr;
}
std::string data;
if (!base::ReadFileToString(dev_conf, &data)) {
PLOG(ERROR) << "Failed to read file " << dev_conf.value();
return nullptr;
}
return std::make_unique<CustomParametersForDev>(data);
}
std::string SharedDataParam::to_string() const {
  // We can relax this condition later if we want to serve users who do not
  // set uid_map and gid_map, but today there are none.
CHECK_NE(uid_map, "");
CHECK_NE(gid_map, "");
struct CacheParameters {
std::string_view cache;
std::string_view timeout;
std::string_view writeback;
std::string_view negative_timeout;
};
  static constexpr auto params_map =
      base::MakeFixedFlatMap<SharedDataParam::Cache, CacheParameters>(
          {{SharedDataParam::Cache::kAuto,
            {.cache = "auto",
             .timeout = "1",
             .writeback = "false",
             .negative_timeout = "1"}},
           {SharedDataParam::Cache::kAlways,
            {.cache = "always",
             .timeout = "3600",
             .writeback = "true",
             .negative_timeout = "3600"}},
           {SharedDataParam::Cache::kNever,
            {.cache = "never",
             .timeout = "1",
             .writeback = "false",
             .negative_timeout = "1"}}});
CacheParameters params = params_map.at(enable_caches);
  // Disable the negative dentry cache when ascii_casefold is enabled, because
  // it breaks scenarios like the following:
  // 1. Look up "foo", a non-existent file. A negative dentry is cached on
  //    the guest.
  // 2. Create "FOO".
  // 3. Look up "foo". This must succeed in a casefold directory, but the
  //    lookup can fail due to the negative dentry cached in step 1.
if (ascii_casefold) {
params.negative_timeout = "0";
}
std::string result = base::StrCat({
data_dir.value(), //
":",
tag, //
":type=fs", //
":cache=",
params.cache, //
":uidmap=",
uid_map, //
":gidmap=",
gid_map, //
":timeout=",
params.timeout, //
":rewrite-security-xattrs=", //
rewrite_security_xattrs ? "true" : "false", //
ascii_casefold ? ":ascii_casefold=true" : "", //
":writeback=",
params.writeback, //
posix_acl ? "" : ":posix_acl=false", //
":negative_timeout=",
params.negative_timeout, //
});
if (!privileged_quota_uids.empty()) {
result += ":privileged_quota_uids=";
for (size_t i = 0; i < privileged_quota_uids.size(); ++i) {
if (i != 0)
result += ' ';
result += base::NumberToString(privileged_quota_uids[i]);
}
}
return result;
}
SharedDataParam CreateFontsSharedDataParam() {
return SharedDataParam{.data_dir = base::FilePath(kFontsSharedDir),
.tag = kFontsSharedDirTag,
.uid_map = kAndroidUidMap,
.gid_map = kAndroidGidMap,
.enable_caches = SharedDataParam::Cache::kAlways,
.ascii_casefold = false,
.posix_acl = true};
}
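// For example, the fonts parameter above serializes (uidmap/gidmap elided) to:
//   /usr/share/fonts:fonts:type=fs:cache=always:uidmap=...:gidmap=...
//   :timeout=3600:rewrite-security-xattrs=false:writeback=true
//   :negative_timeout=3600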
void ArcVmCPUTopology::CreateAffinity() {
std::vector<std::string> cpu_list;
std::vector<std::string> affinities;
// Create capacity mask.
int min_cap = -1;
int max_cap = -1;
int last_non_rt_cpu = -1;
for (const auto& cap : capacity_) {
for (const auto cpu : cap.second) {
if (cap.first)
cpu_list.push_back(base::StringPrintf("%d=%d", cpu, cap.first));
      // last_non_rt_cpu should be the last cpu with the lowest capacity.
if (min_cap == -1 || min_cap >= cap.first) {
min_cap = cap.first;
last_non_rt_cpu = cpu;
}
max_cap = std::max(max_cap, static_cast<int>(cap.first));
}
}
  // Add RT VCPUs with the lowest capacity.
if (min_cap) {
for (int i = 0; i < num_rt_cpus_; i++) {
cpu_list.push_back(base::StringPrintf("%d=%d", num_cpus_ + i, min_cap));
}
capacity_mask_ = base::JoinString(cpu_list, ",");
cpu_list.clear();
}
// If there are heterogeneous cores, calculate uclamp.min value.
if (min_cap != max_cap) {
    // Calculate a better uclamp.min for Android top-app tasks so that those
    // tasks will NOT be scheduled on the LITTLE cores.
    // If the ARCVM kernel boots with different-capacity CPUs, it enables the
    // Capacity Aware Scheduler (CAS), which places each task on a CPU by
    // comparing the CPU's capacity with the task's expected CPU utilization.
    // Since uclamp.min boosts the minimum expected utilization to the given
    // percentage of the maximum capacity, if that is bigger than the LITTLE
    // core capacity, CAS will schedule the task on a big core.
    // Thus its value must be *slightly* bigger than the LITTLE core capacity.
    // For this reason, this adds 5% on top of the LITTLE core capacity rate.
    // Note that the uclamp value must be a percentage of the maximum capacity
    // (~= utilization).
top_app_uclamp_min_ = std::min(min_cap * 100 / kMaxCapacity + 5, 100);
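    // For example, with a LITTLE core capacity of 462:
    // 462 * 100 / 1024 + 5 == 50, so top-app tasks are boosted to at least
    // 50% of the maximum capacity.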
}
// Allow boards to override the top_app_uclamp_min by scheduler-tune/
// boost-top-app.
brillo::CrosConfig cros_config;
std::string boost;
if (cros_config.GetString(kSchedulerTunePath, kBoostTopAppProperty, &boost)) {
int uclamp_min;
if (base::StringToInt(boost, &uclamp_min))
top_app_uclamp_min_ = uclamp_min;
else
LOG(WARNING) << "Failed to convert value of " << kSchedulerTunePath << "/"
<< kBoostTopAppProperty << " to number";
}
  // The board may request boosting the whole ARCVM globally in order to
  // reduce latency and improve the general experience of ARCVM, especially
  // on big.LITTLE CPU architectures.
  // If the global boost wasn't defined, it won't be used at all. b/217825939
global_vm_boost_ = 0.0;
if (cros_config.GetString(kSchedulerTunePath, kBoostArcVmProperty, &boost)) {
double boost_factor;
if (base::StringToDouble(boost, &boost_factor)) {
int32_t little_max_freq = std::numeric_limits<int32_t>::max();
int32_t big_max_freq = 0;
// The global boost factor is defined as:
// max_freq(little_core) / max_freq(big_core) * boost_factor
for (int32_t cpu = 0; cpu < num_cpus_; cpu++) {
auto max_freq = GetCpuMaxFrequency(cpu);
if (max_freq) {
little_max_freq = std::min(little_max_freq, *max_freq);
big_max_freq = std::max(big_max_freq, *max_freq);
}
}
if (little_max_freq <= big_max_freq && little_max_freq != 0 &&
big_max_freq != 0) {
double freq_ratio = static_cast<double>(little_max_freq) / big_max_freq;
global_vm_boost_ = freq_ratio * boost_factor * 100.0;
if (global_vm_boost_ > 100.0) {
LOG(INFO) << "Clamping global VM boost from " << global_vm_boost_
<< "% to 100%";
global_vm_boost_ = 100.0;
}
LOG(INFO) << "Calculated global VM boost: " << global_vm_boost_ << "%";
} else {
LOG(WARNING) << "VM cannot be boosted - invalid frequencies detected "
<< "little: " << little_max_freq
<< " big: " << big_max_freq;
}
}
}
for (const auto& pkg : package_) {
bool is_rt_vcpu_package = false;
for (auto cpu : pkg.second) {
cpu_list.push_back(std::to_string(cpu));
      // Add the RT VCPUs to the package containing the lowest-capacity CPU.
is_rt_vcpu_package = is_rt_vcpu_package || (cpu == last_non_rt_cpu);
}
if (is_rt_vcpu_package) {
for (int i = 0; i < num_rt_cpus_; i++) {
cpu_list.push_back(std::to_string(num_cpus_ + i));
}
}
package_mask_.push_back(base::JoinString(cpu_list, ","));
cpu_list.clear();
}
// Add RT VCPUs after non RT VCPUs.
for (int i = 0; i < num_rt_cpus_; i++) {
rt_cpus_.insert(num_cpus_ + i);
}
for (auto cpu : rt_cpus_) {
cpu_list.push_back(std::to_string(cpu));
}
rt_cpu_mask_ = base::JoinString(cpu_list, ",");
cpu_list.clear();
for (int i = 0; i < num_cpus_ + num_rt_cpus_; i++) {
if (rt_cpus_.find(i) == rt_cpus_.end()) {
cpu_list.push_back(std::to_string(i));
}
}
non_rt_cpu_mask_ = base::JoinString(cpu_list, ",");
cpu_list.clear();
// Try to group VCPUs based on physical CPUs topology.
if (package_.size() > 1) {
for (const auto& pkg : package_) {
bool is_rt_vcpu_package = false;
for (auto cpu : pkg.second) {
cpu_list.push_back(std::to_string(cpu));
      // Add the RT VCPUs to the package containing the lowest-capacity CPU.
is_rt_vcpu_package = is_rt_vcpu_package || (cpu == last_non_rt_cpu);
}
std::string cpu_mask = base::JoinString(cpu_list, ",");
cpu_list.clear();
for (auto cpu : pkg.second) {
affinities.push_back(
base::StringPrintf("%d=%s", cpu, cpu_mask.c_str()));
}
if (is_rt_vcpu_package) {
for (int i = 0; i < num_rt_cpus_; i++) {
affinities.push_back(
base::StringPrintf("%d=%s", num_cpus_ + i, cpu_mask.c_str()));
}
}
}
} else {
// Try to group VCPUs based on physical CPUs capacity values.
for (const auto& cap : capacity_) {
bool is_rt_vcpu_cap = false;
for (auto cpu : cap.second) {
cpu_list.push_back(std::to_string(cpu));
is_rt_vcpu_cap = is_rt_vcpu_cap || (cpu == last_non_rt_cpu);
}
std::string cpu_mask = base::JoinString(cpu_list, ",");
cpu_list.clear();
for (auto cpu : cap.second) {
affinities.push_back(
base::StringPrintf("%d=%s", cpu, cpu_mask.c_str()));
}
if (is_rt_vcpu_cap) {
for (int i = 0; i < num_rt_cpus_; i++) {
affinities.push_back(
base::StringPrintf("%d=%s", num_cpus_ + i, cpu_mask.c_str()));
}
}
}
}
affinity_mask_ = base::JoinString(affinities, ":");
num_cpus_ += num_rt_cpus_;
}
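// For example, a 4-core big.LITTLE board with capacities {462, 462, 1024,
// 1024} and one RT vCPU yields a capacity mask of
// "0=462,1=462,2=1024,3=1024,4=462", an RT CPU mask of "4" and a non-RT CPU
// mask of "0,1,2,3".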
// Creates CPU groupings by cpu_capacity and physical package id.
void ArcVmCPUTopology::CreateTopology() {
for (uint32_t cpu = 0; cpu < num_cpus_; cpu++) {
auto capacity = GetCpuCapacity(cpu, base::FilePath(kCpuInfosPath));
auto package = GetCpuPackageId(cpu, base::FilePath(kCpuInfosPath));
    // Do not fail; carry on, but use an artificial capacity group.
if (!capacity)
capacity_[0].push_back(cpu);
else
capacity_[*capacity].push_back(cpu);
// Ditto.
if (!package)
package_[0].push_back(cpu);
else
package_[*package].push_back(cpu);
}
}
// Check whether the host processor is symmetric.
// TODO(kansho): Support ADL. IsSymmetricCPU() would return true even though
// it's heterogeneous.
bool ArcVmCPUTopology::IsSymmetricCPU() {
return capacity_.size() == 1 && package_.size() == 1;
}
void ArcVmCPUTopology::CreateCPUAffinity() {
CreateTopology();
CreateAffinity();
}
void ArcVmCPUTopology::AddCpuToCapacityGroupForTesting(uint32_t cpu,
uint32_t capacity) {
capacity_[capacity].push_back(cpu);
}
void ArcVmCPUTopology::AddCpuToPackageGroupForTesting(uint32_t cpu,
uint32_t package) {
package_[package].push_back(cpu);
}
void ArcVmCPUTopology::CreateCPUAffinityForTesting() {
CreateAffinity();
}
uint32_t ArcVmCPUTopology::NumCPUs() {
return num_cpus_;
}
uint32_t ArcVmCPUTopology::NumRTCPUs() {
return num_rt_cpus_;
}
void ArcVmCPUTopology::SetNumRTCPUs(uint32_t num_rt_cpus) {
num_rt_cpus_ = num_rt_cpus;
}
const std::string& ArcVmCPUTopology::AffinityMask() {
return affinity_mask_;
}
const std::string& ArcVmCPUTopology::RTCPUMask() {
return rt_cpu_mask_;
}
const std::string& ArcVmCPUTopology::NonRTCPUMask() {
return non_rt_cpu_mask_;
}
const std::string& ArcVmCPUTopology::CapacityMask() {
return capacity_mask_;
}
const std::vector<std::string>& ArcVmCPUTopology::PackageMask() {
return package_mask_;
}
int ArcVmCPUTopology::TopAppUclampMin() {
return top_app_uclamp_min_;
}
double ArcVmCPUTopology::GlobalVMBoost() {
return global_vm_boost_;
}
ArcVmCPUTopology::ArcVmCPUTopology(uint32_t num_cpus, uint32_t num_rt_cpus) {
num_cpus_ = num_cpus;
num_rt_cpus_ = num_rt_cpus;
}
uint32_t GetBorealisCpuCountOverride(uint32_t num_cpus) {
  // With 8 or more logical processors, return 2 fewer to leave breathing
  // room for the virtio threads; otherwise return 1 fewer.
uint32_t num_vcpus;
if (num_cpus >= 8) {
num_vcpus = num_cpus - 2;
} else {
num_vcpus = num_cpus - 1;
}
LOG(INFO) << num_vcpus << "/" << (num_cpus)
<< " logical processors will be used.";
return num_vcpus;
}
std::ostream& operator<<(std::ostream& os, const VmStartChecker::Status& e) {
switch (e) {
case VmStartChecker::Status::READY:
os << "VM is ready";
break;
case VmStartChecker::Status::EPOLL_INVALID_EVENT:
os << "Received invalid event while waiting for VM to start";
break;
case VmStartChecker::Status::EPOLL_INVALID_FD:
os << "Received invalid fd while waiting for VM to start";
break;
case VmStartChecker::Status::TIMEOUT:
os << "Timed out while waiting for VM to start";
break;
case VmStartChecker::Status::INVALID_SIGNAL_INFO:
os << "Received invalid signal info while waiting for VM to start";
break;
case VmStartChecker::Status::SIGNAL_RECEIVED:
os << "Received signal while waiting for VM to start";
break;
default:
os << "Invalid enum value";
break;
}
return os;
}
std::unique_ptr<VmStartChecker> VmStartChecker::Create(int32_t signal_fd) {
// Create an event fd that will be signalled when a VM is ready.
base::ScopedFD vm_start_event_fd(eventfd(0, EFD_CLOEXEC));
if (!vm_start_event_fd.is_valid()) {
PLOG(ERROR) << "Failed to create eventfd for VM start notification";
return nullptr;
}
// We need to add it to the epoll set so that |Wait| can use it successfully.
// This fd shouldn't be used across child processes but still pass
// EPOLL_CLOEXEC as good hygiene.
base::ScopedFD vm_start_epoll_fd(epoll_create1(EPOLL_CLOEXEC));
if (!vm_start_epoll_fd.is_valid()) {
PLOG(ERROR) << "Failed to create epoll fd for the VM start event";
return nullptr;
}
struct epoll_event ep_event {
.events = EPOLLIN,
.data.u32 = static_cast<uint32_t>(vm_start_event_fd.get()),
};
if (HANDLE_EINTR(epoll_ctl(vm_start_epoll_fd.get(), EPOLL_CTL_ADD,
vm_start_event_fd.get(), &ep_event)) < 0) {
PLOG(ERROR) << "Failed to epoll add VM start event fd";
return nullptr;
}
// Add the signal fd to the epoll set to see if a signal is received while
// waiting for the VM.
ep_event = {
.events = EPOLLIN,
.data.u32 = static_cast<uint32_t>(signal_fd),
};
if (HANDLE_EINTR(epoll_ctl(vm_start_epoll_fd.get(), EPOLL_CTL_ADD, signal_fd,
&ep_event)) < 0) {
PLOG(ERROR) << "Failed to epoll add signal fd";
return nullptr;
}
return base::WrapUnique(new VmStartChecker(
signal_fd, std::move(vm_start_event_fd), std::move(vm_start_epoll_fd)));
}
VmStartChecker::Status VmStartChecker::Wait(base::TimeDelta timeout) {
struct epoll_event ep_event;
if (HANDLE_EINTR(epoll_wait(epoll_fd_.get(), &ep_event, 1,
timeout.InMilliseconds())) <= 0) {
PLOG(ERROR) << "Timed out waiting for VM to start";
return Status::TIMEOUT;
}
// We've only registered for input events.
if ((ep_event.events & EPOLLIN) == 0) {
LOG(ERROR) << "Got invalid event while waiting for VM to start: "
<< ep_event.events;
return Status::EPOLL_INVALID_EVENT;
}
if ((ep_event.data.u32 != static_cast<uint32_t>(event_fd_.get())) &&
(ep_event.data.u32 != static_cast<uint32_t>(signal_fd_))) {
LOG(ERROR) << "Got invalid fd while waiting for VM to start: "
<< ep_event.data.u32;
return Status::EPOLL_INVALID_FD;
}
if (ep_event.data.u32 == static_cast<uint32_t>(signal_fd_)) {
struct signalfd_siginfo siginfo;
if (read(signal_fd_, &siginfo, sizeof(siginfo)) != sizeof(siginfo)) {
PLOG(ERROR) << "Failed to read signal info";
return Status::INVALID_SIGNAL_INFO;
}
LOG(ERROR) << "Received signal: " << siginfo.ssi_signo
<< " while waiting to start the VM";
return Status::SIGNAL_RECEIVED;
}
// At this point |event_fd_| has been successfully signalled.
return Status::READY;
}
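// Typical usage (a sketch): create a VmStartChecker, hand GetEventFd() to the
// component that signals VM readiness, then call Wait() with a timeout and
// treat any status other than Status::READY as a failed start.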
int32_t VmStartChecker::GetEventFd() const {
return event_fd_.get();
}
VmStartChecker::VmStartChecker(int32_t signal_fd,
base::ScopedFD event_fd,
base::ScopedFD epoll_fd)
: signal_fd_(signal_fd),
event_fd_(std::move(event_fd)),
epoll_fd_(std::move(epoll_fd)) {}
VmInfo_VmType ToLegacyVmType(apps::VmType type) {
switch (type) {
case apps::VmType::TERMINA:
return VmInfo::TERMINA;
case apps::PLUGIN_VM:
return VmInfo::PLUGIN_VM;
case apps::BOREALIS:
return VmInfo::BOREALIS;
case apps::ARCVM:
return VmInfo::ARC_VM;
case apps::BRUSCHETTA:
return VmInfo::BRUSCHETTA;
default:
return VmInfo::UNKNOWN;
}
}
VmStatus ToVmStatus(VmBaseImpl::Status status) {
switch (status) {
case VmBaseImpl::Status::STARTING:
return VM_STATUS_STARTING;
case VmBaseImpl::Status::RUNNING:
return VM_STATUS_RUNNING;
case VmBaseImpl::Status::STOPPED:
return VM_STATUS_STOPPED;
}
}
VmInfo ToVmInfo(const VmBaseImpl::Info& info, bool fill_sensitive_info) {
VmInfo vm_info;
vm_info.set_ipv4_address(info.ipv4_address);
vm_info.set_pid(info.pid);
vm_info.set_cid(info.cid);
vm_info.set_seneschal_server_handle(info.seneschal_server_handle);
vm_info.set_vm_type(ToLegacyVmType(info.type));
vm_info.set_storage_ballooning(info.storage_ballooning);
if (fill_sensitive_info) {
vm_info.set_permission_token(info.permission_token);
}
vm_info.set_status(ToVmStatus(info.status));
return vm_info;
}
} // namespace vm_tools::concierge