blob: 63ea20f0264bb45a11a4fdd9d016e72703b9bb6e [file] [log] [blame]
// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "debugd/src/helpers/scheduler_configuration_utils.h"
#include <libminijail.h>
#include <scoped_minijail.h>
#include <sys/prctl.h>
#include <string>
#include <base/check_op.h>
#include <base/files/file_path.h>
#include <base/logging.h>
#include <brillo/flag_helper.h>
#include <brillo/syslog_logging.h>
#include <build/build_config.h>
#include <build/buildflag.h>
#include <chromeos/dbus/service_constants.h>
// Fallback definitions for the kernel core-scheduling prctl(2) interfaces.
// Each group is only defined when the included headers do not already provide
// it (see the #ifndef guards), so newer toolchain headers take precedence.
//
// Downstream core scheduling interface for CrOS v4.19, v5.4 kernels.
// TODO(b/152605392): Remove once those kernel versions are obsolete.
#ifndef PR_SET_CORE_SCHED
#define PR_SET_CORE_SCHED 0x200
#endif
// Upstream interface for core scheduling. Defined upstream from v5.14-rc1
// onwards in include/uapi/linux/prctl.h. Backported to CrOS kernel v5.10.
// PR_SCHED_CORE is the prctl option; the PR_SCHED_CORE_* values below are the
// sub-commands passed as its second argument. Only PR_SCHED_CORE_CREATE is
// used in this file.
#ifndef PR_SCHED_CORE
#define PR_SCHED_CORE 62
#define PR_SCHED_CORE_GET 0
#define PR_SCHED_CORE_CREATE 1
#define PR_SCHED_CORE_SHARE_TO 2
#define PR_SCHED_CORE_SHARE_FROM 3
#define PR_SCHED_CORE_MAX 4
#endif
// PID scope selector passed as the fourth argument of
// prctl(PR_SCHED_CORE, ...). Presumably mirrors the kernel's own pid_type
// numbering -- verify against the kernel headers before changing any value.
enum pid_type { PIDTYPE_PID = 0, PIDTYPE_TGID, PIDTYPE_PGID };
// Policy names accepted by the --policy flag.
using debugd::scheduler_configuration::kConservativeScheduler;
using debugd::scheduler_configuration::kCoreIsolationScheduler;
using debugd::scheduler_configuration::kPerformanceScheduler;
namespace {
// Base directory handed to SchedulerConfigurationUtils; the CPU control files
// are opened relative to it.
constexpr char kCPUPathPrefix[] = "/sys";
// Seccomp policy file parsed when the sandbox is configured in EnterSandbox().
constexpr char kSeccompFilterPath[] =
"/usr/share/policy/scheduler-configuration-helper.policy";
// User and group the process switches to inside the sandbox.
constexpr char kDebugdUser[] = "debugd";
constexpr char kDebugdGroup[] = "debugd";
// Enters a minijail sandbox.
void EnterSandbox() {
ScopedMinijail jail(minijail_new());
minijail_no_new_privs(jail.get());
minijail_use_seccomp_filter(jail.get());
minijail_parse_seccomp_filters(jail.get(), kSeccompFilterPath);
minijail_reset_signal_mask(jail.get());
minijail_namespace_ipc(jail.get());
minijail_namespace_net(jail.get());
minijail_remount_proc_readonly(jail.get());
minijail_change_user(jail.get(), kDebugdUser);
minijail_change_group(jail.get(), kDebugdGroup);
minijail_namespace_vfs(jail.get());
minijail_bind(jail.get(), "/", "/", 0);
minijail_bind(jail.get(), "/proc", "/proc", 0);
minijail_bind(jail.get(), "/dev/log", "/dev/log", 0);
minijail_mount_dev(jail.get());
minijail_remount_proc_readonly(jail.get());
minijail_enter_pivot_root(jail.get(), "/mnt/empty");
minijail_bind(jail.get(), "/sys", "/sys", 1);
minijail_enter(jail.get());
}
// Probes whether the running kernel supports core scheduling.
//
// Two interfaces are tried in order:
//   1. The downstream PR_SET_CORE_SCHED prctl (CrOS v4.19 / v5.4 kernels),
//      called with a deliberately invalid argument.
//   2. The upstream PR_SCHED_CORE prctl (>= 5.10 kernels).
//
// Returns true if either probe indicates that the kernel knows about core
// scheduling, false otherwise.
bool CoreSchedSupported() {
  int ret = prctl(PR_SET_CORE_SCHED, 2);
  // Capture errno right away so that nothing running between the syscall and
  // the check below can overwrite it.
  const int legacy_errno = errno;
  DCHECK_LT(ret, 0);  // This should never succeed.
  // The kernel supports the call but we gave it a bogus argument. errno is
  // only meaningful when the call actually failed, so require that
  // explicitly; DCHECK_LT is compiled out of release builds and a stale
  // ERANGE must not be mistaken for support.
  if (ret < 0 && legacy_errno == ERANGE)
    return true;

  // Otherwise, try the new interface (available on >=5.10 kernels) to check
  // for support.
  ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0, PIDTYPE_PID, 0);
  // Since HT is likely not enabled initially, the prctl(2) may fail with
  // errno set to ENODEV; that still proves the kernel implements the
  // interface. Any non-error return means the call worked outright.
  return ret != -1 || errno == ENODEV;
}
} // namespace
// Entry point of the scheduler configuration helper.
//
// Parses --policy, opens the CPU control and cpuset files, enters the
// sandbox, and then applies the requested scheduler configuration. The number
// of cores that were disabled is written to stdout. Returns 0 on success and
// 1 on failure.
int main(int argc, char* argv[]) {
  brillo::InitLog(brillo::kLogToStderr);

  // Help text enumerating every accepted policy name.
  std::string policy_help =
      std::string("Set to either ") + kConservativeScheduler + " or " +
      kCoreIsolationScheduler + " or " + kPerformanceScheduler + ".";
  DEFINE_string(policy, "", policy_help.c_str());
  brillo::FlagHelper::Init(argc, argv, "scheduler_configuration_helper");

  // Anything that is not a recognised policy falls back to conservative.
  const bool known_policy = FLAGS_policy == kConservativeScheduler ||
                            FLAGS_policy == kCoreIsolationScheduler ||
                            FLAGS_policy == kPerformanceScheduler;
  if (!known_policy) {
    LOG(INFO) << "Unknown policy \"" << FLAGS_policy << "\", defaulting to "
              << kConservativeScheduler;
    FLAGS_policy = kConservativeScheduler;
  }

  // The CPU control files must be opened as root, i.e. before the sandbox
  // switches to the debugd user.
  base::FilePath sysfs_root(kCPUPathPrefix);
  debugd::SchedulerConfigurationUtils utils(sysfs_root);
  if (!utils.GetControlFDs()) {
    LOG(ERROR) << "Failed to open CPU control files.";
    return 1;
  }
  if (!utils.GetCPUSetFDs()) {
    LOG(ERROR) << "Failed to open cpuset files.";
    return 1;
  }

  EnterSandbox();

  // By default, Chrome prefers to use core isolation scheduling, which keeps
  // hyper-threading enabled globally, but puts renderer processes into
  // untrusted execution groups. Chrome does not know which kernels support
  // core scheduling, so the decision is made here: core isolation becomes the
  // performance configuration when the kernel supports core scheduling and
  // the conservative configuration when it does not.
  if (FLAGS_policy == kCoreIsolationScheduler) {
    FLAGS_policy = CoreSchedSupported() ? kPerformanceScheduler
                                        : kConservativeScheduler;
  }

  // Apply the selected configuration. `applied` stays false if neither branch
  // matches, which yields exit status 1.
  bool applied = false;
  size_t disabled_cores = 0;
  if (FLAGS_policy == kPerformanceScheduler) {
    applied = utils.EnablePerformanceConfiguration(&disabled_cores);
  } else if (FLAGS_policy == kConservativeScheduler) {
    applied = utils.EnableConservativeConfiguration(&disabled_cores);
  }

  // Report the number of disabled cores on stdout, regardless of outcome.
  fprintf(stdout, "%zu", disabled_cores);
  fflush(stdout);

  return applied ? 0 : 1;
}