blob: be367477588d98c36f3fcb0c33c5627e9badc7ba [file] [log] [blame]
// Copyright 2016 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "libcontainer/cgroup.h"
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <base/bind.h>
#include <base/callback.h>
#include <base/callback_helpers.h>
#include <base/files/file_util.h>
#include <base/files/scoped_file.h>
#include <base/strings/string_number_conversions.h>
#include <base/strings/stringprintf.h>
namespace libcontainer {
namespace {
// Optional replacement for the body of Cgroup::Create(). It stays null unless
// Cgroup::SetCgroupFactoryForTesting() installs a factory.
Cgroup::CgroupFactory g_cgroup_factory = nullptr;

// Directory name of every cgroup controller, looked up by Cgroup::Type index.
// NOTE(review): the entries are assumed to follow the enumerator order of
// Cgroup::Type -- confirm against the header when adding a controller.
constexpr const char* kCgroupNames[Cgroup::Type::NUM_TYPES] = {
    "cpu",
    "cpuacct",
    "cpuset",
    "devices",
    "freezer",
    "schedtune",
    "memory"};
// Opens the file |name| located directly inside |cgroup_path|.
//
// When |write| is true the file is opened write-only and is created (mode
// 0664) or truncated as needed; otherwise it is opened read-only. The open
// never follows a symlink in the final path component and the descriptor is
// close-on-exec.
//
// Returns a valid descriptor only if the opened object is a regular file.
// An invalid ScopedFD is returned on any failure; a failing open() itself is
// not logged here, so errno is left for the caller to report.
base::ScopedFD OpenCgroupFile(const base::FilePath& cgroup_path,
                              base::StringPiece name,
                              bool write) {
  const base::FilePath file_path = cgroup_path.Append(name);
  const auto& file_name = file_path.value();

  // O_NONBLOCK keeps open() from hanging in case we were tricked into opening
  // a blocking file (e.g. an unopened FIFO or a device).
  int open_flags = O_NOFOLLOW | O_NONBLOCK | O_CLOEXEC;
  if (write)
    open_flags |= O_WRONLY | O_CREAT | O_TRUNC;
  else
    open_flags |= O_RDONLY;

  base::ScopedFD fd(HANDLE_EINTR(open(file_name.c_str(), open_flags, 0664)));
  if (!fd.is_valid())
    return base::ScopedFD();

  // Only regular files are acceptable.
  struct stat file_info;
  if (fstat(fd.get(), &file_info) < 0) {
    PLOG(ERROR) << "Failed to fstat " << file_name;
    return base::ScopedFD();
  }
  if (!S_ISREG(file_info.st_mode)) {
    LOG(ERROR) << file_name << " is not a regular file";
    return base::ScopedFD();
  }

  // The file is known to be regular now, so hand |fd| back in blocking mode.
  const int status_flags = fcntl(fd.get(), F_GETFL);
  if (status_flags == -1) {
    PLOG(ERROR) << "Failed to get flags for " << file_name;
    return base::ScopedFD();
  }
  if (fcntl(fd.get(), F_SETFL, status_flags & ~O_NONBLOCK) < 0) {
    PLOG(ERROR) << "Failed to remove O_NONBLOCK for " << file_name;
    return base::ScopedFD();
  }
  return fd;
}
// Writes |str| to the file |name| inside |cgroup_path|, opening it through
// OpenCgroupFile() in write mode. Returns true only if the file could be
// opened and the whole of |str| was written.
bool WriteCgroupFile(const base::FilePath& cgroup_path,
                     base::StringPiece name,
                     base::StringPiece str) {
  const base::ScopedFD target = OpenCgroupFile(cgroup_path, name, true);
  return target.is_valid() &&
         base::WriteFileDescriptor(target.get(), str.data(), str.size());
}
// Writes the decimal text form of |value| to the file |name| inside
// |cgroup_path|. Returns whatever WriteCgroupFile() returns.
bool WriteCgroupFileInt(const base::FilePath& cgroup_path,
                        base::StringPiece name,
                        const int value) {
  const std::string text = base::NumberToString(value);
  return WriteCgroupFile(cgroup_path, name, text);
}
// Copies the contents of the file |name| found in the parent directory of
// |cgroup_path| into the file of the same name inside |cgroup_path|.
//
// The destination is opened (and truncated) before the source, so a missing
// destination is reported ahead of a missing source. Returns true once the
// source has been read to end-of-file and every chunk was written; returns
// false if either file cannot be opened or if a read or write fails. errno is
// not logged here so that callers can report it.
bool CopyCgroupParent(const base::FilePath& cgroup_path,
                      base::StringPiece name) {
  base::ScopedFD dest = OpenCgroupFile(cgroup_path, name, true);
  if (!dest.is_valid())
    return false;
  base::ScopedFD source = OpenCgroupFile(cgroup_path.DirName(), name, false);
  if (!source.is_valid())
    return false;

  char buffer[256];
  ssize_t bytes_read;
  // read() can fail with EINTR when a signal arrives before any data was
  // transferred. Retry in that case instead of reporting a spurious failure,
  // matching how open() is wrapped in OpenCgroupFile().
  while ((bytes_read = HANDLE_EINTR(
              read(source.get(), buffer, sizeof(buffer)))) > 0) {
    if (!base::WriteFileDescriptor(dest.get(), buffer, bytes_read))
      return false;
  }
  // The loop ends with 0 at end-of-file or with a negative value on error.
  return bytes_read == 0;
}
// Formats a device number pair as "major:minor". A negative |major| or
// |minor| is rendered as the wildcard "*", so the possible shapes are
// "M:m", "M:*", "*:m" and "*:*".
std::string GetDeviceString(const int major, const int minor) {
  const bool any_major = major < 0;
  const bool any_minor = minor < 0;

  if (any_major && any_minor)
    return base::StringPrintf("*:*");
  if (any_major)
    return base::StringPrintf("*:%d", minor);
  if (any_minor)
    return base::StringPrintf("%d:*", major);
  return base::StringPrintf("%d:%d", major, minor);
}
// Creates the directory |cgroup_path| with mode rwxrwx---.
//
// If the process's real uid is 0 and |cgroup_owner| / |cgroup_group| are not
// both 0, the effective gid and uid are switched to them for the duration of
// the mkdir() and restored before this function returns.
//
// Returns true if the directory was created or already existed. Returns false
// if the ids could not be switched (logged here) or if mkdir() failed for any
// reason other than EEXIST (not logged here).
bool CreateCgroupAsOwner(const base::FilePath& cgroup_path,
                         uid_t cgroup_owner,
                         gid_t cgroup_group) {
  // Runs its closure, if one is set, when this function returns on any path.
  base::ScopedClosureRunner restore_ids;

  const bool running_as_root = (getuid() == 0);
  const bool owner_is_root = (cgroup_owner == 0 && cgroup_group == 0);
  if (running_as_root && !owner_is_root) {
    const uid_t saved_euid = geteuid();
    const gid_t saved_egid = getegid();
    // Ensure that we reset the euid and egid no matter what.
    // TODO(ejcaruso, hidehiko): change to OnceClosure when libchrome is
    // upreved again.
    restore_ids.ReplaceClosure(base::Bind(
        [](uid_t euid, gid_t egid) {
          if (seteuid(euid) != 0)
            PLOG(ERROR) << "Failed to reset euid";
          if (setegid(egid) != 0)
            PLOG(ERROR) << "Failed to reset egid";
        },
        saved_euid, saved_egid));

    // The group is switched first, then the user.
    const bool switched =
        setegid(cgroup_group) == 0 && seteuid(cgroup_owner) == 0;
    if (!switched) {
      PLOG(ERROR) << "Failed to set cgroup owner";
      return false;
    }
  }

  const int rc = mkdir(cgroup_path.value().c_str(), S_IRWXU | S_IRWXG);
  // An already existing path counts as success.
  return rc >= 0 || errno == EEXIST;
}
// Seeds the cpuset cgroup at |cgroup_path| with its parent's cpus and mems.
//
// cpuset is special: without copying the parent's cpus and mems the new
// cgroup starts out "empty" and nothing can run in it or be moved into it.
//
// Chromium OS and Android use the legacy noprefix mount option (i.e. no
// "cpuset." in front of cpus/mems), so the unprefixed file is tried first and
// the prefixed one is the fallback. Returns false, after logging, if neither
// variant of either file could be copied.
bool CopyCpusetParent(const base::FilePath& cgroup_path) {
  const bool cpus_copied = CopyCgroupParent(cgroup_path, "cpus") ||
                           CopyCgroupParent(cgroup_path, "cpuset.cpus");
  if (!cpus_copied) {
    PLOG(ERROR) << "Failed to copy " << cgroup_path.value()
                << "/cpus from parent";
    return false;
  }

  const bool mems_copied = CopyCgroupParent(cgroup_path, "mems") ||
                           CopyCgroupParent(cgroup_path, "cpuset.mems");
  if (!mems_copied) {
    PLOG(ERROR) << "Failed to copy " << cgroup_path.value()
                << "/mems from parent";
    return false;
  }
  return true;
}
// Reports whether |cgroup_name| exists under |cgroup_root|, using
// access(F_OK). On a false return errno is the one set by access();
// Cgroup::Create() reads it to tell a missing controller from other errors.
bool CheckCgroupAvailable(const base::FilePath& cgroup_root,
                          base::StringPiece cgroup_name) {
  const base::FilePath controller_dir = cgroup_root.Append(cgroup_name);
  const int rc = access(controller_dir.value().c_str(), F_OK);
  return rc == 0;
}
} // namespace
bool Cgroup::Freeze() {
return WriteCgroupFile(cgroup_paths_[Type::FREEZER], "freezer.state",
"FROZEN\n");
}
bool Cgroup::Thaw() {
return WriteCgroupFile(cgroup_paths_[Type::FREEZER], "freezer.state",
"THAWED\n");
}
bool Cgroup::DenyAllDevices() {
return WriteCgroupFile(cgroup_paths_[Type::DEVICES], "devices.deny", "a\n");
}
bool Cgroup::AddDevice(bool allow,
int major,
int minor,
bool read,
bool write,
bool modify,
char type) {
if (type != 'b' && type != 'c' && type != 'a') {
LOG(ERROR) << "Invalid cgroup type '" << type << "'";
return false;
}
if (!read && !write && !modify) {
LOG(ERROR) << "Invalid cgroup permissions: at least one of read, write, "
"modify should be true";
return false;
}
std::string device_string = GetDeviceString(major, minor);
// The device file format is:
// <type, c, b, or a> major:minor rmw
std::string perm_string =
base::StringPrintf("%c %s %s%s%s\n", type, device_string.c_str(),
read ? "r" : "", write ? "w" : "", modify ? "m" : "");
return WriteCgroupFile(cgroup_paths_[Type::DEVICES],
allow ? "devices.allow" : "devices.deny", perm_string);
}
bool Cgroup::SetCpuShares(int shares) {
return WriteCgroupFileInt(cgroup_paths_[Type::CPU], "cpu.shares", shares);
}
bool Cgroup::SetCpuQuota(int quota) {
return WriteCgroupFileInt(cgroup_paths_[Type::CPU], "cpu.cfs_quota_us",
quota);
}
bool Cgroup::SetCpuPeriod(int period) {
return WriteCgroupFileInt(cgroup_paths_[Type::CPU], "cpu.cfs_period_us",
period);
}
bool Cgroup::SetCpuRtRuntime(int rt_runtime) {
return WriteCgroupFileInt(cgroup_paths_[Type::CPU], "cpu.rt_runtime_us",
rt_runtime);
}
bool Cgroup::SetCpuRtPeriod(int rt_period) {
return WriteCgroupFileInt(cgroup_paths_[Type::CPU], "cpu.rt_period_us",
rt_period);
}
// static
// Installs |factory| as the function Cgroup::Create() delegates to. While a
// non-null factory is installed, Create() returns the factory's result
// instead of touching the filesystem; passing nullptr restores the normal
// behaviour.
void Cgroup::SetCgroupFactoryForTesting(CgroupFactory factory) {
  g_cgroup_factory = factory;
}
// static
// Creates the cgroup |name| under every available controller in
// |cgroup_root| and returns an object describing it, or nullptr on failure.
//
// If a factory was installed via SetCgroupFactoryForTesting(), the call is
// forwarded to it unchanged. Otherwise, for each controller in kCgroupNames:
//  - a controller directory that does not exist (ENOENT) is skipped;
//  - if |cgroup_parent| is non-empty the cgroup lives at
//    <root>/<controller>/<parent>/<name>, and the parent directory is created
//    first when missing; otherwise it lives at <root>/<controller>/<name>;
//  - directories are created through CreateCgroupAsOwner() with
//    |cgroup_owner| and |cgroup_group|;
//  - newly created cpuset directories are seeded from their parent.
std::unique_ptr<Cgroup> Cgroup::Create(base::StringPiece name,
                                       const base::FilePath& cgroup_root,
                                       const base::FilePath& cgroup_parent,
                                       uid_t cgroup_owner,
                                       gid_t cgroup_group) {
  if (g_cgroup_factory) {
    return g_cgroup_factory(name, cgroup_root, cgroup_parent, cgroup_owner,
                            cgroup_group);
  }

  std::unique_ptr<Cgroup> cg(new Cgroup());
  for (int32_t idx = 0; idx < Type::NUM_TYPES; ++idx) {
    const char* const controller = kCgroupNames[idx];
    if (!CheckCgroupAvailable(cgroup_root, controller)) {
      // Only a controller that is simply absent may be skipped.
      if (errno != ENOENT) {
        PLOG(ERROR) << "Cgroup " << controller << " not available";
        return nullptr;
      }
      continue;
    }

    const bool is_cpuset = (idx == Type::CPUSET);
    base::FilePath& cgroup_path = cg->cgroup_paths_[idx];
    if (cgroup_parent.empty()) {
      cgroup_path = cgroup_root.Append(controller).Append(name);
    } else {
      const base::FilePath parent_path =
          cgroup_root.Append(controller).Append(cgroup_parent);
      cgroup_path = parent_path.Append(name);
      // Try to create the parent cgroup only if it doesn't exist.
      if (!base::DirectoryExists(parent_path)) {
        if (!CreateCgroupAsOwner(parent_path, cgroup_owner, cgroup_group)) {
          PLOG(ERROR) << "Failed to create parent cgroup "
                      << parent_path.value() << " as owner";
          return nullptr;
        }
        if (is_cpuset && !CopyCpusetParent(parent_path))
          return nullptr;
      }
    }

    if (!CreateCgroupAsOwner(cgroup_path, cgroup_owner, cgroup_group)) {
      PLOG(ERROR) << "Failed to create cgroup " << cgroup_path.value()
                  << " as owner";
      return nullptr;
    }
    cg->cgroup_tasks_paths_[idx] = cgroup_path.Append("tasks");
    if (is_cpuset && !CopyCpusetParent(cgroup_path))
      return nullptr;
  }

  cg->name_ = name.as_string();
  return cg;
}
// Defaulted constructor. In this file instances are only built by
// Cgroup::Create(), which fills in the paths and name afterwards.
Cgroup::Cgroup() = default;
// Removes the directory recorded for each controller. Controllers with no
// recorded path are skipped, and the result of rmdir() is deliberately
// ignored, so removal is best-effort.
Cgroup::~Cgroup() {
  for (int32_t idx = 0; idx < Type::NUM_TYPES; ++idx) {
    const base::FilePath& dir = cgroup_paths_[idx];
    if (!dir.empty())
      rmdir(dir.value().c_str());
  }
}
} // namespace libcontainer