// SPDX-License-Identifier: GPL-2.0
/*
* Container Security Monitor module
*
* Copyright (c) 2018 Google, Inc
*/
#include "monitor.h"
#include <linux/atomic.h>
#include <linux/audit.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/mempool.h>
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/notifier.h>
#include <linux/net.h>
#include <linux/path.h>
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/timekeeping.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/xattr.h>
#include <net/ipv6.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <overlayfs/overlayfs.h>
#include <uapi/linux/magic.h>
#include <uapi/asm/mman.h>
/* Configuration options for execute collector. */
struct execute_config csm_execute_config;
/* unique atomic value for the machine boot instance */
static atomic_t machine_rand = ATOMIC_INIT(0);
/* sequential container identifier */
static atomic_t contid = ATOMIC_INIT(0);
/* Generation id for each enumeration invocation. */
static atomic_t enumeration_count = ATOMIC_INIT(0);
struct file_provenance {
/* pid of the process doing the first write. */
pid_t tgid;
/* start_time of the process to uniquely identify it. */
u64 start_time;
};
struct csm_enumerate_processes_work_data {
struct work_struct work;
int enumeration_count;
};
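/*
 * Map the top of the execve argument stack into kernel address space.
 * The single-page case is kmapped directly and *ctx holds the page;
 * otherwise the pages are copied into a single vmalloc'd buffer and
 * *ctx holds bprm. kunmap_argument_stack() releases either form.
 */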
static void *kmap_argument_stack(struct linux_binprm *bprm, void **ctx)
{
char *argv;
int err;
unsigned long i, pos, count;
void *map;
struct page *page;
/* vma_pages() returns the number of pages reserved for the stack */
count = vma_pages(bprm->vma);
if (likely(count == 1)) {
err = get_user_pages_remote(current, bprm->mm, bprm->p, 1,
FOLL_FORCE, &page, NULL, NULL);
if (err != 1)
return NULL;
argv = kmap(page);
*ctx = page;
} else {
		/*
		 * If more than one page is needed, copy all of them into a
		 * single contiguous buffer. Parsing arguments across
		 * separately kmapped pages at different addresses would be
		 * impractical.
		 */
argv = vmalloc(count * PAGE_SIZE);
if (!argv)
return NULL;
for (i = 0; i < count; i++) {
pos = ALIGN_DOWN(bprm->p, PAGE_SIZE) + i * PAGE_SIZE;
err = get_user_pages_remote(current, bprm->mm, pos, 1,
FOLL_FORCE, &page, NULL,
NULL);
if (err <= 0) {
vfree(argv);
return NULL;
}
map = kmap(page);
memcpy(argv + i * PAGE_SIZE, map, PAGE_SIZE);
kunmap(page);
put_page(page);
}
*ctx = bprm;
}
return argv;
}
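/* Release the mapping or buffer created by kmap_argument_stack(). */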
static void kunmap_argument_stack(struct linux_binprm *bprm, void *addr,
void *ctx)
{
struct page *page;
if (!addr)
return;
if (likely(vma_pages(bprm->vma) == 1)) {
		page = (struct page *)ctx;
		kunmap(page);
		put_page(page);
} else {
vfree(addr);
}
}
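/*
 * Return the next null-terminated string in the copied stack, or NULL if
 * it would run past @end. *offset is advanced past the string.
 */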
static char *find_array_next_entry(char *array, unsigned long *offset,
unsigned long end)
{
char *entry;
unsigned long off = *offset;
if (off >= end)
return NULL;
	/* Check that the entry is null-terminated and in bounds. */
entry = array + off;
while (array[off]) {
if (++off >= end)
return NULL;
}
	/* Skip past the null byte for the next iteration. */
*offset = off + 1;
return entry;
}
struct string_arr_ctx {
struct linux_binprm *bprm;
void *stack;
};
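/* Return the configured capture limit, or 0 if the execute collector is disabled. */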
static size_t get_config_limit(size_t *config_ptr)
{
lockdep_assert_held_read(&csm_rwsem_config);
/*
* If execute is not enabled, do not capture arguments.
* The event proto won't be sent anyway.
*/
if (!csm_execute_enabled)
return 0;
return *config_ptr;
}
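/*
 * Protobuf (nanopb) encode callback: emit up to argv_limit bytes of the
 * process command-line arguments from the mapped stack.
 */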
static bool encode_current_argv(pb_ostream_t *stream, const pb_field_t *field,
void * const *arg)
{
struct string_arr_ctx *ctx = (struct string_arr_ctx *)*arg;
int i;
struct linux_binprm *bprm = ctx->bprm;
unsigned long offset = bprm->p % PAGE_SIZE;
unsigned long end = vma_pages(bprm->vma) * PAGE_SIZE;
char *argv = ctx->stack;
char *entry;
size_t limit, used = 0;
ssize_t ret;
limit = get_config_limit(&csm_execute_config.argv_limit);
if (!limit)
return true;
for (i = 0; i < bprm->argc; i++) {
entry = find_array_next_entry(argv, &offset, end);
if (!entry)
return false;
ret = pb_encode_string_field_limit(stream, field,
(void * const *)&entry,
limit - used);
if (ret < 0)
return false;
used += ret;
if (used >= limit)
break;
}
return true;
}
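/*
 * Return true if the environment variable's key matches an entry in the
 * configured allowlist (a sequence of null-terminated strings), or if no
 * allowlist is configured.
 */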
static bool check_envp_allowlist(char *envp)
{
bool ret = false;
char *strs, *equal;
size_t str_size, equal_pos;
/* If execute is not enabled, skip all. */
if (!csm_execute_enabled)
goto out;
/* No filter, allow all. */
strs = csm_execute_config.envp_allowlist;
if (!strs) {
ret = true;
goto out;
}
	/*
	 * Identify the key=value separator.
	 * If none exists, use the whole string as the key.
	 */
equal = strchr(envp, '=');
equal_pos = equal ? (equal - envp) : strlen(envp);
/* Default to skip if no match found. */
ret = false;
do {
str_size = strlen(strs);
		/*
		 * If the filter length matches the position of the equal
		 * sign, this may be a match; compare the keys.
		 */
if (str_size == equal_pos &&
!strncmp(strs, envp, str_size)) {
ret = true;
goto out;
}
strs += str_size + 1;
} while (*strs != 0);
out:
return ret;
}
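/*
 * Protobuf (nanopb) encode callback: skip past argv on the stack, then
 * emit up to envp_limit bytes of allowlisted environment variables.
 */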
static bool encode_current_envp(pb_ostream_t *stream, const pb_field_t *field,
void * const *arg)
{
struct string_arr_ctx *ctx = (struct string_arr_ctx *)*arg;
int i;
struct linux_binprm *bprm = ctx->bprm;
unsigned long offset = bprm->p % PAGE_SIZE;
unsigned long end = vma_pages(bprm->vma) * PAGE_SIZE;
char *argv = ctx->stack;
char *entry;
size_t limit, used = 0;
ssize_t ret;
limit = get_config_limit(&csm_execute_config.envp_limit);
if (!limit)
return true;
/* Skip arguments */
for (i = 0; i < bprm->argc; i++) {
if (!find_array_next_entry(argv, &offset, end))
return false;
}
for (i = 0; i < bprm->envc; i++) {
entry = find_array_next_entry(argv, &offset, end);
if (!entry)
return false;
if (!check_envp_allowlist(entry))
continue;
ret = pb_encode_string_field_limit(stream, field,
(void * const *)&entry,
limit - used);
if (ret < 0)
return false;
used += ret;
if (used >= limit)
break;
}
return true;
}
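/* Return true if the file resides on an overlayfs mount. */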
static bool is_overlayfs_mounted(struct file *file)
{
struct vfsmount *mnt;
struct super_block *mnt_sb;
mnt = file->f_path.mnt;
if (mnt == NULL)
return false;
mnt_sb = mnt->mnt_sb;
if (mnt_sb == NULL || mnt_sb->s_magic != OVERLAYFS_SUPER_MAGIC)
return false;
return true;
}
/*
 * Before the process starts, identify a possible container by checking
 * whether the task is in a non-init pid namespace and the target file
 * lives on an overlayfs mount. This check is valid for COS and GKE but
 * not for all existing containers.
 */
static bool is_possible_container(struct task_struct *task,
struct file *file)
{
if (task_active_pid_ns(task) == &init_pid_ns)
return false;
return is_overlayfs_mounted(file);
}
/*
 * Generate a random identifier for this boot instance.
 * The identifier is generated lazily, on first use, so that more entropy
 * is available than at early boot.
 */
static u32 get_machine_id(void)
{
int machineid, old;
machineid = atomic_read(&machine_rand);
if (unlikely(machineid == 0)) {
machineid = (int)get_random_int();
if (machineid == 0)
machineid = 1;
old = atomic_cmpxchg(&machine_rand, 0, machineid);
/* If someone beat us, use their value. */
if (old != 0)
machineid = old;
}
return (u32)machineid;
}
/*
 * Generate a 128-bit unique identifier for the process by concatenating:
 * - A machine identifier unique per boot.
 * - The start time of the process in nanoseconds.
 * - The tgid shared by the threads of the process.
 */
static int get_process_uuid(struct task_struct *task, char *buffer, size_t size)
{
union process_uuid *id = (union process_uuid *)buffer;
memset(buffer, 0, size);
if (WARN_ON(size < PROCESS_UUID_SIZE))
return -EINVAL;
id->machineid = get_machine_id();
id->start_time = ktime_mono_to_real(task->group_leader->start_time);
id->tgid = task_tgid_nr(task);
return 0;
}
int get_process_uuid_by_pid(pid_t pid_nr, char *buffer, size_t size)
{
int err;
struct task_struct *task = NULL;
rcu_read_lock();
task = find_task_by_pid_ns(pid_nr, &init_pid_ns);
if (!task) {
err = -ENOENT;
goto out;
}
err = get_process_uuid(task, buffer, size);
out:
rcu_read_unlock();
return err;
}
static int get_process_uuid_from_xattr(struct file *file, char *buffer,
size_t size)
{
struct dentry *dentry;
int err;
struct file_provenance prov;
union process_uuid *id = (union process_uuid *)buffer;
memset(buffer, 0, size);
if (WARN_ON(size < PROCESS_UUID_SIZE))
return -EINVAL;
/* The file is part of overlayfs on the upper layer. */
if (!is_overlayfs_mounted(file))
return -ENODATA;
dentry = ovl_dentry_upper(file->f_path.dentry);
if (!dentry)
return -ENODATA;
err = __vfs_getxattr(dentry, dentry->d_inode,
XATTR_SECURITY_CSM, &prov, sizeof(prov));
/* returns -ENODATA if the xattr does not exist. */
if (err < 0)
return err;
if (err != sizeof(prov)) {
pr_err("unexpected size for xattr: %zu -> %d\n",
size, err);
return -ENODATA;
}
id->machineid = get_machine_id();
id->start_time = prov.start_time;
id->tgid = prov.tgid;
return 0;
}
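/*
 * Assign a fresh sequential container-id to the task and, if the pid
 * namespace does not have one yet, to the namespace as well.
 */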
u64 csm_set_contid(struct task_struct *task)
{
u64 cid;
struct pid_namespace *ns;
ns = task_active_pid_ns(task);
if (WARN_ON(!task->audit) || WARN_ON(!ns))
return AUDIT_CID_UNSET;
cid = atomic_inc_return(&contid);
task->audit->contid = cid;
/*
* If the namespace container-id is not set, use the one assigned
* to the first process created.
*/
cmpxchg(&ns->cid, 0, cid);
return cid;
}
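/* Return the container-id of the pid namespace, if one was assigned. */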
u64 csm_get_ns_contid(struct pid_namespace *ns)
{
if (!ns || !ns->cid)
return AUDIT_CID_UNSET;
return ns->cid;
}
union ip_data {
struct in_addr ip4;
struct in6_addr ip6;
};
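/* Scratch storage kept alive while a schema_File message is serialized. */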
struct file_data {
void *allocated;
union ip_data local;
union ip_data remote;
char modified_uuid[PROCESS_UUID_SIZE];
};
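/* Release the path buffer allocated by fill_file_description(). */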
static void free_file_data(struct file_data *fdata)
{
free_page((unsigned long)fdata->allocated);
fdata->allocated = NULL;
}
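/* Fill a schema_SocketIp message from a sockaddr (IPv4 or IPv6). */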
static void fill_socket_description(struct sockaddr_storage *saddr,
union ip_data *idata,
schema_SocketIp *schema_socketip)
{
struct sockaddr_in *sin4 = (struct sockaddr_in *)saddr;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)saddr;
schema_socketip->family = saddr->ss_family;
switch (saddr->ss_family) {
case AF_INET:
schema_socketip->port = ntohs(sin4->sin_port);
idata->ip4 = sin4->sin_addr;
schema_socketip->ip.funcs.encode = pb_encode_ip4;
schema_socketip->ip.arg = &idata->ip4;
break;
case AF_INET6:
schema_socketip->port = ntohs(sin6->sin6_port);
idata->ip6 = sin6->sin6_addr;
schema_socketip->ip.funcs.encode = pb_encode_ip6;
schema_socketip->ip.arg = &idata->ip6;
break;
}
}
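/*
 * For files on overlayfs, record the lower/upper layer dentries and, if
 * present, the provenance uuid stored in the security xattr.
 */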
static int fill_file_overlayfs(struct file *file, schema_File *schema_file,
struct file_data *fdata)
{
struct dentry *dentry;
int err;
schema_Overlay *overlayfs;
/* If not an overlayfs superblock, done. */
if (!is_overlayfs_mounted(file))
return 0;
dentry = file->f_path.dentry;
schema_file->which_filesystem = schema_File_overlayfs_tag;
overlayfs = &schema_file->filesystem.overlayfs;
overlayfs->lower_layer = ovl_dentry_lower(dentry);
overlayfs->upper_layer = ovl_dentry_upper(dentry);
err = get_process_uuid_from_xattr(file, fdata->modified_uuid,
sizeof(fdata->modified_uuid));
/* If there is no xattr, just skip the modified_uuid field. */
if (err == -ENODATA)
return 0;
if (err < 0)
return err;
overlayfs->modified_uuid.funcs.encode = pb_encode_uuid_field;
overlayfs->modified_uuid.arg = fdata->modified_uuid;
return 0;
}
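/*
 * Describe a file in a schema_File message: inode, path (for regular
 * files), socket addresses, and overlayfs details. fdata must later be
 * released with free_file_data().
 */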
static int fill_file_description(struct file *file, schema_File *schema_file,
struct file_data *fdata)
{
char *buf;
int err;
u32 mode;
char *path;
struct socket *socket;
schema_Socket *socketfs;
struct sockaddr_storage saddr;
memset(fdata, 0, sizeof(*fdata));
if (file == NULL)
return 0;
schema_file->ino = file_inode(file)->i_ino;
mode = file_inode(file)->i_mode;
/* For pipes, no need to resolve the path. */
if (S_ISFIFO(mode))
return 0;
if (S_ISSOCK(mode)) {
socket = (struct socket *)file->private_data;
socketfs = &schema_file->filesystem.socket;
/* Local socket */
err = kernel_getsockname(socket, (struct sockaddr *)&saddr);
if (err >= 0) {
socketfs->has_local = true;
fill_socket_description(&saddr, &fdata->local,
&socketfs->local);
}
/* Remote socket, might not be connected. */
err = kernel_getpeername(socket, (struct sockaddr *)&saddr);
if (err >= 0) {
socketfs->has_remote = true;
fill_socket_description(&saddr, &fdata->remote,
&socketfs->remote);
}
schema_file->which_filesystem = schema_File_socket_tag;
return 0;
}
	/*
	 * From this point on, we care about all other file types, since
	 * their path provides useful insight.
	 */
buf = (char *)__get_free_page(GFP_KERNEL);
if (buf == NULL)
return -ENOMEM;
fdata->allocated = buf;
path = d_path(&file->f_path, buf, PAGE_SIZE);
if (IS_ERR(path)) {
free_file_data(fdata);
return PTR_ERR(path);
}
schema_file->fullpath.funcs.encode = pb_encode_string_field;
schema_file->fullpath.arg = path; /* buf is freed in free_file_data. */
err = fill_file_overlayfs(file, schema_file, fdata);
if (err) {
free_file_data(fdata);
return err;
}
return 0;
}
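/* Describe the file backing a file descriptor (stdin/stdout/stderr). */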
static int fill_stream_description(schema_Descriptor *desc, int fd,
struct file_data *fdata)
{
struct fd sfd;
struct file *file;
int err = 0;
sfd = fdget(fd);
file = sfd.file;
if (file == NULL) {
memset(fdata, 0, sizeof(*fdata));
goto end;
}
desc->mode = file_inode(file)->i_mode;
desc->has_file = true;
err = fill_file_description(file, &desc->file, fdata);
end:
fdput(sfd);
return err;
}
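/* Fill the process uuid and, when the parent is reachable, the parent uuid. */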
static int populate_proc_uuid_common(schema_Process *proc, char *uuid,
size_t uuid_size, char *parent_uuid,
size_t parent_uuid_size,
struct task_struct *task)
{
int err;
struct task_struct *parent;
/* Generate unique identifier for the process and its parent */
err = get_process_uuid(task, uuid, uuid_size);
if (err)
return err;
proc->uuid.funcs.encode = pb_encode_uuid_field;
proc->uuid.arg = uuid;
rcu_read_lock();
if (!pid_alive(task))
goto out;
	/*
	 * task_rcu_dereference() should not be needed here:
	 * real_parent is only supposed to be accessed under RCU,
	 * and the RCU read lock is held.
	 */
parent = rcu_dereference(task->real_parent);
if (parent) {
err = get_process_uuid(parent, parent_uuid, parent_uuid_size);
if (!err) {
proc->parent_uuid.funcs.encode = pb_encode_uuid_field;
proc->parent_uuid.arg = parent_uuid;
}
}
out:
rcu_read_unlock();
return err;
}
/* Populate the fields that we always want to set in Process messages. */
static int populate_proc_common(schema_Process *proc, char *uuid,
size_t uuid_size, char *parent_uuid,
size_t parent_uuid_size,
struct task_struct *task)
{
u64 cid;
struct pid_namespace *ns = task_active_pid_ns(task);
/* Container identifier for the current namespace. */
proc->container_id = csm_get_ns_contid(ns);
	/*
	 * If the process container-id differs, the process tree is part of
	 * a different session within the namespace (kubectl/docker exec,
	 * a liveness probe, or similar).
	 */
cid = audit_get_contid(task);
if (proc->container_id != cid)
proc->exec_session_id = cid;
/* Add information about pid in different namespaces */
proc->pid = task_tgid_nr(task);
proc->parent_pid = task_ppid_nr(task);
proc->container_pid = task_tgid_nr_ns(task, ns);
proc->container_parent_pid = task_ppid_nr_ns(task, ns);
return populate_proc_uuid_common(proc, uuid, uuid_size, parent_uuid,
parent_uuid_size, task);
}
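/*
 * Called during execve: assign a container-id to containerized processes
 * if needed and emit an execute event describing the binary, streams,
 * arguments and environment.
 */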
int csm_bprm_check_security(struct linux_binprm *bprm)
{
char uuid[PROCESS_UUID_SIZE];
char parent_uuid[PROCESS_UUID_SIZE];
int err;
schema_Event event = {};
schema_Process *proc;
struct string_arr_ctx argv_ctx;
void *stack = NULL, *ctx = NULL;
u64 cid;
struct file_data path_data = {};
struct file_data stdin_data = {};
struct file_data stdout_data = {};
struct file_data stderr_data = {};
/*
* Always create a container-id for containerized processes.
* If the LSM is enabled later, we can track existing containers.
*/
cid = audit_get_contid(current);
if (cid == AUDIT_CID_UNSET) {
if (!is_possible_container(current, bprm->file))
return 0;
cid = csm_set_contid(current);
if (cid == AUDIT_CID_UNSET)
return 0;
}
if (!csm_execute_enabled)
return 0;
/* The interpreter will call us again with more context. */
if (bprm->buf[0] == '#' && bprm->buf[1] == '!')
return 0;
proc = &event.event.execute.proc;
err = populate_proc_common(proc, uuid, sizeof(uuid), parent_uuid,
sizeof(parent_uuid), current);
if (err)
goto out_free_buf;
proc->creation_timestamp = ktime_get_real_ns();
/* Provide information about the launched binary. */
proc->has_binary = true;
err = fill_file_description(bprm->file, &proc->binary, &path_data);
if (err)
goto out_free_buf;
/* Information about streams */
proc->has_streams = true;
proc->streams.has_stdin = true;
err = fill_stream_description(&proc->streams.stdin, STDIN_FILENO,
&stdin_data);
if (err)
goto out_free_buf;
proc->streams.has_stdout = true;
err = fill_stream_description(&proc->streams.stdout, STDOUT_FILENO,
&stdout_data);
if (err)
goto out_free_buf;
proc->streams.has_stderr = true;
err = fill_stream_description(&proc->streams.stderr, STDERR_FILENO,
&stderr_data);
if (err)
goto out_free_buf;
stack = kmap_argument_stack(bprm, &ctx);
if (!stack) {
err = -EFAULT;
goto out_free_buf;
}
/* Capture process argument */
argv_ctx.bprm = bprm;
argv_ctx.stack = stack;
proc->args.argv.funcs.encode = encode_current_argv;
proc->args.argv.arg = &argv_ctx;
/* Capture process environment variables */
proc->args.envp.funcs.encode = encode_current_envp;
proc->args.envp.arg = &argv_ctx;
event.which_event = schema_Event_execute_tag;
event.event.execute.has_proc = true;
proc->has_args = true;
/*
* Configurations options are checked when computing the serialized
* protobufs.
*/
down_read(&csm_rwsem_config);
err = csm_sendeventproto(schema_Event_fields, &event);
up_read(&csm_rwsem_config);
if (err)
pr_err("csm_sendeventproto returned %d on execve\n", err);
err = 0;
out_free_buf:
kunmap_argument_stack(bprm, stack, ctx);
free_file_data(&path_data);
free_file_data(&stdin_data);
free_file_data(&stdout_data);
free_file_data(&stderr_data);
	/*
	 * On failure, enforce the error only if the execute collector is
	 * enabled. If the collector was disabled, prefer to succeed to not
	 * impact the system.
	 */
if (unlikely(err < 0 && !csm_execute_enabled))
err = 0;
return err;
}
/* Create a clone event when a new thread-group leader is created. */
void csm_task_post_alloc(struct task_struct *task)
{
int err;
char uuid[PROCESS_UUID_SIZE];
char parent_uuid[PROCESS_UUID_SIZE];
schema_Event event = {};
schema_Process *proc;
if (!csm_execute_enabled ||
audit_get_contid(task) == AUDIT_CID_UNSET ||
!thread_group_leader(task))
return;
proc = &event.event.clone.proc;
	err = populate_proc_uuid_common(proc, uuid, sizeof(uuid), parent_uuid,
					sizeof(parent_uuid), task);
	if (err)
		return;
	event.which_event = schema_Event_clone_tag;
	event.event.clone.has_proc = true;
	err = csm_sendeventproto(schema_Event_fields, &event);
	if (err)
		pr_err("csm_sendeventproto returned %d on clone\n", err);
}
/*
 * This LSM hook callback doesn't exist upstream and is called only when
 * the last thread of a thread group exits.
 */
void csm_task_exit(struct task_struct *task)
{
int err;
schema_Event event = {};
schema_ExitEvent *exit;
char uuid[PROCESS_UUID_SIZE];
if (!csm_execute_enabled ||
audit_get_contid(task) == AUDIT_CID_UNSET)
return;
exit = &event.event.exit;
/* Fetch the unique identifier for this process */
err = get_process_uuid(task, uuid, sizeof(uuid));
if (err) {
pr_err("failed to get process uuid on exit\n");
return;
}
exit->process_uuid.funcs.encode = pb_encode_uuid_field;
exit->process_uuid.arg = uuid;
event.which_event = schema_Event_exit_tag;
err = csm_sendeventproto(schema_Event_fields, &event);
if (err)
pr_err("csm_sendeventproto returned %d on exit\n", err);
}
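/*
 * Emit a memexec event when a containerized process makes a mapping
 * executable via mprotect.
 */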
int csm_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
unsigned long prot)
{
char uuid[PROCESS_UUID_SIZE];
char parent_uuid[PROCESS_UUID_SIZE];
int err;
schema_Event event = {};
schema_MemoryExecEvent *memexec;
u64 cid;
struct file_data path_data = {};
cid = audit_get_contid(current);
if (!csm_memexec_enabled ||
!(prot & PROT_EXEC) ||
vma->vm_file == NULL ||
cid == AUDIT_CID_UNSET)
return 0;
memexec = &event.event.memexec;
err = fill_file_description(vma->vm_file, &memexec->mapped_file,
&path_data);
if (err)
return err;
err = populate_proc_common(&memexec->proc, uuid, sizeof(uuid),
parent_uuid, sizeof(parent_uuid), current);
if (err)
goto out;
memexec->prot_exec_timestamp = ktime_get_real_ns();
memexec->new_flags = prot;
memexec->req_flags = reqprot;
memexec->old_vm_flags = vma->vm_flags;
memexec->action = schema_MemoryExecEvent_Action_MPROTECT;
memexec->start_addr = vma->vm_start;
memexec->end_addr = vma->vm_end;
event.which_event = schema_Event_memexec_tag;
event.event.memexec.has_proc = true;
event.event.memexec.has_mapped_file = true;
err = csm_sendeventproto(schema_Event_fields, &event);
if (err)
pr_err("csm_sendeventproto returned %d on mprotect\n", err);
	err = 0;
out:
	free_file_data(&path_data);
	/*
	 * On failure, enforce the error only if the memexec collector is
	 * enabled. If the collector was disabled, prefer to succeed to not
	 * impact the system.
	 */
	if (unlikely(err < 0 && !csm_memexec_enabled))
		err = 0;
	return err;
}
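/*
 * Emit a memexec event when a containerized process maps a file with
 * PROT_EXEC, noting whether the mapping is the initial executable load.
 */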
int csm_mmap_file(struct file *file, unsigned long reqprot,
unsigned long prot, unsigned long flags)
{
char uuid[PROCESS_UUID_SIZE];
char parent_uuid[PROCESS_UUID_SIZE];
int err;
schema_Event event = {};
schema_MemoryExecEvent *memexec;
struct file *exe_file;
u64 cid;
struct file_data path_data = {};
cid = audit_get_contid(current);
if (!csm_memexec_enabled ||
!(prot & PROT_EXEC) ||
file == NULL ||
cid == AUDIT_CID_UNSET)
return 0;
memexec = &event.event.memexec;
err = fill_file_description(file, &memexec->mapped_file,
&path_data);
if (err)
return err;
err = populate_proc_common(&memexec->proc, uuid, sizeof(uuid),
parent_uuid, sizeof(parent_uuid), current);
if (err)
goto out;
/* get_mm_exe_file does its own locking on mm_sem. */
exe_file = get_mm_exe_file(current->mm);
if (exe_file) {
if (path_equal(&file->f_path, &exe_file->f_path))
memexec->is_initial_mmap = 1;
fput(exe_file);
}
memexec->prot_exec_timestamp = ktime_get_real_ns();
memexec->new_flags = prot;
memexec->req_flags = reqprot;
memexec->mmap_flags = flags;
memexec->action = schema_MemoryExecEvent_Action_MMAP_FILE;
event.which_event = schema_Event_memexec_tag;
event.event.memexec.has_proc = true;
event.event.memexec.has_mapped_file = true;
err = csm_sendeventproto(schema_Event_fields, &event);
if (err)
pr_err("csm_sendeventproto returned %d on mmap_file\n", err);
	err = 0;
out:
	free_file_data(&path_data);
	/*
	 * On failure, enforce the error only if the memexec collector is
	 * enabled. If the collector was disabled, prefer to succeed to not
	 * impact the system.
	 */
	if (unlikely(err < 0 && !csm_memexec_enabled))
		err = 0;
	return err;
}
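/*
 * Before a written file is closed, record provenance (tgid and start
 * time of the writer) in a security xattr on the overlayfs upper layer,
 * unless one was already recorded.
 */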
void csm_file_pre_free(struct file *file)
{
struct dentry *dentry;
int err;
struct file_provenance prov;
/* The file was opened to be modified and the LSM is enabled */
if (!(file->f_mode & FMODE_WRITE) ||
!csm_enabled)
return;
/* The current process is containerized. */
if (audit_get_contid(current) == AUDIT_CID_UNSET)
return;
/* The file is part of overlayfs on the upper layer. */
if (!is_overlayfs_mounted(file))
return;
dentry = ovl_dentry_upper(file->f_path.dentry);
if (!dentry)
return;
err = __vfs_getxattr(dentry, dentry->d_inode, XATTR_SECURITY_CSM,
NULL, 0);
if (err != -ENODATA) {
if (err < 0)
pr_err("failed to get security attribute: %d\n", err);
return;
}
prov.tgid = task_tgid_nr(current);
prov.start_time = ktime_mono_to_real(current->group_leader->start_time);
err = __vfs_setxattr(dentry, dentry->d_inode, XATTR_SECURITY_CSM, &prov,
sizeof(prov), 0);
if (err < 0)
pr_err("failed to set security attribute: %d\n", err);
}
/*
 * Based on fs/proc/base.c:next_tgid.
 *
 * next_thread_group_leader returns the task_struct of the next thread-group
 * leader with a pid greater than or equal to *tgid and a container-id set,
 * or NULL if none remains. The reference count is increased so that
 * rcu_read_unlock may be called, and preemption reenabled.
 */
static struct task_struct *next_thread_group_leader(pid_t *tgid)
{
struct pid *pid;
struct task_struct *task;
cond_resched();
rcu_read_lock();
retry:
task = NULL;
pid = find_ge_pid(*tgid, &init_pid_ns);
if (pid) {
*tgid = pid_nr_ns(pid, &init_pid_ns);
task = pid_task(pid, PIDTYPE_PID);
if (!task || !has_group_leader_pid(task) ||
audit_get_contid(task) == AUDIT_CID_UNSET) {
(*tgid) += 1;
goto retry;
}
/*
* Increment the reference count on the task before leaving
* the RCU grace period.
*/
get_task_struct(task);
(*tgid) += 1;
}
rcu_read_unlock();
return task;
}
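/*
 * Workqueue handler: walk all containerized thread-group leaders and emit
 * an enumproc event for each, stopping early if a newer enumeration was
 * requested or the execute collector was disabled.
 */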
void delayed_enumerate_processes(struct work_struct *work)
{
pid_t tgid = 0;
struct task_struct *task;
struct csm_enumerate_processes_work_data *wd = container_of(
work, struct csm_enumerate_processes_work_data, work);
int wd_enumeration_count = wd->enumeration_count;
kfree(wd);
wd = NULL;
work = NULL;
	/*
	 * Allow only a single enumeration pass at a time: stop if a newer
	 * enumeration has been requested or the execute collector has been
	 * disabled.
	 */
while ((wd_enumeration_count == atomic_read(&enumeration_count)) &&
READ_ONCE(csm_execute_enabled) &&
(task = next_thread_group_leader(&tgid))) {
int err;
char uuid[PROCESS_UUID_SIZE];
char parent_uuid[PROCESS_UUID_SIZE];
struct file *exe_file = NULL;
struct file_data path_data = {};
schema_Event event = {};
schema_Process *proc = &event.event.enumproc.proc;
exe_file = get_task_exe_file(task);
if (!exe_file) {
pr_err("failed to get enumerated process executable, pid: %u\n",
task_pid_nr(task));
goto next;
}
proc->has_binary = true;
err = fill_file_description(exe_file, &proc->binary,
&path_data);
if (err) {
pr_err("failed to fill enumerated process %u executable description: %d\n",
task_pid_nr(task), err);
goto next;
}
err = populate_proc_common(proc, uuid, sizeof(uuid),
parent_uuid, sizeof(parent_uuid),
task);
if (err) {
pr_err("failed to set pid %u common fields: %d\n",
task_pid_nr(task), err);
goto next;
}
if (task->flags & PF_EXITING)
goto next;
event.which_event = schema_Event_enumproc_tag;
		event.event.enumproc.has_proc = true;
err = csm_sendeventproto(schema_Event_fields,
&event);
if (err) {
pr_err("failed to send pid %u enumerated process: %d\n",
task_pid_nr(task), err);
goto next;
}
next:
free_file_data(&path_data);
if (exe_file)
fput(exe_file);
put_task_struct(task);
}
}
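/* Schedule an asynchronous enumeration of containerized processes. */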
void csm_enumerate_processes(unsigned long const config_version)
{
struct csm_enumerate_processes_work_data *wd;
wd = kmalloc(sizeof(*wd), GFP_KERNEL);
if (!wd)
return;
INIT_WORK(&wd->work, delayed_enumerate_processes);
wd->enumeration_count = atomic_add_return(1, &enumeration_count);
schedule_work(&wd->work);
}