blob: ef2390f709e565a8c5de37173e244aa6a5eedcd2 [file] [log] [blame]
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
description "Start the VM concierge service"
author "chromium-os-dev@chromium.org"
# Start the VM concierge service, which is responsible for managing all the
# VMs running in the system.
start on start-user-session or starting arcvm-pre-login-services
stop on stopping ui
# Restart the job automatically if the main process ends unexpectedly.
respawn
# Tell Upstart that the main process forks exactly once so that it tracks the
# resulting child PID.
# NOTE(review): presumably this matches the fork performed by minijail0 (-i)
# in the main script -- confirm.
expect fork
# Force gRPC to use poll instead of epoll.
# TODO(crbug.com/987390): Remove once epoll1 poller is removed or fixed.
env GRPC_POLL_STRATEGY=poll
# Force gRPC to use the native resolver instead of ares.
# TODO(crbug.com/1044665): Remove once gRPC doesn't use ares resolver for vsock.
env GRPC_DNS_RESOLVER=native
# TODO: Remove once the Perfetto SDK is updated beyond v9.0 in virglrenderer
# and other client code -- the new SDK has this path built in.
env PERFETTO_PRODUCER_SOCK_NAME=/run/perfetto/traced-producer.sock
# Give any running VMs enough time to attempt an orderly shutdown: wait 30
# seconds after SIGTERM before Upstart sends SIGKILL.
kill timeout 30
# The virtio-fs device opens a lot of fds and so the whole process tree needs a
# much higher limit (soft limit 1024, hard limit 262144).
limit nofile 1024 262144
# Give rtprio to vms to set threads to real-time priority.
# 10 is for audio client threads in AC'97 device.
limit rtprio 10 10
# Increase memlock for io_uring devices (soft limit 64, hard limit 1024).
# This hard limit must be larger than:
# |# of VM instances| * |# of uring devices| * |memory size for one device|.
limit memlock 64 1024
# Prepare everything the main script relies on before vm_concierge is started:
# the vsock kernel module, the runtime directories under /run, the persistent
# mount namespace at /run/namespaces/mnt_concierge and the dev-mode marker file.
# The order of the mount/namespace steps below matters; do not reorder them.
pre-start script
# Make sure the vsock module is loaded.
modprobe -q vhost-vsock
# Create the runtime directory.
mkdir -p /run/vm
chown crosvm:crosvm /run/vm
# Create the runtime directory for ARCVM.
mkdir -p /run/arcvm
chown crosvm:crosvm /run/arcvm
# Make /run/arcvm a shared mountpoint. The directory is first bind-mounted onto
# itself so that it is a mount point whose propagation type can be changed.
mount --bind /run/arcvm /run/arcvm
mount --make-shared /run/arcvm
# Create persistent mount namespace at /run/namespaces/mnt_concierge.
# The touched file is the bind-mount target that keeps the namespace alive
# after /bin/true exits; "--propagation unchanged" stops unshare from changing
# the propagation of existing mounts in the new namespace.
touch /run/namespaces/mnt_concierge
unshare --mount=/run/namespaces/mnt_concierge --propagation unchanged \
-- /bin/true
# Prevent mounts at /run/arcvm from propagating out of the namespace.
# Marking the mount slave and then shared again (inside the namespace) keeps
# receiving mount events from outside while mounts made inside stay inside.
nsenter --mount=/run/namespaces/mnt_concierge --no-fork \
-- bash -c "mount --make-slave /run/arcvm && mount --make-shared /run/arcvm"
# Create the directory for mojo proxy.
mkdir -p /run/arcvm/mojo
chmod 770 /run/arcvm/mojo
chown crosvm:crosvm /run/arcvm/mojo
# Create the directory for android-data bind mount.
# gid 655360 maps to root user on Android.
# Note that mode and ownership are set on android-data itself, not on the
# nested "mount" directory.
mkdir -p /run/arcvm/android-data/mount
chmod 770 /run/arcvm/android-data
chown crosvm:655360 /run/arcvm/android-data
# Create the directory for user home bind mount.
mkdir -p /run/arcvm/userhome
# Create the runtime directory for plugin VMs.
mkdir -p /run/pvm
chmod 770 /run/pvm
chown pluginvm:crosvm /run/pvm
# Create the cicerone runtime directory, we need to do this here because it
# has to be mounted into the namespace for concierge.
mkdir -p /run/vm_cicerone
chown vm_cicerone:vm_cicerone /run/vm_cicerone
# Create the directory for UNIX socket communication with plugin VMs.
mkdir -p /run/vm_cicerone/client
chmod 770 /run/vm_cicerone/client
chown vm_cicerone:crosvm /run/vm_cicerone/client
# Create the directory mounted as /var/run/arc/testharness.
# ugid 656360:656360 maps to system:system on the Android side.
mkdir -p /run/arcvm/testharness
chmod 770 /run/arcvm/testharness
chown 656360:656360 /run/arcvm/testharness
# Create the directory for various services that we need to mount in
# concierge's namespace. Individual service scripts will adjust ownership
# and permissions as needed.
mkdir -p /run/camera /run/cups_proxy /run/perfetto
# Create the runtime directory for removable media and MyFiles. Set its ugid
# to 655360:656360, which corresponds to root:system on the Android side.
# The tmpfs is mounted from inside the concierge mount namespace.
mkdir -p /run/arcvm/media
nsenter --mount=/run/namespaces/mnt_concierge --no-fork \
-- mount -o rw,nodev,noexec,nosuid,mode=0755,uid=655360,gid=656360 \
-t tmpfs tmpfs /run/arcvm/media
# Record whether the device is in developer mode with a marker file; the main
# script checks /run/vm/dev_mode to decide whether to expose extra paths.
if crossystem "cros_debug?1"; then
touch /run/vm/dev_mode
else
rm -f /run/vm/dev_mode
fi
end script
# Run vm_concierge inside the persistent concierge mount namespace, sandboxed
# with minijail0.
#
# Allow the following capabilities:
#
# CAP_SETGID for allowing mapping additional gids in user namespaces
#            of jailed children. It will be added to the ambient set
#            by concierge
# CAP_SETUID is granted by the same -c argument below.
#            NOTE(review): presumably needed for mapping additional uids in
#            the same way -- confirm.
#
# /proc is also remounted read-write because crosvm needs to be able to set the
# uid_map and gid_map for its child processes and that needs a writable /proc.
#
# The following mount flags are used below:
#   MS_BIND = 0x1000
#   MS_REC  = 0x4000
# so the raw value 0x5000 passed for /run/imageloader is MS_BIND|MS_REC.
#
# -Kslave is applied to propagate imageloader mounts into concierge's mount
# namespace.
#
# Optional bind mounts are collected in the positional parameters ("$@") so
# that every minijail0 argument stays a separate, correctly quoted word.
script
  # Start from an empty argument list.
  set --

  # GPU-related device nodes only exist on some boards.
  if [ -c "/dev/mali0" ]; then
    set -- "$@" -b /dev/mali0,,1
  fi

  if [ -c "/dev/pvr_sync" ]; then
    set -- "$@" -b /dev/pvr_sync,,1
  fi

  # TODO(crbug.com/892806): Remove check when udmabuf is built for all target
  # kernels.
  if [ -c "/dev/udmabuf" ]; then
    set -- "$@" -b /dev/udmabuf,,1
  fi

  # /run/vm/dev_mode is created by the pre-start script when cros_debug is set.
  if [ -f /run/vm/dev_mode ]; then
    if [ -d /opt/google/vms/android ]; then
      set -- "$@" -b /opt/google/vms/android,,1
    fi

    if [ -d /usr/local/vms ]; then
      # Hide the rest of /usr/local behind a tmpfs and expose only the vms
      # directory.
      set -- "$@" -k 'local,/usr/local,tmpfs,MS_NOSUID|MS_NODEV|MS_NOEXEC'
      set -- "$@" -b /usr/local/vms,,1
    fi
  fi

  exec nsenter --mount=/run/namespaces/mnt_concierge --no-fork \
    -- minijail0 -nplrvd -t -i -I --uts \
       -u crosvm -g crosvm -G \
       -c 'cap_setuid,cap_setgid+eip' \
       --ambient \
       -Kslave \
       -P /mnt/empty \
       -b /,/ \
       -k 'proc,/proc,proc,MS_NOSUID|MS_NODEV|MS_NOEXEC' \
       -b /sys,/sys \
       -k 'tmpfs,/sys/fs/cgroup,tmpfs,MS_NODEV|MS_NOEXEC|MS_NOSUID,mode=755,size=10M' \
       -b /sys/fs/cgroup/cpu,,1 \
       -b /dev/chromeos-low-mem \
       -b /dev/log,/dev/log,1 \
       -b /dev/kvm,/dev/kvm,1 \
       -b /dev/net,/dev/net,1 \
       -b /dev/vhost-vsock,/dev/vhost-vsock,1 \
       -b /dev/dri,/dev/dri,1 \
       "$@" \
       -k 'run,/run,tmpfs,MS_NOSUID|MS_NODEV|MS_NOEXEC' \
       -b /run/camera/,,1 \
       -b /run/chrome,/run/chrome,1 \
       -b /run/cras/vms,/run/cras,1 \
       -b /run/cups_proxy,,1 \
       -b /run/dbus,/run/dbus,1 \
       -b /run/perfetto,/run/perfetto,1 \
       -b /run/pvm,,1 \
       -b /run/vm,/run/vm,1 \
       -b /run/vm_cicerone/client,/run/vm_cicerone/client,1 \
       -k /run/imageloader,/run/imageloader,none,0x5000 \
       -k 'var,/var,tmpfs,MS_NOSUID|MS_NODEV|MS_NOEXEC' \
       -b /var/lib/timezone \
       -k '/run/daemon-store/crosvm,/run/daemon-store/crosvm,none,MS_BIND|MS_REC' \
       -k '/run/daemon-store/pvm,/run/daemon-store/pvm,none,MS_BIND|MS_REC' \
       -k '/run/arcvm,/run/arcvm,none,MS_BIND|MS_REC' \
       -- /usr/bin/vm_concierge
end script
# Tear down the mounts created by the pre-start script: first the persistent
# mount namespace, then the /run/arcvm bind mount. Each path is only unmounted
# if it is currently a mount point.
post-stop script
  for target in /run/namespaces/mnt_concierge /run/arcvm; do
    if mountpoint -q "${target}"; then
      umount "${target}"
    fi
  done
end script