# Source blob: 15fcd3f5596f2ade9c1a16b8c543f7b9755c6bac
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Human-readable job description and author contact (the author is blank here).
description "Start the VM concierge service"
author ""
# Start the VM concierge service, which is responsible for managing all the
# VMs running in the system.
# Lifecycle: the job starts on the start-user-session event and is stopped
# while the ui job is stopping.
start on start-user-session
stop on stopping ui
# `expect daemon` tells Upstart that the job forks twice before its final PID
# should be tracked.
# NOTE(review): this has to match the number of forks performed by the command
# exec'd from the main script stanza -- confirm whenever that command changes.
expect daemon
# NOTE(review): `tmpfiles` is not a stock Upstart stanza; presumably the init
# in use applies these tmpfiles.d configs before the job runs -- confirm.
tmpfiles /usr/lib/tmpfiles.d/arcvm.conf /usr/lib/tmpfiles.d/vm_tools.conf
# Camera libraries are mounted here at runtime.
env CAMERA_LIBFS_DIR=/usr/share/cros-camera/libfs
# Force gRPC to use poll instead of epoll.
# TODO: Remove once the epoll1 poller is removed or fixed.
# NOTE(review): the env stanza for this comment was missing from the file;
# GRPC_POLL_STRATEGY is the gRPC core variable that selects the poller.
env GRPC_POLL_STRATEGY=poll
# Force gRPC to use the native resolver instead of ares.
# TODO: Remove once gRPC doesn't use the ares resolver for vsock.
# NOTE(review): the env stanza for this comment was missing from the file;
# GRPC_DNS_RESOLVER is the gRPC core variable that selects the DNS resolver.
env GRPC_DNS_RESOLVER=native
# Tell Perfetto clients where the traced producer socket lives.
# TODO: Remove once the Perfetto SDK is updated beyond v9.0 in virglrenderer
# and other client code -- the new SDK has this path built in.
env PERFETTO_PRODUCER_SOCK_NAME=/run/perfetto/traced-producer.sock
# Give any running VMs enough time to attempt an orderly shutdown.
# (Number of seconds Upstart waits after the stop signal before sending
# SIGKILL.)
kill timeout 30
# The virtio-fs device opens a lot of fds and so the whole process tree needs a
# much higher limit.
# Stanza format is `limit RESOURCE SOFT HARD`: soft 1024, hard 262144.
limit nofile 1024 262144
# Give rtprio to vms to set threads to real-time priority.
# 10 is for audio client threads in AC'97 device.
# Soft and hard limits are both 10.
limit rtprio 10 10
# Increase memlock for io_uring devices.
# This hard limit must be larger than:
# |# of VM instances| * |# of uring devices| * |memory size for one device|.
# NOTE(review): soft 64 / hard 1024 -- the unit is not stated anywhere in this
# file; confirm these values satisfy the formula above.
limit memlock 64 1024
# The directory for the disk image for arcvm adb sync to reside on.
# This directory will only be used in dev mode.
# It is created by the pre-start script and bind-mounted by the main script,
# both only when dev mode is detected.
env ARCVM_ADB_SYNC_DIR=/mnt/stateful_partition/unencrypted/cache/arcvm_adb_sync
pre-start script
  # Prepares the host for the concierge jail: loads the vsock module, creates
  # the persistent mount namespace at /run/namespaces/mnt_concierge, sets up
  # the /run/arcvm mounts inside it, and records whether the device is in dev
  # mode via the /run/vm/dev_mode marker file.

  # If the pre-start script fails, by default nothing gets written to
  # the logs. Make sure we can find out what went wrong by
  # re-directing stderr to a temp file and sending it to the syslog if
  # we encounter an error.
  tmp_log="$(mktemp)"
  exec 2>"${tmp_log}"

  # EXIT handler. The `$?` test must stay the first command so that it sees
  # the exit status of the script rather than of an earlier handler command.
  dump_log() {
    if [ $? != 0 ]; then
      set +x
      # Split up lines before calling logger to avoid running into line limits.
      xargs -n 1 -d"\n" logger --tag "${UPSTART_JOB}" < "${tmp_log}"
    fi
    # The captured trace is no longer needed; do not leak one temp file per
    # job start.
    rm -f "${tmp_log}"
  }
  trap dump_log EXIT

  # Trace every command to stderr (i.e. into the temp log).
  set -x

  # Make sure the vsock module is loaded.
  modprobe -q vhost-vsock

  # Make /run/arcvm a shared mountpoint.
  mount --no-canonicalize --bind /run/arcvm /run/arcvm
  mount --no-canonicalize --make-shared /run/arcvm

  # Create persistent mount namespace at /run/namespaces/mnt_concierge.
  unshare --mount=/run/namespaces/mnt_concierge --propagation unchanged \
    -- /bin/true

  # Remount the bind-mount as 'noexec'.
  mount --no-canonicalize /run/namespaces/mnt_concierge -o remount,noexec

  # Prevent mounts at /run/arcvm from propagating out of the namespace.
  nsenter --mount=/run/namespaces/mnt_concierge --no-fork \
    -- bash -c "mount --no-canonicalize --make-slave /run/arcvm && \
                mount --no-canonicalize --make-shared /run/arcvm"

  # Bind the APK cache from the stateful partition to /run/arcvm/apkcache
  # inside the concierge namespace.
  nsenter --mount=/run/namespaces/mnt_concierge --no-fork \
    -- mount --no-canonicalize --bind \
      /mnt/stateful_partition/unencrypted/apkcache /run/arcvm/apkcache

  # The runtime directory for removable media and MyFiles.
  nsenter --mount=/run/namespaces/mnt_concierge --no-fork \
    -- mount --no-canonicalize -o \
      rw,nodev,noexec,nosuid,mode=0755,uid=655360,gid=656360 -t tmpfs \
      tmpfs /run/arcvm/media

  # Publish the dev-mode state through the /run/vm/dev_mode marker file, which
  # the main script checks before adding dev-only bind mounts.
  if crossystem "cros_debug?1"; then
    touch /run/vm/dev_mode
    if [ ! -d "${ARCVM_ADB_SYNC_DIR}" ]; then
      mkdir -p "${ARCVM_ADB_SYNC_DIR}"
      chown crosvm:crosvm "${ARCVM_ADB_SYNC_DIR}"
    fi
  else
    # Not in dev mode: clear any stale marker left by a previous run.
    rm -f /run/vm/dev_mode
  fi
end script
script
  # Main job body: collects optional minijail arguments in the positional
  # parameters, then exec's the jailed command inside the persistent mount
  # namespace created by the pre-start script.
  set --

  # Device nodes are bound only when present on this board. The trailing
  # ",,1" makes the bind writable at the same path inside the jail.
  if [ -c "/dev/mali0" ]; then
    set -- "$@" -b /dev/mali0,,1
  fi

  if [ -c "/dev/pvr_sync" ]; then
    set -- "$@" -b /dev/pvr_sync,,1
  fi

  # TODO: Remove this check once udmabuf is built for all target kernels.
  if [ -c "/dev/udmabuf" ]; then
    set -- "$@" -b /dev/udmabuf,,1
  fi

  # Dev-mode only binds. /run/vm/dev_mode is created or removed by the
  # pre-start script.
  # NOTE(review): the closing `fi` lines were missing from the file; all of
  # the /usr/local handling is kept inside the dev-mode check as the more
  # restrictive reading -- confirm.
  if [ -f /run/vm/dev_mode ]; then
    if [ -d /opt/google/vms/android ]; then
      set -- "$@" -b /opt/google/vms/android,,1
    fi

    if [ -d "${ARCVM_ADB_SYNC_DIR}" ]; then
      # NOTE(review): the mount spec for this -k was missing from the file
      # (the line ended in a dangling continuation that swallowed the next
      # command). This spec mirrors the /usr/local tmpfs below and presumably
      # provides a writable parent for the bind destination -- confirm
      # against the intended jail layout.
      set -- "$@" -k \
        'tmpfs,/mnt/stateful_partition,tmpfs,MS_NOSUID|MS_NODEV|MS_NOEXEC'
      set -- "$@" -b "${ARCVM_ADB_SYNC_DIR}","${ARCVM_ADB_SYNC_DIR}",1
    fi

    # Mount a tmpfs over /usr/local once, then bind only the subdirectories
    # that exist.
    if [ -d /usr/local/vms ] || [ -d /usr/local/bin ]; then
      set -- "$@" -k 'local,/usr/local,tmpfs,MS_NOSUID|MS_NODEV|MS_NOEXEC'
    fi
    if [ -d /usr/local/vms ]; then
      set -- "$@" -b /usr/local/vms,,1
    fi
    if [ -d /usr/local/bin ]; then
      set -- "$@" -b /usr/local/bin
    fi
  fi

  # Bind mount any Virtio Vhost User proxy devices into concierge. This is
  # relevant for `crosvm_siblings` builds.
  # 1. Finds all PCI device ids like /sys/devices/pci0000:00/0000:00:01/device.
  # 2. If the device id matches "0x107d" then this is a VVU device id, the next
  #    step is to get the corresponding vendor id by getting the "vendor" file
  #    in the same directory.
  # 3. If the vendor id matches "0x1af4" then this is a VVU device.
  # 4. Append all the VVU devices (if any) found on this device to the list of
  #    VVU devices to be used in the main script.
  # Since this is running in the script stanza, we are not allowed to fork or
  # spawn new processes so we need to use only builtin functions instead.
  VVU_DEVICES=""
  for f in /sys/devices/pci0000:00/*/ ; do
    # Require both files so a missing "vendor" cannot abort the job through a
    # failed redirection.
    if [ -f "${f}/device" ] && [ -f "${f}/vendor" ]; then
      read -r device < "${f}/device"
      read -r vendor < "${f}/vendor"
      if [ "$device" = "0x107d" ] && [ "$vendor" = "0x1af4" ]; then
        VVU_DEVICES="${VVU_DEVICES} ${f}"
      fi
    fi
  done

  # If we have VVU devices, for each of them create a string like this
  # "-b /sys/devices/pci0000:00/0000:00:01/,,1".
  # VVU_DEVICES is intentionally unquoted: it is a space-separated list.
  for f in ${VVU_DEVICES}; do
    # TODO(b/196186396): Use udev rules instead of "chown".
    chown -R crosvm:crosvm "${f}" # croslint: disable
    set -- "$@" -b "${f},,1" # croslint: disable
  done

  # If VVU devices are found then this board needs sibling VM support. This
  # means that crosvm needs access to the following files to start and manage
  # sibling VM device backend processes.
  # TODO(b/196186396): Use udev rules instead of "chown".
  if [ -n "${VVU_DEVICES}" ]; then
    chown crosvm:crosvm \
      /sys/bus/pci/drivers/virtio-pci/unbind # croslint: disable
    chown crosvm:crosvm \
      /sys/bus/pci/drivers/virtio-pci/bind # croslint: disable
    chown crosvm:crosvm /sys/bus/pci/drivers/vfio-pci/bind # croslint: disable
    chown crosvm:crosvm /sys/bus/pci/drivers/vfio-pci/unbind # croslint: disable
    chown -R crosvm:crosvm /dev/vfio/ # croslint: disable
    set -- "$@" -b /sys/bus/pci/drivers/virtio-pci,,1
    set -- "$@" -b /sys/bus/pci/drivers/vfio-pci,,1
    set -- "$@" -b /dev/vfio,/dev/vfio,1
    # Raise the memlock limit so vvu devices can pin pages via VFIO.
    set -- "$@" -R RLIMIT_MEMLOCK,unlimited,unlimited
  fi

  # Concierge needs to be able to configure user namespaces due to the
  # way ARC VM shares files with the host. This requires CAP_SETUID
  # and CAP_SETGID, but we can restrict the impact of this by running
  # concierge in an outer user namespace which only maps the uids
  # needed for the inner namespaces (except that concierge itself
  # needs access to a number of groups). We also need to map a number
  # of groups for concierge itself. The mapping does not renumber any
  # uids or gids to make the inner mappings easier to understand,
  # except that we are forced to map something to 0, and we use
  # crosvm-root for this purpose.
  #
  # uids:
  # - crosvm (299)
  # - crosvm-root (20182)
  # - arc-camera (603, but we have to map 600-649 for unclear reasons)
  # - Android (2,000,000 starting from 655360)
  #
  # gids:
  # - all of the above uids except crosvm-dbus, plus
  # - video (27)
  # - daemon-store (400)
  # - tun (413)
  # - virtaccess (418)
  # - cras (600)
  # - wayland (601)
  # - android-reserved-disk (20119)
  # - pluginvm (20128)
  # - cups-proxy (20136)
  # - traced-producer (20162)
  #
  # /proc is also remounted read-write because crosvm needs to be able to set
  # the uid_map and gid_map for its child processes and that needs a writable
  # /proc.
  #
  # The following mount flags are used below:
  #   MS_BIND = 0x1000
  #   MS_REC  = 0x4000
  # (so the literal 0x5000 on the imageloader mount is MS_BIND | MS_REC).
  #
  # -Kslave is applied to propagate imageloader mounts into concierge's mount
  # namespace.
  #
  # "$@" splices in the optional arguments collected above.
  #
  # NOTE(review): the command to run inside the jail was missing from the file
  # (the last continuation line ran straight into `end script`);
  # /usr/bin/vm_concierge is reconstructed from the upstream job -- confirm.
  exec nsenter --mount=/run/namespaces/mnt_concierge --no-fork \
    -- minijail0 -nlvd -i -t --uts \
      -Kslave \
      -P /mnt/empty \
      -b /,/ \
      -k 'proc,/proc,proc,MS_NOSUID|MS_NODEV|MS_NOEXEC' \
      -b /sys,/sys \
      -k 'tmpfs,/sys/fs/cgroup,tmpfs,MS_NODEV|MS_NOEXEC|MS_NOSUID,mode=755,size=10M' \
      -b /sys/fs/cgroup/cpu,,1 \
      -b /dev/chromeos-low-mem \
      -b /dev/log,/dev/log,1 \
      -b /dev/kvm,/dev/kvm,1 \
      -b /dev/net,/dev/net,1 \
      -b /dev/vhost-vsock,/dev/vhost-vsock,1 \
      -b /dev/dri,/dev/dri,1 \
      "$@" \
      -k 'run,/run,tmpfs,MS_NOSUID|MS_NODEV|MS_NOEXEC' \
      -b /run/camera/,,1 \
      -b /run/chrome,/run/chrome,1 \
      -b /run/wayland/concierge,/run/wayland/concierge,1 \
      -b /run/cras/vms,/run/cras,1 \
      -b /run/cups_proxy,,1 \
      -b /run/dbus,/run/dbus,1 \
      -b /run/perfetto,/run/perfetto,1 \
      -b /run/pvm,,1 \
      -b /run/vm,/run/vm,1 \
      -b /run/vm_cicerone/client,/run/vm_cicerone/client,1 \
      -k /run/imageloader,/run/imageloader,none,0x5000 \
      -k 'var,/var,tmpfs,MS_NOSUID|MS_NODEV|MS_NOEXEC' \
      -b /var/lib/timezone \
      -k '/run/daemon-store/crosvm,/run/daemon-store/crosvm,none,MS_BIND|MS_REC' \
      -k '/run/daemon-store/pvm,/run/daemon-store/pvm,none,MS_BIND|MS_REC' \
      -k '/run/arcvm,/run/arcvm,none,MS_BIND|MS_REC' \
    -- /sbin/minijail0 -U -pv -I -i \
      -Kslave \
      -c 'cap_setuid,cap_setgid+eip' --ambient \
      -k 'proc,/proc,proc,MS_NOSUID|MS_NODEV|MS_NOEXEC' \
      -m'0 20182 1,299 299 1,600 600 50,655360 655360 2000000' \
      -M'0 20182 1,27 27 1,299 299 1,400 400 1,413 413 1,418 418 1,600 600 50,20119 20119 1,20128 20128 1,20136 20136 1,20162 20162 1,655360 655360 2000000' \
    -- /usr/bin/vm_concierge
end script
post-stop script
  # Undo the mounts made by the pre-start script. Each unmount is guarded so
  # that a job which failed before mounting can still be stopped cleanly.

  # Release the persistent mount namespace.
  if mountpoint -q /run/namespaces/mnt_concierge; then
    umount /run/namespaces/mnt_concierge
  fi

  # Remove the shared bind mount over /run/arcvm.
  if mountpoint -q /run/arcvm; then
    umount /run/arcvm
  fi
end script