blob: 3a6cb856b6203ebd0094d8e4ca73ad238b7ff91d [file] [log] [blame]
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
description "Start the VM concierge service"
author "chromium-os-dev@chromium.org"
# Start the VM concierge service, which is responsible for managing all the
# VMs running in the system.
# The job lifetime follows the user session: it is started by the
# start-user-session event and stopped when the ui job begins stopping.
start on start-user-session
stop on stopping ui
# Restart the service automatically if it terminates unexpectedly.
respawn
# Upstart tracks the main process across a double fork (daemonization).
expect daemon
# tmpfiles.d configurations associated with this job.
# NOTE(review): presumably these are applied before pre-start runs so that
# the runtime directories used below exist -- confirm against the init
# implementation in use.
tmpfiles /usr/lib/tmpfiles.d/arcvm.conf /usr/lib/tmpfiles.d/vm_tools.conf
# Force gRPC to use poll instead of epoll.
# TODO(crbug.com/987390): Remove once epoll1 poller is removed or fixed.
env GRPC_POLL_STRATEGY=poll
# Force gRPC to use the native resolver instead of ares.
# TODO(crbug.com/1044665): Remove once gRPC doesn't use ares resolver for vsock.
env GRPC_DNS_RESOLVER=native
# TODO: Remove once the Perfetto SDK is updated beyond v9.0 in virglrenderer
# and other client code -- the new SDK has this path built in.
env PERFETTO_PRODUCER_SOCK_NAME=/run/perfetto/traced-producer.sock
# Give any running VMs enough time to attempt an orderly shutdown.
# The value is the number of seconds Upstart waits before force-killing the
# job once it has been asked to stop.
kill timeout 30
# The virtio-fs device opens a lot of fds and so the whole process tree needs a
# much higher limit.
# Format for every "limit" stanza below: limit RESOURCE SOFT HARD.
limit nofile 1024 262144
# Give rtprio to vms to set threads to real-time priority.
# 10 is for audio client threads in AC'97 device.
limit rtprio 10 10
# Increase memlock for io_uring devices.
# This hard limit must be larger than:
# |# of VM instances| * |# of uring devices| * |memory size for one device|.
limit memlock 64 1024
# The directory for the disk image for arcvm adb sync to reside on.
# This directory will only be used in dev mode.
# It is created by the pre-start script and bind-mounted into the sandbox by
# the main script, in both cases only when dev mode is detected.
env ARCVM_ADB_SYNC_DIR=/mnt/stateful_partition/unencrypted/cache/arcvm_adb_sync
pre-start script
  # Prepare the host for vm_concierge:
  #   1. load the vhost-vsock kernel module,
  #   2. turn /run/arcvm into a shared mountpoint,
  #   3. create the persistent mount namespace used by the main script and
  #      populate it with the ARCVM mounts,
  #   4. record whether the device is in dev mode (/run/vm/dev_mode) and, if
  #      so, make sure the ADB sync directory exists.

  # If the pre-start script fails, by default nothing gets written to
  # the logs. Make sure we can find out what went wrong by
  # re-directing stderr to a temp file and sending it to the syslog if
  # we encounter an error.
  tmp_log="$(mktemp)"
  exec 2>"${tmp_log}"
  dump_log() {
    # $? must be inspected before anything else runs: at this point it still
    # holds the exit status of the script that triggered the EXIT trap.
    if [ $? != 0 ]; then
      set +x
      # Split up lines before calling logger to avoid running into line limits.
      xargs -n 1 -d"\n" logger --tag "${UPSTART_JOB}" < "${tmp_log}"
    fi
    # The captured output has either been forwarded to syslog or is not
    # needed; remove the temp file so that it does not leak on every start.
    rm -f "${tmp_log}"
  }
  trap dump_log EXIT
  # Trace every command into the temp log so that failures can be diagnosed.
  set -x

  # Make sure the vsock module is loaded.
  modprobe -q vhost-vsock

  # Make /run/arcvm a shared mountpoint.
  mount --no-canonicalize --bind /run/arcvm /run/arcvm
  mount --no-canonicalize --make-shared /run/arcvm

  # Create persistent mount namespace at /run/namespaces/mnt_concierge.
  unshare --mount=/run/namespaces/mnt_concierge --propagation unchanged \
    -- /bin/true

  # Remount the bind-mount as 'noexec'.
  mount --no-canonicalize /run/namespaces/mnt_concierge -o remount,noexec

  # Prevent mounts at /run/arcvm from propagating out of the namespace.
  nsenter --mount=/run/namespaces/mnt_concierge --no-fork \
    -- bash -c "mount --no-canonicalize --make-slave /run/arcvm && \
                mount --no-canonicalize --make-shared /run/arcvm"

  nsenter --mount=/run/namespaces/mnt_concierge --no-fork \
    -- mount --no-canonicalize --bind \
      /mnt/stateful_partition/unencrypted/apkcache /run/arcvm/apkcache

  # The runtime directory for removable media and MyFiles.
  nsenter --mount=/run/namespaces/mnt_concierge --no-fork \
    -- mount --no-canonicalize -o \
      rw,nodev,noexec,nosuid,mode=0755,uid=655360,gid=656360 -t tmpfs \
      tmpfs /run/arcvm/media

  # /run/vm/dev_mode is a marker file that the main script checks to decide
  # whether the dev-mode-only bind mounts should be added.
  if crossystem "cros_debug?1"; then
    touch /run/vm/dev_mode
    # The spaces around '[' and ']' are required: '[' is a command name, so
    # "[!" would be looked up as a (non-existent) command and the directory
    # would never be created.
    if [ ! -d "${ARCVM_ADB_SYNC_DIR}" ]; then
      mkdir -p "${ARCVM_ADB_SYNC_DIR}"
      chown crosvm:crosvm "${ARCVM_ADB_SYNC_DIR}"
    fi
  else
    rm -f /run/vm/dev_mode
  fi
end script
script
  # Main job process. Runs vm_concierge inside two nested minijail0
  # sandboxes, entered from the persistent mount namespace that the
  # pre-start script created at /run/namespaces/mnt_concierge:
  #   - the outer minijail0 pivots into /mnt/empty and builds the filesystem
  #     view out of the bind mounts (-b) and mounts (-k) listed below;
  #   - the inner minijail0 sets up the user namespace (uid/gid maps) and
  #     grants only cap_setuid and cap_setgid.
  # See minijail0(1) for the meaning of the individual flags.

  # The positional parameters are used as a list of optional sandbox
  # arguments: start from an empty list and append one entry for every device
  # node or directory that actually exists on this system. The list is
  # spliced into the outer minijail0 invocation as "$@".
  set --
  if [ -c "/dev/mali0" ]; then
    set -- "$@" -b /dev/mali0,,1
  fi
  if [ -c "/dev/pvr_sync" ]; then
    set -- "$@" -b /dev/pvr_sync,,1
  fi
  # (crbug.com/892806): remove check when udmabuf is built for all target kernels.
  if [ -c "/dev/udmabuf" ]; then
    set -- "$@" -b /dev/udmabuf,,1
  fi

  # Dev-mode-only mounts. /run/vm/dev_mode is created by the pre-start script
  # when crossystem reports cros_debug=1.
  if [ -f /run/vm/dev_mode ]; then
    if [ -d /opt/google/vms/android ]; then
      set -- "$@" -b /opt/google/vms/android,,1
    fi
    if [ -d "${ARCVM_ADB_SYNC_DIR}" ]; then
      # Mount a tmpfs at /mnt/stateful_partition first, then bind only the
      # ADB sync directory on top of it.
      set -- "$@" -k \
        'mnt,/mnt/stateful_partition,tmpfs,MS_NOSUID|MS_NODEV|MS_NOEXEC'
      set -- "$@" -b "${ARCVM_ADB_SYNC_DIR}","${ARCVM_ADB_SYNC_DIR}",1
    fi
    if [ -d /usr/local/vms ]; then
      # Same pattern as above: tmpfs at /usr/local, then bind /usr/local/vms.
      set -- "$@" -k 'local,/usr/local,tmpfs,MS_NOSUID|MS_NODEV|MS_NOEXEC'
      set -- "$@" -b /usr/local/vms,,1
    fi
  fi

  # Concierge needs to be able to configure user namespaces due to the
  # way ARC VM shares files with the host. This requires CAP_SETUID
  # and CAP_SETGID, but we can restrict the impact of this by running
  # concierge in an outer user namespace which only maps the uids
  # needed for the inner namespaces (except that concierge itself
  # needs access to a number of groups). We also need to map a number
  # of groups for concierge itself. The mapping does not renumber any
  # uids or gids to make the inner mappings easier to understand,
  # except that we are forced to map something to 0, and we use
  # crosvm-root for this purpose.
  #
  # uids:
  # - crosvm (299)
  # - crosvm-root (20182)
  # - arc-camera (603, but we have to map 600-649 for unclear reasons)
  # - Android (2,000,000 starting from 655360)
  #
  # gids:
  # - all of the above uids except crosvm-dbus, plus
  # - video (27)
  # - daemon-store (400)
  # - tun (413)
  # - virtaccess (418)
  # - cras (600)
  # - wayland (601)
  # - android-reserved-disk (20119)
  # - pluginvm (20128)
  # - cups-proxy (20136)
  # - traced-producer (20162)
  #
  # /proc is also remounted read-write because crosvm needs to be able to set the
  # uid_map and gid_map for its child processes and that needs a writable /proc.
  #
  # The following mount flags are used below:
  # MS_BIND = 0x1000
  # MS_REC = 0x4000
  # (so the 0x5000 passed for /run/imageloader is MS_BIND|MS_REC).
  #
  # -Kslave is applied to propagate imageloader mounts into concierge's mount
  # namespace.
  exec nsenter --mount=/run/namespaces/mnt_concierge --no-fork \
    -- minijail0 -nlvd -i -t --uts \
      -Kslave \
      -P /mnt/empty \
      -b /,/ \
      -k 'proc,/proc,proc,MS_NOSUID|MS_NODEV|MS_NOEXEC' \
      -b /sys,/sys \
      -k 'tmpfs,/sys/fs/cgroup,tmpfs,MS_NODEV|MS_NOEXEC|MS_NOSUID,mode=755,size=10M' \
      -b /sys/fs/cgroup/cpu,,1 \
      -b /dev/chromeos-low-mem \
      -b /dev/log,/dev/log,1 \
      -b /dev/kvm,/dev/kvm,1 \
      -b /dev/net,/dev/net,1 \
      -b /dev/vhost-vsock,/dev/vhost-vsock,1 \
      -b /dev/dri,/dev/dri,1 \
      "$@" \
      -k 'run,/run,tmpfs,MS_NOSUID|MS_NODEV|MS_NOEXEC' \
      -b /run/camera/,,1 \
      -b /run/chrome,/run/chrome,1 \
      -b /run/cras/vms,/run/cras,1 \
      -b /run/cups_proxy,,1 \
      -b /run/dbus,/run/dbus,1 \
      -b /run/perfetto,/run/perfetto,1 \
      -b /run/pvm,,1 \
      -b /run/vm,/run/vm,1 \
      -b /run/vm_cicerone/client,/run/vm_cicerone/client,1 \
      -k /run/imageloader,/run/imageloader,none,0x5000 \
      -k 'var,/var,tmpfs,MS_NOSUID|MS_NODEV|MS_NOEXEC' \
      -b /var/lib/timezone \
      -k '/run/daemon-store/crosvm,/run/daemon-store/crosvm,none,MS_BIND|MS_REC' \
      -k '/run/daemon-store/pvm,/run/daemon-store/pvm,none,MS_BIND|MS_REC' \
      -k '/run/arcvm,/run/arcvm,none,MS_BIND|MS_REC' \
    -- /sbin/minijail0 -U -pv -I -i \
      -Kslave \
      -c 'cap_setuid,cap_setgid+eip' --ambient \
      -k 'proc,/proc,proc,MS_NOSUID|MS_NODEV|MS_NOEXEC' \
      -m'0 20182 1,299 299 1,600 600 50,655360 655360 2000000' \
      -M'0 20182 1,27 27 1,299 299 1,400 400 1,413 413 1,418 418 1,600 600 50,20119 20119 1,20128 20128 1,20136 20136 1,20162 20162 1,655360 655360 2000000' \
      /usr/bin/vm_concierge
end script
post-stop script
  # Tear down the mounts that the pre-start script established: first the
  # persistent mount namespace handle, then the /run/arcvm self bind-mount.
  # Each path is only unmounted when it is currently a mountpoint.
  for mount_path in /run/namespaces/mnt_concierge /run/arcvm; do
    if mountpoint -q "${mount_path}"; then
      umount "${mount_path}"
    fi
  done
end script