blob: a4a00b4fd63c6eee2699738528633f6383fc4f0d [file] [log] [blame] [edit]
#!/bin/bash
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Load the external helper scripts this file depends on. Provisioning cannot
# proceed without them, so bail out as soon as one fails to load.
if ! source /usr/share/misc/chromeos-common.sh; then
  exit 1
fi
if ! source /usr/share/misc/shflags; then
  exit 1
fi
# For PARTITION_NUM_ROOT_A
if ! source /usr/sbin/write_gpt.sh; then
  exit 1
fi
# Helpful constants.

# Default log destination; used as the default value of the logfile flag.
readonly LOGFILE_PATH="/var/log/quick-provision.log"
# Payload file names, fetched from <url>/<build>/<name>.
readonly KERN_IMAGE="full_dev_part_KERN.bin.gz"
readonly ROOT_IMAGE="full_dev_part_ROOT.bin.gz"
readonly STATEFUL_TGZ="stateful.tgz"
# Directory the stateful payload is extracted into.
readonly STATEFUL_DIR="/mnt/stateful_partition"
# Marker file inside STATEFUL_DIR; removed before the stateful update and
# rewritten with "clobber" once the update has been unpacked.
readonly UPDATE_STATE_FILE=".update_available"
# Persistent file to log the hashes of contents of files downloaded from
# the devserver to help debug crbug.com/788473.
readonly HASHES_LOG="/tmp/hashes"
# Lock file to ensure quick provisioning script isn't run concurrently
# by SSH retries.
readonly LOCKFILE="/var/lock/quick-provision.lock"
# File to indicate that quick provision has already successfully completed.
readonly COMPLETED_FILE="/tmp/quick-provision-complete"
# Name of this script for the usage text. "$0" is quoted so that an invocation
# path containing whitespace or glob characters is not word-split, and the
# assignment is kept separate from "readonly" so it does not mask the exit
# status of the command substitution.
PROGRAM="$(basename "$0")"
readonly PROGRAM
readonly FLAGS_HELP="Usage:
${PROGRAM} [flags] <build> <url>
"
# Command line flags, declared through the DEFINE_* helpers. The parsed values
# are read elsewhere in this script as FLAGS_logfile, FLAGS_status_url and
# FLAGS_reboot.
DEFINE_string logfile "${LOGFILE_PATH}" "Path to record logs to."
DEFINE_string status_url "" "URL of devserver to post status to."
DEFINE_boolean reboot "${FLAGS_TRUE}" "Whether to reboot at completion"
# Parse command line.
FLAGS "$@" || exit 1
# Replace the positional parameters with the contents of FLAGS_ARGV
# (presumably the non-flag arguments left over by the flag parser -- confirm
# against the shflags documentation). main later expects exactly two of them.
eval set -- "${FLAGS_ARGV}"
# Print the given message, a blank line and the flag help text, then exit
# the script with status 1.
usage() {
  echo "$@"
  echo ""
  flags_help
  exit 1
}
# Write an informational log line to stdout.
# All arguments are joined into one message, prefixed with an RFC 3339
# timestamp and the "INFO:" tag.
info() {
  local stamp
  stamp="$(date --rfc-3339=seconds)"
  printf '%s INFO: %s\n' "${stamp}" "$*"
}
# Write an error log line to stderr.
# All arguments are joined into one message, prefixed with an RFC 3339
# timestamp and the "ERROR:" tag.
error() {
  local stamp
  stamp="$(date --rfc-3339=seconds)"
  printf '%s ERROR: %s\n' "${stamp}" "$*" >&2
}
# Emit a "KEYVAL: <text>" line on stdout; the arguments are joined with
# spaces to form <text>.
keyval() {
  printf 'KEYVAL: %s\n' "$*"
}
# Abort the script: log the message as an error, publish it as the current
# status, then exit with status 1. Both outputs carry a "FATAL: " prefix.
die() {
  local reason="FATAL: $*"
  error "${reason}"
  post_status "${reason}"
  exit 1
}
# Print the current time as whole seconds since the Unix epoch.
get_timestamp() {
  date '+%s'
}
# Report how long an event took as three keyvals:
#   <event>_start, <event>_end and <event>_elapsed (all in seconds).
# Arguments:
#   $1 - timestamp at which the event started (as from get_timestamp).
#   $2 - event name used as the keyval prefix.
end_timing() {
  local began="$1"
  local label="$2"
  local finished
  finished="$(get_timestamp)"
  local duration="$((finished - began))"
  keyval "${label}_start=${began}"
  keyval "${label}_end=${finished}"
  keyval "${label}_elapsed=${duration}"
}
# Run a command and emit timing keyvals for it.
# Arguments:
#   $1   - event name passed to end_timing.
#   $2.. - the command (and its arguments) to run.
# Returns:
#   The exit status of the command that was run. Previously the status of
#   end_timing was returned instead, so a failing command always looked
#   successful to the caller.
time_cmd() {
  local event_name="$1"
  shift
  local start_time
  start_time="$(get_timestamp)"
  "$@"
  # Capture the status right away; end_timing below would overwrite $?.
  local status="$?"
  end_timing "${start_time}" "${event_name}"
  return "${status}"
}
# Publish a status update for the provision process.
# The message (all arguments joined) is always sent to syslog via logger and
# to the script log via info. It is additionally posted to the devserver when
# the status_url flag is non-empty.
post_status() {
  local message="$*"
  logger -t quick-provision "Updated status: ${message}"
  info "Updated status: ${message}"
  # No devserver URL supplied: local logging is all that is required.
  if [[ -z "${FLAGS_status_url}" ]]; then
    return 0
  fi
  # Send status in the background -- don't let an RPC call blockage hold up
  # provisioning. Ignore output unless there is an error.
  curl -sS -o /dev/null -F "status=<-" "${FLAGS_status_url}" <<< "${message}" &
}
# Retrieves a URL to stdout.
# The downloaded bytes are also fed to md5sum, whose output is appended to
# HASHES_LOG, regardless of which fetcher is used.
# Arguments:
#   $1 - URL to download.
# Returns:
#   The exit status of the fetcher (wget or curl), not of tee.
get_url_to_stdout() {
  local url="$1"
  local -a fetch_cmd
  # TODO(davidriley): Switch to curl once curl has better retry/resume
  # semantics. See crbug.com/782416 for details.
  # TODO(davidriley): Configure timeouts and retries.
  # TODO(davidriley): curl options: --speed-time, --speed-limit,
  # --connect-timeout, --max-time, --retry, --retry-delay, --retry-max-time
  if type wget >/dev/null 2>&1; then
    fetch_cmd=(wget --progress=dot:giga -S --tries=1 -O - "${url}")
  else
    # --fail makes curl exit non-zero on an HTTP error instead of streaming
    # the server's error page with status 0, so callers see a retrieval
    # failure just as they do with wget.
    fetch_cmd=(curl --fail "${url}")
  fi
  "${fetch_cmd[@]}" | tee >(md5sum >>"${HASHES_LOG}")
  return "${PIPESTATUS[0]}"
}
# Copy stdin onto the given partition (or file) using 2M output blocks.
# Arguments:
#   $1 - destination device path.
write_partition() {
  local target_dev="$1"
  # TODO(davidriley): Use tool that only verifies zero blocks before writing.
  # conv=sparse assumes that zero blocks were previous zero so is not safe
  # to use.
  dd obs=2M of="${target_dev}"
}
# Stream a gzip-compressed partition image from a URL onto a partition.
# The pipeline is download -> gunzip -> write. The first stage that reports
# a non-zero status terminates the script through die.
# Arguments:
#   $1 - URL of the gzip-compressed image.
#   $2 - destination partition device.
update_partition() {
  local image_url="$1"
  local dest_part="$2"
  # TODO(davidriley): Enable blkdiscard when moving to verifying zero blocks
  # before writing them.
  # info blkdiscard "${dest_part}"
  # blkdiscard "${dest_part}"
  info Updating "${dest_part}" with "${image_url}"
  get_url_to_stdout "${image_url}" \
    | gzip -d \
    | write_partition "${dest_part}"
  # Must be captured immediately: any later command resets PIPESTATUS.
  local -a stage_rc=("${PIPESTATUS[@]}")
  # Failure message for each pipeline stage, in pipeline order.
  local -a stage_err=(
    "Retrieving ${image_url} failed."
    "Decompressing ${image_url} failed."
    "Writing to ${dest_part} failed."
  )
  local idx
  for idx in 0 1 2; do
    if [[ "${stage_rc[idx]}" -ne "0" ]]; then
      die "${stage_err[idx]} (statuses ${stage_rc[*]})"
    fi
  done
}
# Replace the stateful partition contents from a stateful.tgz URL.
# Three phases, each announced through info and post_status:
#   reset  - remove the update marker and any leftover *_new directories;
#   update - download the tarball and unpack it into STATEFUL_DIR;
#   clean  - write "clobber" into the update marker file.
# Any failure terminates the script through die.
# Arguments:
#   $1 - URL of the stateful tarball.
stateful_update() {
  local tgz_url="$1"
  local marker="${STATEFUL_DIR}/${UPDATE_STATE_FILE}"

  # Stateful reset.
  info "Stateful reset"
  post_status "DUT: Stateful reset"
  if ! rm -rf "${marker}" \
      "${STATEFUL_DIR}/var_new" \
      "${STATEFUL_DIR}/dev_image_new"; then
    die "Unable to reset stateful."
  fi

  # Stateful update.
  info "Stateful update"
  post_status "DUT: Stateful update"
  get_url_to_stdout "${tgz_url}" \
    | tar --ignore-command-error --overwrite --directory="${STATEFUL_DIR}" -xzf -
  # Must be captured immediately: any later command resets PIPESTATUS.
  local -a stage_rc=("${PIPESTATUS[@]}")
  if [[ "${stage_rc[0]}" -ne "0" ]]; then
    die "Retrieving ${tgz_url} failed. (statuses ${stage_rc[*]})"
  fi
  if [[ "${stage_rc[1]}" -ne "0" ]]; then
    die "Untarring to ${STATEFUL_DIR} failed. (statuses ${stage_rc[*]})"
  fi

  # Stateful clean.
  info "Stateful clean"
  post_status "DUT: Stateful clean"
  if ! printf "clobber" > "${marker}"; then
    die "Unable to clean stateful."
  fi
}
# Performs postinst and sets the next kernel.
# Mounts the freshly written root filesystem (NEXT_ROOT) read-only on a
# temporary directory and runs its postinst script with NEXT_ROOT as the
# argument. The temporary mount point is removed on every path, including
# when the mount itself fails.
# Function will exit script on failure.
# Globals:
#   NEXT_ROOT  (read) - device holding the new root filesystem.
#   HASHES_LOG (read) - dumped to stdout when postinst fails.
# Arguments:
#   $1 - kernel partition number; accepted for interface compatibility but
#        not used by this function.
set_next_kernel() {
  # TODO(davidriley): Fix postinst to avoid unnecessary operations like
  # rewriting hashes and unnecessary delays.
  info "Update next kernel to try (via postinst)"
  local tmpmnt
  tmpmnt="$(mktemp -d)" || die "Unable to create temporary mount point."
  if ! mount -o ro "${NEXT_ROOT}" "${tmpmnt}"; then
    # Nothing is mounted, so the empty directory can simply be removed
    # before bailing out.
    rmdir "${tmpmnt}"
    die "Unable to mount ${NEXT_ROOT}."
  fi
  "${tmpmnt}/postinst" "${NEXT_ROOT}"
  # Remember postinst's status; the cleanup below would overwrite $?.
  local retval="$?"
  umount "${tmpmnt}"
  rmdir "${tmpmnt}"
  if [[ "${retval}" -ne "0" ]]; then
    echo "Downloaded hashes prior to postinst failure:"
    cat "${HASHES_LOG}"
    die "postinst failed."
  fi
}
# Provision the device with the requested build.
# Steps: short-circuit if COMPLETED_FILE shows a finished run, work out the
# inactive kernel/root partitions, stop ui and update-engine, write the
# kernel and rootfs images, update stateful, run postinst, record completion
# and (optionally) reboot.
# Globals:
#   NEXT_KERN_PART, NEXT_KERN, NEXT_ROOT (written) - target partition number
#     and device paths; NEXT_ROOT is read again by set_next_kernel.
#   COMPLETED_FILE (read/written), FLAGS_reboot (read).
# Arguments:
#   $1 - build to provision.
#   $2 - base URL the payloads are fetched from (<url>/<build>/<file>).
#   $3 - timestamp at which the script started, for the overall timing.
# Exits the script with 0 when the build was already provisioned, and via
# die on any failure.
provision_device() {
  local build="$1"
  local static_url="$2"
  local script_start_time="$3"
  if [[ -f "${COMPLETED_FILE}" ]]; then
    if cmp -s <(echo "${build}") "${COMPLETED_FILE}"; then
      info "Quick provision already complete to desired version: ${build}"
      exit 0
    else
      local other_version="$(<"${COMPLETED_FILE}")"
      die "Previous quick provision attempt to unexpected version has" \
        "completed and waiting for reboot: ${other_version}"
    fi
  fi
  load_base_vars
  # Example 1: "/dev/nvme0n1p3"
  # Example 2: "/dev/sda3"
  local current_root="$(rootdev -s)"
  local root_disk="$(rootdev -s -d)"
  info "Current root ${current_root}, disk ${root_disk}"
  keyval "CURRENT_ROOT=${current_root}"
  # Handle /dev/mmcblk0pX, /dev/sdaX, etc style partitions.
  # Example 1: "3"
  # Example 2: "3"
  local root_part_num="$(get_partition_number "${current_root}")"
  # The inner expansions are quoted so they are stripped as literal text
  # rather than being interpreted as glob patterns.
  # Example 1: "p3"
  # Example 2: "3"
  local root_part_num_with_delim="${current_root#"${root_disk}"}"
  # Example 1: "p"
  # Example 2: ""
  local root_part_delim="${root_part_num_with_delim%"${root_part_num}"}"
  # Pick the kernel/root pair that is NOT currently booted.
  if [[ "${root_part_num}" == "${PARTITION_NUM_ROOT_A}" ]]; then
    NEXT_KERN_PART="${PARTITION_NUM_KERN_B}"
    NEXT_KERN="${root_disk}${root_part_delim}${NEXT_KERN_PART}"
    NEXT_ROOT="${root_disk}${root_part_delim}${PARTITION_NUM_ROOT_B}"
  elif [[ "${root_part_num}" == "${PARTITION_NUM_ROOT_B}" ]]; then
    NEXT_KERN_PART="${PARTITION_NUM_KERN_A}"
    NEXT_KERN="${root_disk}${root_part_delim}${NEXT_KERN_PART}"
    NEXT_ROOT="${root_disk}${root_part_delim}${PARTITION_NUM_ROOT_A}"
  else
    die "Unexpected root partition ${current_root}"
  fi
  info "Will update kern ${NEXT_KERN}, root ${NEXT_ROOT}"
  # Shutdown ui, update-engine
  info "Shutting down ui, update-engine"
  stop ui
  stop update-engine
  # Kernel.
  info "Update kernel ${NEXT_KERN}"
  post_status "DUT: Updating kernel ${NEXT_KERN}"
  time_cmd UPDATE_KERNEL \
    update_partition "${static_url}/${build}/${KERN_IMAGE}" "${NEXT_KERN}"
  # Rootfs.
  info "Update rootfs ${NEXT_ROOT}"
  post_status "DUT: Updating rootfs ${NEXT_ROOT}"
  time_cmd UPDATE_ROOTFS \
    update_partition "${static_url}/${build}/${ROOT_IMAGE}" "${NEXT_ROOT}"
  # Stateful.
  time_cmd UPDATE_STATEFUL \
    stateful_update "${static_url}/${build}/${STATEFUL_TGZ}"
  # Boot the next kernel.
  time_cmd SET_NEXT_KERNEL \
    set_next_kernel "${NEXT_KERN_PART}"
  # Record that quick provision is complete to avoid another attempt.
  echo "${build}" >"${COMPLETED_FILE}"
  keyval "COMPLETED=${build}"
  if [[ ${FLAGS_reboot} -eq ${FLAGS_TRUE} ]]; then
    # Reboot in the background, giving time for the ssh invocation to
    # cleanly terminate.
    info "Reboot (into ${build})"
    post_status "DUT: Reboot"
    (sleep 2; reboot) &
  fi
  end_timing "${script_start_time}" QUICK_PROVISION
}
# Entry point: validate arguments, log identifying keyvals, then provision
# while holding an exclusive lock on LOCKFILE.
# Arguments:
#   $1 - build to provision.
#   $2 - base URL the payloads are fetched from.
# Returns the status of the locked subshell, i.e. of provision_device.
main() {
  if (( $# != 2 )); then
    usage "ERROR: Incorrect number of arguments."
  fi
  local target_build="$1"
  local devserver_url="$2"
  local started_at
  started_at="$(get_timestamp)"
  info "Provisioning ${target_build} from ${devserver_url}"
  keyval "BOOT_ID=$(</proc/sys/kernel/random/boot_id)"
  # Report the currently installed build under the ORIGINAL_BUILD key.
  local original_build
  original_build="$(grep CHROMEOS_RELEASE_BUILDER_PATH /etc/lsb-release \
    | sed -e s/CHROMEOS_RELEASE_BUILDER_PATH/ORIGINAL_BUILD/)"
  keyval "${original_build}"
  # File descriptor 9 is opened on LOCKFILE for the lifetime of the subshell.
  (
    # Ensure no concurrent quick provision attempts.
    time_cmd LOCK_LOCKFILE flock 9
    provision_device "${target_build}" "${devserver_url}" "${started_at}"
  ) 9>"${LOCKFILE}"
}
# Run main with stdout and stderr combined, appended to the log file and
# still echoed to the caller.
main "$@" 2>&1 | tee -a "${FLAGS_logfile}"
# Exit with the status of main (first pipeline stage), not that of tee.
exit "${PIPESTATUS[0]}"