#!/bin/bash

# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

. /usr/share/misc/chromeos-common.sh || exit 1
. /usr/share/misc/shflags || exit 1

# For PARTITION_NUM_ROOT_A
. /usr/sbin/write_gpt.sh || exit 1

# Helpful constants.
# Default log destination; used as the default value of the --logfile flag.
LOGFILE_PATH="/var/log/quick-provision.log"
# File names, relative to "<file server>/<build>/", of the gzip compressed
# kernel image, the gzip compressed rootfs image and the stateful tarball.
KERN_IMAGE="full_dev_part_KERN.bin.gz"
ROOT_IMAGE="full_dev_part_ROOT.bin.gz"
STATEFUL_TGZ="stateful.tgz"
# Directory the stateful tarball is extracted into.
STATEFUL_DIR="/mnt/stateful_partition"
# Marker file inside STATEFUL_DIR: removed before the stateful update and
# rewritten with the word "clobber" once the new payload has been extracted.
UPDATE_STATE_FILE=".update_available"

# File that accumulates the md5sum of content downloaded from the devserver
# (appended by get_url_to_stdout) to help debug crbug.com/788473.  Its
# contents are printed when postinst fails.
HASHES_LOG="/tmp/hashes"

# Lock file to ensure quick provisioning script isn't run concurrently
# by SSH retries.  main() opens it on file descriptor 9 and flock()s it.
LOCKFILE="/var/lock/quick-provision.lock"

# File to indicate that quick provision has already successfully completed.
# It stores the name of the build that was provisioned so that a repeated
# request for the same build can exit early with success.
COMPLETED_FILE="/tmp/quick-provision-complete"

# Name of this script as shown in the usage text.  Derived with parameter
# expansion instead of an unquoted `basename $0`, so an invocation path
# containing whitespace or glob characters is handled intact.
PROGRAM="${0##*/}"
# Usage banner; shflags reads FLAGS_HELP when it prints help.
FLAGS_HELP="Usage:
  ${PROGRAM} [flags] <build> <url> [<url> ...]
"

# Command line flags, declared through shflags.  The rest of this script
# reads the parsed values as FLAGS_logfile, FLAGS_status_url and FLAGS_reboot.
DEFINE_string logfile "${LOGFILE_PATH}" "Path to record logs to."
DEFINE_string status_url "" "URL of devserver to post status to."
DEFINE_boolean reboot "${FLAGS_TRUE}" "Whether to reboot at completion"

# Parse command line; abort if flag parsing fails.
FLAGS "$@" || exit 1
# Reset the positional parameters from FLAGS_ARGV (presumably the non-flag
# arguments left over by shflags), so "$@" holds <build> <url> [<url> ...].
eval set -- "${FLAGS_ARGV}"

# Print a message followed by a blank line and the flag help, then abort.
# Arguments: $@ - words of the message shown before the help text.
# Exits:     always, with status 1.
usage() {
  local -a message=("$@")

  echo "${message[@]}"
  printf '\n'
  flags_help
  exit 1
}

# Write a timestamped INFO record to stdout.
# Arguments: $@ - message words, joined into a single line.
info() {
  printf '%s INFO: %s\n' "$(date --rfc-3339=seconds)" "$*"
}

# Write a timestamped ERROR record to stderr.
# Arguments: $@ - message words, joined into a single line.
error() {
  printf '%s ERROR: %s\n' "$(date --rfc-3339=seconds)" "$*" >&2
}

# Emit a machine readable "KEYVAL: <text>" record on stdout.
# Arguments: $@ - the key=value text, joined into a single line.
keyval() {
  printf 'KEYVAL: %s\n' "$*"
}

# Report a fatal condition and terminate the current shell.
# The message is logged as an error and posted as a status update before
# exiting.
# Arguments: $@ - message words, joined into a single line.
# Exits:     always, with status 1.
die() {
  local fatal_message="FATAL: $*"

  error "${fatal_message}"
  post_status "${fatal_message}"
  exit 1
}

# Print the current time as whole seconds since the epoch.
# Returns: the exit status of date.
get_timestamp() {
  local now

  now="$(date '+%s')" || return
  echo "${now}"
}

# Close a timed event and report it as three keyvals:
# <event>_start, <event>_end and <event>_elapsed (end minus start).
# Arguments: $1 - timestamp at which the event started.
#            $2 - event name used as the keyval prefix.
end_timing() {
  local began="$1"
  local label="$2"
  local finished

  finished="$(get_timestamp)"

  keyval "${label}_start=${began}"
  keyval "${label}_end=${finished}"
  keyval "${label}_elapsed=$((finished - began))"
}

# Run a command and emit timing keyvals for it.
# Arguments: $1   - event name used as the keyval prefix.
#            $2.. - command (and its arguments) to run.
# Outputs:   whatever the command prints, followed by the timing keyvals.
# Returns:   the exit status of the command, so callers can detect failure
#            (previously the status was replaced by that of end_timing).
time_cmd() {
  local event_name="$1"
  shift
  local start_time
  start_time="$(get_timestamp)"

  "$@"
  # Capture the status right away; end_timing below would overwrite $?.
  local rc="$?"

  end_timing "${start_time}" "${event_name}"
  return "${rc}"
}

# Record a status update for the provision process.
# The update is always written to syslog (via logger) and to the info log.
# When the status_url flag is set it is additionally posted to that URL.
# Arguments: $@ - status words, joined into a single line.
post_status() {
  local status="$*"
  local note="Updated status: ${status}"

  logger -t quick-provision "${note}"
  info "${note}"

  # Without a status URL there is nowhere to post to.
  if [[ -z "${FLAGS_status_url}" ]]; then
    return 0
  fi

  # Post in the background so a stalled RPC cannot hold up provisioning.
  # The status text is fed to curl on stdin; normal output is discarded and
  # only errors are shown.
  curl -sS -o /dev/null -F "status=<-" "${FLAGS_status_url}" <<< "${status}" &
}

# Retrieves a URL to stdout.
# wget is preferred when installed; curl is the fallback.  In both cases the
# stream is also piped through md5sum, whose result is appended to
# HASHES_LOG.
# Arguments: $1 - URL to download.
# Outputs:   the downloaded bytes on stdout.
# Returns:   the exit status of the download tool (not of tee).
get_url_to_stdout() {
  local url="$1"

  # TODO(davidriley): Switch to curl once curl has better retry/resume
  # semantics.  See crbug.com/782416 for details.
  # TODO(davidriley): Configure timeouts and retries.
  # TODO(davidriley): curl options: --speed-time, --speed-limit,
  #   --connect-timeout, --max-time, --retry, --retry-delay, --retry-max-time
  if type wget >/dev/null 2>&1; then
    wget --progress=dot:giga -S --retry-connrefused -O - "${url}" | \
      tee >(md5sum >>"${HASHES_LOG}")
    return "${PIPESTATUS[0]}"
  fi

  # --fail: make HTTP errors (e.g. 404) a non-zero exit instead of streaming
  # the server's error page to the consumer as if it were the payload, so
  # callers can fall back to the next file server.
  # --location: follow redirects, as wget does by default.
  curl --fail --location "${url}" | \
    tee >(md5sum >>"${HASHES_LOG}")
  return "${PIPESTATUS[0]}"
}

# Copy stdin onto a partition (or any other writable path) with dd.
# Arguments: $1 - destination device or file.
# Returns:   the exit status of dd.
write_partition() {
  local target="$1"

  # TODO(davidriley): Use tool that only verifies zero blocks before writing.
  # conv=sparse assumes that zero blocks were previous zero so is not safe
  # to use.
  local -a dd_args=(
    "of=${target}"
    "obs=2M"
  )

  dd "${dd_args[@]}"
}

# Updates a partition on disk with a given gzip compressed partition image.
# The file servers are tried in order and the first one whose download
# succeeds is used; the remaining servers are then skipped.
# Function will exit script on failure.
# Arguments: $1 - whitespace separated list of file server base URLs.
#            $2 - path of the image relative to a file server.
#            $3 - partition to write the decompressed image to.
update_partition() {
  local file_servers="$1"
  local file_path="$2"
  local part="$3"

  # TODO(davidriley): Enable blkdiscard when moving to verifying zero blocks
  # before writing them.
  # info blkdiscard "${part}"
  # blkdiscard "${part}"

  local file_server
  local url
  local -a pipestatus

  # Unquoted on purpose: the list is split on whitespace into single URLs.
  for file_server in ${file_servers}; do
    url="${file_server}/${file_path}"
    info Updating "${part}" with "${url}"
    get_url_to_stdout "${url}" | gzip -d | write_partition "${part}"
    # Snapshot immediately; the next command would overwrite PIPESTATUS.
    pipestatus=("${PIPESTATUS[@]}")
    if [[ "${pipestatus[0]}" -ne "0" ]]; then
      # Only a download failure is worth retrying on another server.
      error "Retrieving ${url} failed. (statuses ${pipestatus[*]})"
      info "Retry with next file server."
      continue
    elif [[ "${pipestatus[1]}" -ne "0" ]]; then
      die "Decompressing ${url} failed. (statuses ${pipestatus[*]})"
    elif [[ "${pipestatus[2]}" -ne "0" ]]; then
      die "Writing to ${part} failed. (statuses ${pipestatus[*]})"
    fi
    # Success: stop here.  Carrying on would download the image again from
    # every remaining server and rewrite the partition, and a failure of one
    # of those later downloads could corrupt the image just written.
    return 0
  done

  die "Retrieving ${file_path} failed with all file servers."
}

# Performs a stateful update using a stateful.tgz fetched from a file server.
# Steps: remove the leftovers of a previous update, extract the tarball into
# STATEFUL_DIR, then write "clobber" to the update state file.
# The file servers are tried in order and the first one whose download
# succeeds is used; the remaining servers are then skipped.
# Function will exit script on failure.
# Arguments: $1 - whitespace separated list of file server base URLs.
#            $2 - path of the tarball relative to a file server.
stateful_update() {
  local file_servers="$1"
  local file_path="$2"

  # Stateful reset.
  info "Stateful reset"
  post_status "DUT: Stateful reset"
  rm -rf "${STATEFUL_DIR}/${UPDATE_STATE_FILE}" \
    "${STATEFUL_DIR}/var_new" \
    "${STATEFUL_DIR}/dev_image_new" || die "Unable to reset stateful."

  # Stateful update.
  info "Stateful update"
  post_status "DUT: Stateful update"

  local file_server
  local url
  local -a pipestatus
  local rc=1

  # Unquoted on purpose: the list is split on whitespace into single URLs.
  for file_server in ${file_servers}; do
    url="${file_server}/${file_path}"
    get_url_to_stdout "${url}" |
      tar --ignore-command-error --overwrite \
        --directory="${STATEFUL_DIR}" -xzf -
    # Snapshot immediately; the next command would overwrite PIPESTATUS.
    pipestatus=("${PIPESTATUS[@]}")
    if [[ "${pipestatus[0]}" -ne "0" ]]; then
      # Only a download failure is worth retrying on another server.
      error "Retrieving ${url} failed. (statuses ${pipestatus[*]})"
      info "Retry with next file server."
      continue
    elif [[ "${pipestatus[1]}" -ne "0" ]]; then
      die "Untarring to ${STATEFUL_DIR} failed. (statuses ${pipestatus[*]})"
    fi
    # Success: stop here instead of downloading and extracting the tarball
    # again from every remaining server.
    rc=0
    break
  done
  if [[ "${rc}" -ne "0" ]]; then
    die "Retrieving ${file_path} failed with all file servers."
  fi

  # Stateful clean.
  info "Stateful clean"
  post_status "DUT: Stateful clean"
  printf "clobber" > "${STATEFUL_DIR}/${UPDATE_STATE_FILE}" || \
    die "Unable to clean stateful."
}

# Performs postinst and sets the next kernel.
# The freshly written root filesystem (global NEXT_ROOT) is mounted read-only
# on a temporary directory and its postinst program is run with NEXT_ROOT as
# the only argument.
# Function will exit script on failure.
# Globals:   NEXT_ROOT (read), HASHES_LOG (read)
# Arguments: $1 - kernel partition number; accepted for the benefit of
#                 existing callers but not used by this function.
set_next_kernel() {
  # TODO(davidriley): Fix postinst to avoid unnecessary operations like
  # rewriting hashes and unnecessary delays.
  info "Update next kernel to try (via postinst)"

  local tmpmnt
  tmpmnt="$(mktemp -d)" || die "Unable to create temporary mount point."

  if ! mount -o ro "${NEXT_ROOT}" "${tmpmnt}"; then
    # Do not leave the unused mount point behind.
    rmdir "${tmpmnt}"
    die "Unable to mount ${NEXT_ROOT}."
  fi

  "${tmpmnt}/postinst" "${NEXT_ROOT}"
  # Remember postinst's status; the cleanup below overwrites $?.
  local retval="$?"
  umount "${tmpmnt}"
  rmdir "${tmpmnt}"

  if [[ "${retval}" -ne "0" ]]; then
    echo "Downloaded hashes prior to postinst failure:"
    cat "${HASHES_LOG}"
    die "postinst failed."
  fi
}

# Provision the inactive kernel/rootfs slot and the stateful partition with
# a build, mark the new slot to be booted next and optionally reboot.
# Exits early with status 0 when COMPLETED_FILE already names this build,
# and dies when it names a different build.
# Globals:   NEXT_KERN_PART, NEXT_KERN, NEXT_ROOT (written; NEXT_ROOT is
#            read by set_next_kernel), COMPLETED_FILE (read and written),
#            PARTITION_NUM_{KERN,ROOT}_{A,B}, FLAGS_reboot (read).
# Arguments: $1 - build to provision; also the directory of the payloads
#                 on the file servers.
#            $2 - whitespace separated list of file server base URLs.
#            $3 - timestamp at which the script started, used for the
#                 overall QUICK_PROVISION timing keyvals.
provision_device() {
  local build="$1"
  local file_servers="$2"
  local script_start_time="$3"

  if [[ -f "${COMPLETED_FILE}" ]]; then
    if cmp -s <(echo "${build}") "${COMPLETED_FILE}"; then
      info "Quick provision already complete to desired version: ${build}"
      exit 0
    fi
    local other_version
    other_version="$(<"${COMPLETED_FILE}")"
    die "Previous quick provision attempt to unexpected version has" \
        "completed and waiting for reboot: ${other_version}"
  fi

  # Defined outside this file; presumably provides the PARTITION_NUM_*
  # values used below -- TODO confirm.
  load_base_vars

  local current_root
  local root_disk
  current_root="$(rootdev -s)"
  root_disk="$(rootdev -s -d)"
  info "Current root ${current_root}, disk ${root_disk}"
  keyval "CURRENT_ROOT=${current_root}"

  # Pick the slot that is not currently booted.
  # Handle both /dev/mmcblk0pX and /dev/sdaX style partitions: part_sep is
  # the separator between the disk name and the partition number.
  local part_sep=""
  local next_root_part
  case "${current_root#${root_disk}}" in
    "p${PARTITION_NUM_ROOT_A}")
      part_sep="p"
      NEXT_KERN_PART="${PARTITION_NUM_KERN_B}"
      next_root_part="${PARTITION_NUM_ROOT_B}"
      ;;
    "p${PARTITION_NUM_ROOT_B}")
      part_sep="p"
      NEXT_KERN_PART="${PARTITION_NUM_KERN_A}"
      next_root_part="${PARTITION_NUM_ROOT_A}"
      ;;
    "${PARTITION_NUM_ROOT_A}")
      NEXT_KERN_PART="${PARTITION_NUM_KERN_B}"
      next_root_part="${PARTITION_NUM_ROOT_B}"
      ;;
    "${PARTITION_NUM_ROOT_B}")
      NEXT_KERN_PART="${PARTITION_NUM_KERN_A}"
      next_root_part="${PARTITION_NUM_ROOT_A}"
      ;;
    *)
      die "Unexpected root partition ${current_root}"
      ;;
  esac
  NEXT_KERN="${root_disk}${part_sep}${NEXT_KERN_PART}"
  NEXT_ROOT="${root_disk}${part_sep}${next_root_part}"

  info "Will update kern ${NEXT_KERN}, root ${NEXT_ROOT}"

  # Shutdown ui, update-engine
  info "Shutting down ui, update-engine"
  stop ui
  stop update-engine

  # Kernel.
  info "Update kernel ${NEXT_KERN}"
  post_status "DUT: Updating kernel ${NEXT_KERN}"
  time_cmd UPDATE_KERNEL \
    update_partition "${file_servers}" "${build}/${KERN_IMAGE}" "${NEXT_KERN}"

  # Rootfs.
  info "Update rootfs ${NEXT_ROOT}"
  post_status "DUT: Updating rootfs ${NEXT_ROOT}"
  time_cmd UPDATE_ROOTFS \
    update_partition "${file_servers}" "${build}/${ROOT_IMAGE}" "${NEXT_ROOT}"

  # Stateful.
  time_cmd UPDATE_STATEFUL \
    stateful_update "${file_servers}" "${build}/${STATEFUL_TGZ}"

  # Boot the next kernel.
  time_cmd SET_NEXT_KERNEL \
    set_next_kernel "${NEXT_KERN_PART}"

  # Record that quick provision is complete to avoid another attempt.
  echo "${build}" >"${COMPLETED_FILE}"
  keyval "COMPLETED=${build}"

  if [[ "${FLAGS_reboot}" -eq "${FLAGS_TRUE}" ]]; then
    # Reboot in the background, giving time for the ssh invocation to
    # cleanly terminate.
    info "Reboot (into ${build})"
    post_status "DUT: Reboot"
    # Detach the delayed reboot from stdout/stderr.  Otherwise it keeps the
    # pipe to the logging tee (and the ssh session's streams) open, so the
    # script could not finish until the reboot was already under way.
    (sleep 2; reboot) >/dev/null 2>&1 &
  fi

  end_timing "${script_start_time}" QUICK_PROVISION
}

# Entry point: validate the arguments, log identifying keyvals, then run the
# provisioning while holding an exclusive lock on LOCKFILE.
# Arguments: $1   - build to provision.
#            $2.. - one or more file server base URLs.
# Returns:   the exit status of the locked provisioning subshell.
main() {
  if [[ "$#" -lt 2 ]]; then
    usage "ERROR: Incorrect number of arguments."
  fi
  local build="$1"
  shift
  # Join the remaining arguments into one whitespace separated list; "$*"
  # states that intent explicitly (assigning "$@" to a string does not).
  local file_servers="$*"

  local script_start_time
  script_start_time="$(get_timestamp)"

  info "Provisioning ${build} from ${file_servers}"
  keyval "BOOT_ID=$(</proc/sys/kernel/random/boot_id)"
  keyval "$(grep CHROMEOS_RELEASE_BUILDER_PATH /etc/lsb-release | \
            sed -e s/CHROMEOS_RELEASE_BUILDER_PATH/ORIGINAL_BUILD/)"

  (
    # Ensure no concurrent quick provision attempts.  The wait for the lock
    # is timed, and a failure to obtain it is fatal: provisioning without
    # the lock could race with another attempt.
    lock_start_time="$(get_timestamp)"
    flock 9 || die "Unable to lock ${LOCKFILE}."
    end_timing "${lock_start_time}" LOCK_LOCKFILE

    provision_device "${build}" "${file_servers}" "${script_start_time}"
  ) 9>"${LOCKFILE}"
}

# Run the provisioning, sending both stdout and stderr through tee so that
# everything is shown to the caller and appended to the log file.
main "$@" 2>&1 | tee -a "${FLAGS_logfile}"

# The pipeline's own status would be tee's; report main's status instead.
exit "${PIPESTATUS[0]}"
