common.sh: Add retry logic to loopback_partscan
We have continually had issues with mounting and
unmounting loopback paritions via partx. This commit
adds a mechanism to retry as well as fully detach
and reattach using losetup.
BUG=chromium:1011891
TEST=tryjob
Change-Id: I7153aaec512c70493b4e42adce4472de479ad322
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosutils/+/1885394
Legacy-Commit-Queue: Commit Bot <commit-bot@chromium.org>
Reviewed-by: George Engelbrecht <engeg@google.com>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Commit-Queue: George Engelbrecht <engeg@google.com>
Tested-by: George Engelbrecht <engeg@google.com>
Auto-Submit: George Engelbrecht <engeg@google.com>
(cherry picked from commit fcd236f4029d0e65d0870da789096b5d97cffa02)
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosutils/+/1899052
diff --git a/common.sh b/common.sh
index 3d7ee52..c214797 100644
--- a/common.sh
+++ b/common.sh
@@ -616,28 +616,41 @@
local lb_dev image="$1"
shift
- # Flush any dirty pages in the image before we send losetup that way.
- info "Running sync -f ${image}"
- sync -f "${image}"
+ # We set up a binary backoff for adding the partitions. We give 10 attempts
+ # each time nth time we fail, backing off by 1<<n. The maximum time then
+ # spent sleeping will be sum(2^n) from [0...10] which is 1023 seconds,
+ # or ~37 minutes.
+ local i partx_out partx_d_out sleep_seconds=1
+ for (( i = 0; i <= 10; i++ )); do
+ # Flush any dirty pages in the image before we do partx commands.
+ info "Running sync -f ${image}"
+ sync -f "${image}"
- lb_dev=$(sudo losetup --show -f "$@" "${image}")
+ # This (perhaps) mounts the partitions as well.
+ lb_dev=$(sudo losetup --show -f "$@" "${image}")
- # Ignore problems deleting existing partitions. There shouldn't be any
- # which will upset partx, but that's actually ok.
- sudo partx -d "${lb_dev}" 2>/dev/null || true
+ # Try to clean the slate by removing any existing parts (best effort).
+ partx_d_out=$(sudo partx -v -d "${lb_dev}") || true
- # First try to add missing partitions.
- if ! sudo partx -a "${lb_dev}"; then
- warn "Adding partitions with 'partx -a ${lb_dev}' failed."
- warn "Dumping full kernel buffer"
- dmesg >&2 || true
- sync
- sleep 1
-
- # Try a partition update to recover.
- # https://crbug.com/999596
- sudo partx -u "${lb_dev}"
- fi
+ # Try to add the partitions back.
+ if ! partx_out=$(sudo partx -v -a "${lb_dev}"); then
+ local proc_parts
+ warn "Adding partitions with 'partx -v -a ${lb_dev}' failed."
+ warn "partx -d output:\n${partx_d_out}"
+ warn "partx -a output:\n${partx_out}"
+ proc_parts=$(cat /proc/partitions)
+ warn "/proc/partitions before detaching loopback:\n ${proc_parts}"
+ # Detach the image.
+ sudo losetup -d "${lb_dev}" || true
+ proc_parts=$(cat /proc/partitions)
+ warn "/proc/partitions after detaching loopback:\n ${proc_parts}"
+ warn "Sleeping ${sleep_seconds} before trying again."
+ sleep "${sleep_seconds}"
+ : $(( sleep_seconds <<= 1 ))
+ else
+ break
+ fi
+ done
echo "${lb_dev}"
}