futility: updater: support reading the archives using libarchive

The firmware archive files from buildbot are usually released in
tar+bzip2 format. Although the standard format of the archive
(chromeos-firmwareupdate) is ZIP, it is very helpful if the developers
can quickly run the update using the standard tar.bz2 archive files.

BUG=b:230679721
TEST=futility update --manifest -a \
       ~/Downloads/ChromeOS-firmware-R91-13885.3.0-asurada.tar.bz2
     QEMU_LD_PREFIX=/build/cherry qemu-arm /build/cherry/usr/bin/futility \
       update --manifest -a \
       ~/Downloads/ChromeOS-firmware-R91-13885.3.0-asurada.tar.bz2
BRANCH=None

Change-Id: Ibbab2e8226a00e8b5b292293af570eda37b31a8a
Signed-off-by: Hung-Te Lin <hungte@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/vboot_reference/+/3644540
Reviewed-by: Yu-Ping Wu <yupingso@chromium.org>
diff --git a/Makefile b/Makefile
index 6340fb2..beaffaf 100644
--- a/Makefile
+++ b/Makefile
@@ -275,6 +275,13 @@
   LIBZIP_LIBS := $(shell ${PKG_CONFIG} --libs libzip)
 endif
 
+LIBARCHIVE_VERSION := $(shell ${PKG_CONFIG} --modversion libarchive 2>/dev/null)
+HAVE_LIBARCHIVE := $(if ${LIBARCHIVE_VERSION},1)
+ifneq ($(filter-out 0,${HAVE_LIBARCHIVE}),)
+  CFLAGS += -DHAVE_LIBARCHIVE $(shell ${PKG_CONFIG} --cflags libarchive)
+  LIBARCHIVE_LIBS := $(shell ${PKG_CONFIG} --libs libarchive)
+endif
+
 HAVE_CROSID := $(shell ${PKG_CONFIG} --exists crosid && echo 1)
 ifeq ($(HAVE_CROSID),1)
   CFLAGS += -DHAVE_CROSID $(shell ${PKG_CONFIG} --cflags crosid)
@@ -1040,7 +1047,8 @@
 futil: ${FUTIL_BIN}
 
 # FUTIL_LIBS is shared by FUTIL_BIN and TEST_FUTIL_BINS.
-FUTIL_LIBS = ${CROSID_LIBS} ${CRYPTO_LIBS} ${LIBZIP_LIBS} ${FLASHROM_LIBS}
+FUTIL_LIBS = ${CROSID_LIBS} ${CRYPTO_LIBS} ${LIBZIP_LIBS} ${LIBARCHIVE_LIBS} \
+	${FLASHROM_LIBS}
 
 ${FUTIL_BIN}: LDLIBS += ${FUTIL_LIBS}
 ${FUTIL_BIN}: ${FUTIL_OBJS} ${UTILLIB} ${FWLIB}
diff --git a/futility/updater_archive.c b/futility/updater_archive.c
index aa3e2f9..dafce3c 100644
--- a/futility/updater_archive.c
+++ b/futility/updater_archive.c
@@ -19,6 +19,11 @@
 #include <sys/time.h>
 #include <unistd.h>
 
+#ifdef HAVE_LIBARCHIVE
+#include <archive.h>
+#include <archive_entry.h>
+#endif
+
 #ifdef HAVE_LIBZIP
 #ifndef __clang__
 /* If libzip headers were built for Clang but later get included with GCC you
@@ -238,6 +243,226 @@
 	return r;
 }
 
+#ifdef HAVE_LIBARCHIVE
+
+/*
+ * For stream-based archives (e.g., tar+gz) we want to create a cache for
+ * storing the names and contents for later processing.
+ */
+struct archive_cache {
+	char *name;			/* strdup()ed entry pathname */
+	uint8_t *data;			/* contents buffer, or NULL */
+	int64_t mtime;			/* from archive_entry_mtime() */
+	size_t size;			/* from archive_entry_size() */
+	int has_data;			/* 1 once `data` holds the entry */
+	struct archive_cache *next;	/* next node; NULL ends the list */
+};
+
+/*
+ * Prepend a new node named `name` to `cache` and return the new list head.
+ * Returns NULL, leaving `cache` untouched, if `name` is NULL or on OOM.
+ */
+static struct archive_cache *archive_cache_new(struct archive_cache *cache,
+					       const char *name)
+{
+	struct archive_cache *c = name ? calloc(1, sizeof(*c)) : NULL;
+
+	if (!c)
+		return NULL;
+	c->name = strdup(name);
+	if (!c->name) {
+		free(c);
+		return NULL;
+	}
+	c->next = cache;
+	return c;
+}
+
+/* Return the cache node named `name`, or NULL if there is none. */
+static struct archive_cache *archive_cache_find(struct archive_cache *c,
+						const char *name)
+{
+	for (; c; c = c->next) {
+		assert(c->name);
+		if (strcmp(c->name, name) == 0)
+			break;
+	}
+	return c;
+}
+
+/* Run `callback` on each cached name until one returns non-zero. */
+static int archive_cache_walk(
+		struct archive_cache *c, void *arg,
+		int (*callback)(const char *name, void *arg))
+{
+	for (; c != NULL; c = c->next) {
+		assert(c->name);
+		if (callback(c->name, arg) != 0)
+			break;
+	}
+	return 0;
+}
+
+/* Free every node of the list (names and data too); returns NULL. */
+static void *archive_cache_free(struct archive_cache *c)
+{
+	struct archive_cache *next;
+
+	for (; c; c = next) {
+		next = c->next;
+		free(c->name);
+		free(c->data);
+		free(c);
+	}
+
+	return NULL;
+}
+
+enum {
+	FILTER_IGNORE,		/* skip the entry entirely */
+	FILTER_ABORT,		/* stop reading the archive */
+	FILTER_NAME_ONLY,	/* cache the name, not the contents */
+	FILTER_READ_ALL,	/* cache the name and the contents */
+};
+
+/*
+ * Cache the regular files of archive `fpath`; `filter` (NULL: read all)
+ * returns a FILTER_* action per entry. Returns the list head, or NULL.
+ */
+static struct archive_cache *libarchive_read_file_entries(
+		const char *fpath, int (*filter)(struct archive_entry *entry))
+{
+	struct archive *a = archive_read_new();
+	struct archive_entry *entry;
+	struct archive_cache *c, *cache = NULL;
+	int64_t size;
+	int r;
+
+	assert(a);
+	archive_read_support_filter_all(a);
+	archive_read_support_format_all(a);
+	if (archive_read_open_filename(a, fpath, 10240) != ARCHIVE_OK) {
+		ERROR("Failed parsing archive using libarchive: %s\n", fpath);
+		archive_read_free(a);
+		return NULL;
+	}
+	WARN("Loading data from archive: %s ", fpath);
+	while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
+		fputc('.', stderr);
+		if (archive_entry_filetype(entry) != AE_IFREG ||
+		    !archive_entry_pathname(entry))
+			continue;
+		r = filter ? filter(entry) : FILTER_READ_ALL;
+		if (r == FILTER_ABORT)
+			break;
+		if (r == FILTER_IGNORE)
+			continue;
+		c = archive_cache_new(cache, archive_entry_pathname(entry));
+		if (!c) {
+			ERROR("Internal error: out of memory.\n");
+			archive_cache_free(cache);
+			archive_read_free(a);
+			return NULL;
+		}
+		cache = c;
+		if (r == FILTER_NAME_ONLY)
+			continue;
+		assert(r == FILTER_READ_ALL);
+		/* read_file reports uint32_t sizes; the buffer is size + 1. */
+		size = archive_entry_size(entry);
+		if (size < 0 || size >= UINT32_MAX) {
+			WARN("Unsupported size in archive: %s\n", c->name);
+			continue;
+		}
+		c->size = size;
+		c->mtime = archive_entry_mtime(entry);
+		c->data = (uint8_t *)calloc(1, c->size + 1);
+		if (!c->data) {
+			WARN("Out of memory when loading: %s\n", c->name);
+		} else if (archive_read_data(a, c->data, c->size) != size) {
+			WARN("Failed reading from archive: %s\n", c->name);
+		} else {
+			c->has_data = 1;
+		}
+	}
+	fputs("\r\n", stderr);  /* Flush the '.' */
+	VB2_DEBUG("Finished loading from archive: %s.\n", fpath);
+	archive_read_free(a);
+	return cache;
+}
+
+/* Callback for archive_open: load the archive `name` into a cache list. */
+static void *archive_libarchive_open(const char *name)
+{
+	/*
+	 * The firmware archives today can usually all load into memory
+	 * so we are using a NULL filter. Change that to a specific list in
+	 * the future if the /build/$BOARD/firmware archive becomes too large.
+	 */
+	return libarchive_read_file_entries(name, NULL);
+}
+
+/* Callback for archive_close: free the cache list; always returns 0. */
+static int archive_libarchive_close(void *handle)
+{
+	archive_cache_free(handle);
+	return 0;
+}
+
+/* Callback for archive_has_entry: non-zero if `fname` is in the cache. */
+static int archive_libarchive_has_entry(void *handle, const char *fname)
+{
+	return archive_cache_find(handle, fname) != NULL;
+}
+
+/* Callback for archive_walk: forwards to archive_cache_walk(). */
+static int archive_libarchive_walk(
+		void *handle, void *arg,
+		int (*callback)(const char *name, void *arg))
+{
+	return archive_cache_walk(handle, arg, callback);
+}
+
+/* Callback for archive_read_file: hand out a copy of a cached entry. */
+static int archive_libarchive_read_file(
+		void *handle, const char *fname, uint8_t **data,
+		uint32_t *size, int64_t *mtime)
+{
+	struct archive_cache *entry = archive_cache_find(handle, fname);
+
+	if (entry == NULL)
+		return 1;
+	if (!entry->has_data) {
+		/* TODO(hungte) Re-read. */
+		ERROR("Not in the cache: %s\n", fname);
+		return 1;
+	}
+
+	if (mtime != NULL)
+		*mtime = entry->mtime;
+	if (size != NULL)
+		*size = entry->size;
+	/* One extra byte keeps the copy NUL-terminated. */
+	*data = (uint8_t *)malloc(entry->size + 1);
+	if (*data == NULL) {
+		ERROR("Out of memory when reading: %s\n", entry->name);
+		return 1;
+	}
+	memcpy(*data, entry->data, entry->size);
+	(*data)[entry->size] = '\0';
+	return 0;
+}
+
+/* Callback for archive_write_file: not supported, always returns 1. */
+static int archive_libarchive_write_file(
+		void *handle, const char *fname, uint8_t *data, uint32_t size,
+		int64_t mtime)
+{
+	ERROR("Not implemented!\n");
+	return 1;
+}
+#endif
+
 #ifdef HAVE_LIBZIP
 
 /* Callback for archive_open on a ZIP file. */
@@ -368,7 +593,7 @@
 		return NULL;
 	}
 
-	ar = (struct u_archive *)malloc(sizeof(*ar));
+	ar = (struct u_archive *)calloc(sizeof(*ar), 1);
 	if (!ar) {
 		ERROR("Internal error: allocation failure.\n");
 		return NULL;
@@ -384,22 +609,48 @@
 		ar->has_entry = archive_fallback_has_entry;
 		ar->read_file = archive_fallback_read_file;
 		ar->write_file = archive_fallback_write_file;
-	} else {
+	}
+
+	/* Format detection must try ZIP (the official format) first. */
 #ifdef HAVE_LIBZIP
-		VB2_DEBUG("Found file, use ZIP driver: %s\n", path);
-		ar->open = archive_zip_open;
-		ar->close = archive_zip_close;
-		ar->walk = archive_zip_walk;
-		ar->has_entry = archive_zip_has_entry;
-		ar->read_file = archive_zip_read_file;
-		ar->write_file = archive_zip_write_file;
-#else
-		ERROR("Found file, but no drivers were enabled: %s\n", path);
+	if (!ar->open) {
+		ar->handle = archive_zip_open(path);
+
+		if (ar->handle) {
+			VB2_DEBUG("Found a ZIP file: %s\n", path);
+			ar->open = archive_zip_open;
+			ar->close = archive_zip_close;
+			ar->walk = archive_zip_walk;
+			ar->has_entry = archive_zip_has_entry;
+			ar->read_file = archive_zip_read_file;
+			ar->write_file = archive_zip_write_file;
+		}
+	}
+#endif
+
+	/* LIBARCHIVE must be the last driver. */
+#ifdef HAVE_LIBARCHIVE
+	if (!ar->open) {
+		VB2_DEBUG("Found a file, use libarchive: %s\n", path);
+		ar->open = archive_libarchive_open;
+		ar->close = archive_libarchive_close;
+		ar->walk = archive_libarchive_walk;
+		ar->has_entry = archive_libarchive_has_entry;
+		ar->read_file = archive_libarchive_read_file;
+		ar->write_file = archive_libarchive_write_file;
+	}
+#endif
+
+	if (!ar->open) {
+		ERROR("Found a file, but no drivers were selected: %s\n", path);
 		free(ar);
 		return NULL;
-#endif
 	}
-	ar->handle = ar->open(path);
+
+	/* Some drivers may have already opened the archive. */
+	if (!ar->handle)
+		ar->handle = ar->open(path);
+
 	if (!ar->handle) {
 		ERROR("Failed to open archive: %s\n", path);
 		free(ar);