mount-encrypted: write key to disk initially

The encrypted partition has been plagued with TPM problems: systems that
have a wedged TPM, or that interrupt TPM ownership, Lockbox creation,
etc., all fail to keep the encrypted partition across a reboot.
As a result, we're forced to write the encryption key to disk initially,
and then throw it away once the system key from NVRAM can be used to
encrypt it.

On most systems that have a sane unowned TPM, the key will only be on
disk until the first login finishes and Cryptohome can Finalize the
NVRAM area. All other systems will continue to run, but with their
encryption key effectively in the clear. Technically, this is not a
regression from R21, so at least we can move forward and work to
improve this in the future.

Some attempt is made to wipe out the key, but this is especially ugly for
SSDs, since doing a "shred" just means the blocks will get moved around.
When ext4 supports "secure delete", we can move to that instead.

BUG=chromium-os:32951
TEST=alex build, manual testing

Change-Id: I9b9a0190ea0f47a277a150eb0882e4a507ff2927
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-on: https://gerrit.chromium.org/gerrit/29123
Reviewed-by: Gaurav Shah <gauravsh@chromium.org>
diff --git a/utility/mount-encrypted.c b/utility/mount-encrypted.c
index 5c3dd67..26f1542 100644
--- a/utility/mount-encrypted.c
+++ b/utility/mount-encrypted.c
@@ -58,6 +58,7 @@
 static const size_t kExt4MinBytes = 16 * 1024 * 1024;
 static const char * const kStaticKeyDefault = "default unsafe static key";
 static const char * const kStaticKeyFactory = "factory unsafe static key";
+static const char * const kStaticKeyFinalizationNeeded = "needs finalization";
 static const int kModeProduction = 0;
 static const int kModeFactory = 1;
 static const int kCryptAllowDiscard = 1;
@@ -102,6 +103,7 @@
 static gchar *rootdir = NULL;
 static gchar *stateful_mount = NULL;
 static gchar *key_path = NULL;
+static gchar *needs_finalization_path = NULL;
 static gchar *block_path = NULL;
 static gchar *encrypted_mount = NULL;
 static gchar *dmcrypt_name = NULL;
@@ -256,10 +258,10 @@
  *
  * NVRAM area cases:
  *  - no NVRAM area at all:
- *    - if cr48, assume this is going to always be missing the lockbox:
- *      - expect no lockbox (migration allowed).
- *    - otherwise, assume an interrupted install
- *      - expect modern lockbox (no migration allowed).
+ *    - interrupted install (cryptohome has the TPM password)
+ *    - ancient device (cr48, cryptohome has thrown away TPM password)
+ *    - broken device (cryptohome has thrown away/never had TPM password)
+ *      - must expect worst-case: no lockbox ever, and migration allowed.
  *  - defined NVRAM area, but not written to ("Finalized"); interrupted OOBE:
  *    - if legacy size, allow migration.
  *    - if not, disallow migration.
@@ -269,14 +271,12 @@
  *
  * When returning 1: (NVRAM area found and used)
  *  - *digest populated with NVRAM area entropy.
- *  - *old_lockbox indicates which lockbox NVRAM area type was used.
- *  - *static_key will always be 0.
+ *  - *migrate is 1 for NVRAM v1, 0 for NVRAM v2.
  * When returning 0: (NVRAM missing or error)
  *  - *digest untouched.
- *  - *old_lockbox indicates future expected lockbox size (after Finalize).
- *  - *static_key=1 only if TPM owned, lockbox missing, and device is cr48.
+ *  - *migrate always 1
  */
-static int get_nvram_key(uint8_t *digest, int *old_lockbox, int *static_key)
+static int get_nvram_key(uint8_t *digest, int *migrate)
 {
 	uint8_t owned = 0;
 	uint8_t value[kLockboxSizeV2], bytes_anded, bytes_ored;
@@ -284,9 +284,8 @@
 	uint8_t *rand_bytes;
 	uint32_t rand_size;
 
-	/* Expect to use a new lockbox by default, and require NVRAM key. */
-	*old_lockbox = 0;
-	*static_key = 0;
+	/* Default to allowing migration (disallow when owned with NVRAMv2). */
+	*migrate = 1;
 
 	/* Ignore unowned TPM's NVRAM area. */
 	result = tpm_owned(&owned);
@@ -315,28 +314,13 @@
 		if (result != TPM_SUCCESS) {
 			/* No NVRAM area at all. */
 			INFO("No NVRAM area defined.");
-
-			/* TPM is owned, without an NVRAM area. If this is a
-			 * Cr48, mark this as using an old NVRAM area to
-			 * allow migration, and mark this as an ancient
-			 * install that did not even know to create a lockbox
-			 * at all. If this is actually a Cr48 that has had
-			 * its OOBE interrupted at the perfect moment, then
-			 * the user is going to lose their settings. This
-			 * should be extremely rare.
-			 */
-			if (is_cr48()) {
-				*static_key = 1;
-				*old_lockbox = 1;
-			}
-
 			return 0;
 		}
 		/* Legacy NVRAM area. */
-		INFO("Legacy NVRAM area found.");
-		*old_lockbox = 1;
+		INFO("Version 1 NVRAM area found.");
 	} else {
-		INFO("NVRAM area found.");
+		*migrate = 0;
+		INFO("Version 2 NVRAM area found.");
 	}
 
 	debug_dump_hex("nvram", value, size);
@@ -354,7 +338,7 @@
 	}
 
 	/* Choose random bytes to use based on NVRAM version. */
-	if (*old_lockbox) {
+	if (*migrate) {
 		rand_bytes = value;
 		rand_size = size;
 	} else {
@@ -402,26 +386,16 @@
 	 * NVRAM.
 	 */
 	if (has_chromefw()) {
-		int rc, static_key = 0;
-		rc = get_nvram_key(digest, migration_allowed, &static_key);
+		int rc;
+		rc = get_nvram_key(digest, migration_allowed);
 
 		if (rc) {
-			/* Use populated NVRAM area. */
 			INFO("Using NVRAM as system key; already populated%s.",
-				migration_allowed ? " (legacy)" : "");
-			return rc;
+				*migration_allowed ? " (legacy)" : "");
 		} else {
-			if (!static_key) {
-				INFO("Using NVRAM as system key; area needed.");
-				return rc;
-			}
-			/* If a static key is allowed, it means the TPM is
-			 * already owned, is missing the NVRAM area, and is
-			 * running on a Cr48. In this special case, fall
-			 * through to the other key methods below, since
-			 * the NVRAM area will never be populated.
-			 */
+			INFO("Using NVRAM as system key; finalization needed.");
 		}
+		return rc;
 	}
 
 	if (get_key_from_cmdline(digest)) {
@@ -602,6 +576,16 @@
 	return 1;
 }
 
+/* Remove the on-disk key copy; overwriting is nearly useless on SSDs. */
+static void finalized(void)
+{
+	if (access(needs_finalization_path, R_OK) != 0)
+		return;
+	/* TODO(keescook): once ext4 supports secure delete, just unlink. */
+	shred(needs_finalization_path);
+	unlink(needs_finalization_path);
+}
+
 static void finalize(uint8_t *system_key, char *encryption_key)
 {
 	struct bind_mount *bind;
@@ -612,6 +596,8 @@
 		return;
 	}
 
+	finalized();
+
 	for (bind = bind_mounts; bind->src; ++ bind) {
 		if (!bind->pending || access(bind->pending, R_OK))
 			continue;
@@ -623,6 +609,20 @@
 	}
 }
 
+/* Store encryption_key on disk, keyed only by the hash of a fixed string. */
+static void needs_finalization(char *encryption_key)
+{
+	uint8_t useless_key[DIGEST_LENGTH];
+
+	sha256((char *)kStaticKeyFinalizationNeeded, useless_key);
+	INFO("Writing finalization intent %s.", needs_finalization_path);
+	if (keyfile_write(needs_finalization_path, useless_key,
+			  encryption_key))
+		return;
+	ERROR("Failed to write %s -- aborting.",
+	      needs_finalization_path);
+}
+
 /* This triggers the live encryption key to be written to disk, encrypted
  * by the system key. It is intended to be called by Cryptohome once the
  * TPM is done being set up. If the system key is passed as an argument,
@@ -746,11 +746,21 @@
 		 */
 		migrate_allowed = 0;
 	} else {
-		INFO("Generating new encryption key.");
-		encryption_key = choose_encryption_key();
-		if (!encryption_key)
-			return 0;
-		rebuild = 1;
+		uint8_t useless_key[DIGEST_LENGTH];
+		sha256((char *)kStaticKeyFinalizationNeeded, useless_key);
+		encryption_key = keyfile_read(needs_finalization_path,
+					      useless_key);
+		if (!encryption_key) {
+			/* This is a brand new system with no keys. */
+			INFO("Generating new encryption key.");
+			encryption_key = choose_encryption_key();
+			if (!encryption_key)
+				return 0;
+			rebuild = 1;
+		} else {
+			ERROR("Finalization unfinished! " \
+			      "Encryption key still on disk!");
+		}
 	}
 
 	if (rebuild) {
@@ -924,14 +934,34 @@
 		}
 	}
 
-	/* Devices that are not using NVRAM for their system key do not
-	 * need to wait for the NVRAM area to be populated by Cryptohome
-	 * and a call to "finalize". Devices that already have the NVRAM
-	 * area populated and are being rebuilt don't need to wait for
-	 * Cryptohome because the NVRAM area isn't going to change.
+	/* When we are creating the encrypted mount for the first time,
+	 * either finalize immediately, or write the encryption key to
+	 * disk (*sigh*) to handle the seemingly endless broken or
+	 * wedged TPM states.
 	 */
-	if (rebuild && has_system_key)
-		finalize(system_key, encryption_key);
+	if (rebuild) {
+		/* Devices that already have the NVRAM area populated and
+		 * are being rebuilt don't need to wait for Cryptohome
+		 * because the NVRAM area isn't going to change.
+		 *
+		 * Devices that do not have the NVRAM area populated
+		 * may potentially never have the NVRAM area populated,
+		 * which means we have to write the encryption key to
+		 * disk until we finalize. Once secure deletion is
+		 * supported on ext4, this won't be as horrible.
+		 */
+		if (has_system_key)
+			finalize(system_key, encryption_key);
+		else
+			needs_finalization(encryption_key);
+	} else {
+		/* If we're not rebuilding and we have a sane system
+		 * key, then we must have finalized. Force any required
+		 * clean up.
+		 */
+		if (has_system_key)
+			finalized();
+	}
 
 	free(lodev);
 	return 1;
@@ -1075,8 +1105,7 @@
 	uint8_t system_key[DIGEST_LENGTH];
 	uint8_t owned = 0;
 	struct bind_mount *mnt;
-	int old_lockbox = -1;
-	int static_key = -1;
+	int migrate = -1;
 
 	printf("TPM: %s\n", has_tpm ? "yes" : "no");
 	if (has_tpm) {
@@ -1087,13 +1116,12 @@
 	printf("CR48: %s\n", is_cr48() ? "yes" : "no");
 	if (has_chromefw()) {
 		int rc;
-		rc = get_nvram_key(system_key, &old_lockbox, &static_key);
+		rc = get_nvram_key(system_key, &migrate);
 		if (!rc)
-			printf("NVRAM: missing%s.\n",
-				static_key ? " (static key allowed)" : "");
+			printf("NVRAM: missing.\n");
 		else {
 			printf("NVRAM: %s, %s.\n",
-				old_lockbox ? "legacy" : "modern",
+				migrate ? "legacy" : "modern",
 				rc ? "available" : "ignored");
 		}
 	}
@@ -1193,6 +1221,9 @@
 	if (asprintf(&key_path, "%s%s", rootdir,
 		     STATEFUL_MNT "/encrypted.key") == -1)
 		goto fail;
+	if (asprintf(&needs_finalization_path, "%s%s", rootdir,
+		     STATEFUL_MNT "/encrypted.needs-finalization") == -1)
+		goto fail;
 	if (asprintf(&block_path, "%s%s", rootdir,
 		     STATEFUL_MNT "/encrypted.block") == -1)
 		goto fail;
@@ -1242,15 +1273,10 @@
 	check_mount_states();
 
 	okay = setup_encrypted(mode);
-	if (!okay) {
-		INFO("Setup failed -- clearing files and retrying.");
-		unlink(key_path);
-		unlink(block_path);
-		okay = setup_encrypted(mode);
-	}
+	/* If we fail, let chromeos_startup handle the stateful wipe. */
 
 	INFO_DONE("Done.");
 
 	/* Continue boot. */
-	return !okay;
+	return okay ? EXIT_SUCCESS : EXIT_FAILURE;
 }
diff --git a/utility/mount-helpers.c b/utility/mount-helpers.c
index 927adb1..893d43e 100644
--- a/utility/mount-helpers.c
+++ b/utility/mount-helpers.c
@@ -147,6 +147,48 @@
 	return binary;
 }
 
+/* Best-effort in-place overwrite of file contents. Useless on SSD. :( */
+void shred(const char *pathname)
+{
+	static const uint8_t patterns[] = { 0xA5, 0x5A, 0xFF, 0x00 };
+	FILE *target;
+	struct stat info;
+	uint8_t *pattern;
+	size_t i;
+	int fd;
+
+	/* Give up if we can't safely open or stat the target. */
+	if ((fd = open(pathname, O_WRONLY | O_NOFOLLOW)) < 0) {
+		PERROR(pathname);
+		return;
+	}
+	/* Log before close() so errno still describes the real failure. */
+	if (fstat(fd, &info) || !(target = fdopen(fd, "w"))) {
+		PERROR(pathname);
+		close(fd);
+		return;
+	}
+	/* Empty files need no overwrite; never memset() a failed malloc(). */
+	pattern = info.st_size > 0 ? malloc(info.st_size) : NULL;
+	if (!pattern && info.st_size > 0)
+		PERROR(pathname);
+	/* Ignore errors here, since there's nothing we can really do. */
+	for (i = 0; pattern && i < sizeof(patterns); ++i) {
+		memset(pattern, patterns[i], info.st_size);
+		if (fseek(target, 0, SEEK_SET))
+			PERROR(pathname);
+		if (fwrite(pattern, info.st_size, 1, target) != 1)
+			PERROR(pathname);
+		if (fflush(target))
+			PERROR(pathname);
+		if (fdatasync(fd))
+			PERROR(pathname);
+	}
+	free(pattern);
+	/* fclose() closes the fd too. */
+	fclose(target);
+}
+
 static int is_loop_device(int fd)
 {
 	struct stat info;
@@ -662,6 +704,13 @@
 	length = cipher_length + final_len;
 
 	DEBUG("Writing %zu bytes to %s", length, keyfile);
+	/* TODO(keescook): use fd here, and set secure delete. Unsupported
+	 * by ext4 currently. :(
+	 * 	int f;
+	 * 	ioctl(fd, EXT2_IOC_GETFLAGS, &f);
+	 * 	f |= EXT2_SECRM_FL;
+	 * 	ioctl(fd, EXT2_IOC_SETFLAGS, &f);
+	 */
 	if (!g_file_set_contents(keyfile, (gchar *)cipher, length, &error)) {
 		ERROR("Unable to write %s: %s", keyfile, error->message);
 		g_error_free(error);
diff --git a/utility/mount-helpers.h b/utility/mount-helpers.h
index ee74535..c5554b3 100644
--- a/utility/mount-helpers.h
+++ b/utility/mount-helpers.h
@@ -14,6 +14,7 @@
 int same_vfs(const char *mnt_a, const char *mnt_b);
 char *stringify_hex(uint8_t *binary, size_t length);
 uint8_t *hexify_string(char *string, uint8_t *binary, size_t length);
+void shred(const char *keyfile);
 
 /* Loopback device attach/detach helpers. */
 gchar *loop_attach(int fd, const char *name);