init: mount-encrypted: don't regenerate encryption key on TPM error

1. /dev/tpm0 should be available before mount_encrypted tries to load
the system key.
2. The system key should always persist on a TPM 2.0 device.

We should reboot the device when either of these two conditions is not
met, and show the recovery screen if there is no way to recover the
data.

BUG=b:175152256, b:174978923, b:140530417
TEST=check the data in encstateful doesn't disappear on a TPM error
TEST=check OOBE works as normal
TEST=the device doesn't enter a reboot loop when /dev/tpm0 is always absent
TEST=unit-tests of cryptohome

Change-Id: I610e71e21064918d6f3789bdce9ea64217b1c906
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform2/+/2592070
Tested-by: Yi Chou <yich@google.com>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Reviewed-by: Andrey Pronin <apronin@chromium.org>
Reviewed-by: Leo Lai <cylai@google.com>
Reviewed-by: Achuith Bhandarkar <achuith@chromium.org>
Commit-Queue: Yi Chou <yich@google.com>
diff --git a/cryptohome/mount_encrypted/encryption_key.cc b/cryptohome/mount_encrypted/encryption_key.cc
index 3754a4d..f442a33 100644
--- a/cryptohome/mount_encrypted/encryption_key.cc
+++ b/cryptohome/mount_encrypted/encryption_key.cc
@@ -269,6 +269,14 @@
     }
 
     if (!system_key_.empty() && loader_->Persist() != RESULT_SUCCESS) {
+      if (USE_TPM2) {
+        // Persisting the system key should not fail on TPM2; a failure here
+        // indicates a TPM error.
+        LOG(ERROR) << "Failed to persist the system key.";
+        // We shouldn't continue to regenerate the existing encryption key.
+        system_key_status_ = SystemKeyStatus::kUnknown;
+        return RESULT_FAIL_FATAL;
+      }
       system_key_.clear();
     }
   }
diff --git a/cryptohome/mount_encrypted/encryption_key_unittest.cc b/cryptohome/mount_encrypted/encryption_key_unittest.cc
index bc3d56b..90dce57 100644
--- a/cryptohome/mount_encrypted/encryption_key_unittest.cc
+++ b/cryptohome/mount_encrypted/encryption_key_unittest.cc
@@ -273,16 +273,20 @@
     EXPECT_TRUE(base::PathExists(key_->key_path()));
   }
 
+  void ExpectSystemKeyFailed() {
+    EXPECT_EQ(key_->LoadChromeOSSystemKey(), RESULT_FAIL_FATAL);
+  }
+
   void ExpectFreshKey() {
-    key_->LoadChromeOSSystemKey();
-    key_->LoadEncryptionKey();
+    EXPECT_EQ(key_->LoadChromeOSSystemKey(), RESULT_SUCCESS);
+    EXPECT_EQ(key_->LoadEncryptionKey(), RESULT_SUCCESS);
     EXPECT_EQ(key_->encryption_key().size(), kEncryptionKeySize);
     EXPECT_TRUE(key_->is_fresh());
   }
 
   void ExpectExistingKey(const uint8_t* expected_key) {
-    key_->LoadChromeOSSystemKey();
-    key_->LoadEncryptionKey();
+    EXPECT_EQ(key_->LoadChromeOSSystemKey(), RESULT_SUCCESS);
+    EXPECT_EQ(key_->LoadEncryptionKey(), RESULT_SUCCESS);
     EXPECT_EQ(key_->encryption_key().size(), kEncryptionKeySize);
     if (expected_key) {
       EXPECT_EQ(
@@ -343,10 +347,8 @@
 TEST_F(EncryptionKeyTest, TpmOwnedNoSpaces) {
   SetOwned();
 
-  ExpectFreshKey();
-  EXPECT_EQ(EncryptionKeyStatus::kFresh, key_->encryption_key_status());
-  ExpectNeedsFinalization();
-  EXPECT_EQ(SystemKeyStatus::kFinalizationPending, key_->system_key_status());
+  ExpectSystemKeyFailed();
+  EXPECT_EQ(SystemKeyStatus::kUnknown, key_->system_key_status());
 }
 
 TEST_F(EncryptionKeyTest, TpmExistingSpaceNoKeyFile) {
@@ -379,10 +381,8 @@
              sizeof(kEncStatefulTpm2Contents));
   WriteWrappedKey(key_->key_path(), kWrappedKeyEncStatefulTpm2);
 
-  ExpectFreshKey();
-  EXPECT_EQ(EncryptionKeyStatus::kFresh, key_->encryption_key_status());
-  ExpectNeedsFinalization();
-  EXPECT_EQ(SystemKeyStatus::kFinalizationPending, key_->system_key_status());
+  ExpectSystemKeyFailed();
+  EXPECT_EQ(SystemKeyStatus::kUnknown, key_->system_key_status());
 }
 
 TEST_F(EncryptionKeyTest, TpmExistingSpaceNotYetWritten) {
diff --git a/cryptohome/mount_encrypted/mount_encrypted.cc b/cryptohome/mount_encrypted/mount_encrypted.cc
index 1261e84..b211972 100644
--- a/cryptohome/mount_encrypted/mount_encrypted.cc
+++ b/cryptohome/mount_encrypted/mount_encrypted.cc
@@ -316,6 +316,12 @@
   auto loader = mount_encrypted::SystemKeyLoader::Create(&tpm, rootdir);
   mount_encrypted::EncryptionKey key(loader.get(), rootdir);
   if (has_chromefw()) {
+    if (!tpm.available()) {
+      // The TPM should be available before we load the system_key.
+      LOG(ERROR) << "TPM not available.";
+      // We shouldn't continue to load the system_key.
+      return RESULT_FAIL_FATAL;
+    }
     rc = key.LoadChromeOSSystemKey();
   } else {
     rc = key.SetInsecureFallbackSystemKey();
diff --git a/init/chromeos_startup b/init/chromeos_startup
index 294a734..10ae114 100755
--- a/init/chromeos_startup
+++ b/init/chromeos_startup
@@ -16,6 +16,11 @@
 # encrypted stateful partition.
 ENCRYPTED_STATEFUL_MNT="/mnt/stateful_partition/encrypted"
 
+# Flag file indicating that mount encrypted stateful failed last time.
+# If the file is present and mount_encrypted fails again, the machine enters
+# self-repair mode.
+MOUNT_ENCRYPTED_FAILED_FILE="/mnt/stateful_partition/mount_encrypted_failed"
+
 # USE_ENCRYPTED_REBOOT_VAULT determines whether the encrypted reboot vault
 # should be created/mounted.
 USE_ENCRYPTED_REBOOT_VAULT=1
@@ -526,9 +531,15 @@
 mount_or_fail --bind /mnt/stateful_partition/home /home
 
 if ! do_mount_var_and_home_chronos; then
-  add_clobber_crash_report "encstateful"
-  cleanup_mounts "var and home"
+  if [ ! -O "${MOUNT_ENCRYPTED_FAILED_FILE}" ]; then
+    touch "${MOUNT_ENCRYPTED_FAILED_FILE}"
+  else
+    crossystem recovery_request=1
+  fi
+  reboot
+  exit 0
 fi
+rm -f "${MOUNT_ENCRYPTED_FAILED_FILE}"
 remember_mount "${ENCRYPTED_STATEFUL_MNT}"
 
 # Setup the encrypted reboot vault once the encrypted stateful partition
diff --git a/init/upstart/test-init/test_utils.sh b/init/upstart/test-init/test_utils.sh
index ad84e2d..6e8a7ef 100644
--- a/init/upstart/test-init/test_utils.sh
+++ b/init/upstart/test-init/test_utils.sh
@@ -5,6 +5,12 @@
 # Utility functions for chromeos_startup to run for test images (loaded by
 # dev_utils.sh).
 
+# Flag file indicating that mount encrypted stateful failed last time.
+# If the file is present and mount_encrypted fails again, the machine enters
+# self-repair mode.
+# It should be the same as MOUNT_ENCRYPTED_FAILED_FILE in chromeos_startup.
+MOUNT_ENCRYPTED_FAILED_FILE="/mnt/stateful_partition/mount_encrypted_failed"
+
 # Load factory utilities.
 . /usr/share/cros/factory_utils.sh
 
@@ -39,6 +45,13 @@
     # call is no-op.
     create_system_key
 
+    if [ ! -O "${MOUNT_ENCRYPTED_FAILED_FILE}" ]; then
+      # Try to use the original handler in chromeos_startup.
+      # It should not wipe whole stateful partition in this case.
+      mount_var_and_home_chronos
+      return $?
+    fi
+
     if ! mount_var_and_home_chronos; then
       # Try to re-construct encrypted folders, otherwise such failure will lead
       # to wiping whole stateful partition (including all helpful programs in