tpm_lite: stub: retry in case of TPM comm error

This CL retries reads and writes from/to the TPM device if an error
is returned by read()/write(), up to 3 total attempts.

This is useful in case of transient TPM communication errors that go
away after a single retry. Without this CL, after such errors the
encstateful key might be regenerated and the encstateful data wiped.

BRANCH=none
BUG=chromium:702724
TEST=1) normal boot still works;
     2) simulate a single error, verify that it retries.

Change-Id: I259882209df0aad66cd083729f746ea45909922b
Reviewed-on: https://chromium-review.googlesource.com/1067939
Commit-Ready: Andrey Pronin <apronin@chromium.org>
Tested-by: Andrey Pronin <apronin@chromium.org>
Reviewed-by: Andrey Pronin <apronin@chromium.org>
diff --git a/firmware/stub/tpm_lite_stub.c b/firmware/stub/tpm_lite_stub.c
index 004eeaf..c063858 100644
--- a/firmware/stub/tpm_lite_stub.c
+++ b/firmware/stub/tpm_lite_stub.c
@@ -33,6 +33,7 @@
 /* Retry failed open()s for 5 seconds in 10ms polling intervals. */
 #define OPEN_RETRY_DELAY_NS (10 * 1000 * 1000)
 #define OPEN_RETRY_MAX_NUM  500
+#define COMM_RETRY_MAX_NUM  3
 
 /* TODO: these functions should pass errors back rather than returning void */
 /* TODO: if the only callers to these are just wrappers, should just
@@ -90,20 +91,57 @@
 			       "the TPM device was not opened.  " \
 			       "Forgot to call TlclLibInit?\n");
 	} else {
-		int n = write(tpm_fd, in, in_len);
-		if (n != in_len) {
-			return DoError(TPM_E_WRITE_FAILURE,
-				       "write failure to TPM device: %s\n",
-				       strerror(errno));
+		int n;
+		int retries = 0;
+		int first_errno = 0;
+
+		/* Write command. Retry in case of communication errors.
+		 */
+		for ( ; retries < COMM_RETRY_MAX_NUM; ++retries) {
+			n = write(tpm_fd, in, in_len);
+			if (n >= 0) {
+				break;
+			}
+			if (retries == 0) {
+				first_errno = errno;
+			}
+			VB2_DEBUG("TPM: write attempt %d failed: %s\n",
+				  retries + 1, strerror(errno));
 		}
-		n = read(tpm_fd, response, sizeof(response));
+		if (n < 0) {
+			return DoError(TPM_E_WRITE_FAILURE,
+				       "write failure to TPM device: %s "
+				       "(first error %d)\n",
+				       strerror(errno), first_errno);
+		} else if (n != in_len) {
+			return DoError(TPM_E_WRITE_FAILURE,
+				       "bad write size to TPM device: %d vs %u "
+				       "(%d retries, first error %d)\n",
+				       n, in_len, retries, first_errno);
+		}
+
+		/* Read response. Retry in case of communication errors.
+		 */
+		for (retries = 0, first_errno = 0;
+		     retries < COMM_RETRY_MAX_NUM; ++retries) {
+			n = read(tpm_fd, response, sizeof(response));
+			if (n >= 0) {
+				break;
+			}
+			if (retries == 0) {
+				first_errno = errno;
+			}
+			VB2_DEBUG("TPM: read attempt %d failed: %s\n",
+				  retries + 1, strerror(errno));
+		}
 		if (n == 0) {
 			return DoError(TPM_E_READ_EMPTY,
 				       "null read from TPM device\n");
 		} else if (n < 0) {
 			return DoError(TPM_E_READ_FAILURE,
-				       "read failure from TPM device: %s\n",
-				       strerror(errno));
+				       "read failure from TPM device: %s "
+				       "(first error %d)\n",
+				       strerror(errno), first_errno);
 		} else {
 			if (n > *pout_len) {
 				return DoError(TPM_E_RESPONSE_TOO_LARGE,