auto_updater_transfer: add metrics to track fallback devserver calls

Add metrics to measure the number of times fallback calls to devservers
are made so that the fallbacks can be removed completely in the future.

BUG=chromium:1059008
TEST=None

Change-Id: Ib5faefb71ae8c6f1d5558327cf20c44711d78294
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/chromite/+/2285994
Tested-by: Sanika Kulkarni <sanikak@chromium.org>
Reviewed-by: Amin Hassani <ahassani@chromium.org>
Commit-Queue: Amin Hassani <ahassani@chromium.org>
Auto-Submit: Sanika Kulkarni <sanikak@chromium.org>
diff --git a/lib/auto_updater_transfer.py b/lib/auto_updater_transfer.py
index 0e8ef8c..a5555a7 100644
--- a/lib/auto_updater_transfer.py
+++ b/lib/auto_updater_transfer.py
@@ -48,6 +48,7 @@
 
 from chromite.lib import cros_build_lib
 from chromite.lib import cros_logging as logging
+from chromite.lib import metrics
 from chromite.lib import nebraska_wrapper
 from chromite.lib import osutils
 from chromite.lib import retry_util
@@ -335,6 +336,8 @@
           complete list of accepted keyword arguments.
     """
     self._staging_server = staging_server
+    self._fallback_metrics = metrics.Counter(
+        'chromeos/autotest/devserver/remote_devserver_call')
     super(LabTransfer, self).__init__(*args, **kwargs)
 
   @property
@@ -358,6 +361,22 @@
     """
     return r'.*/(R[0-9]+-)(?P<image_version>.+)'
 
+  def _UpdateFallbackMetrics(self, ssh_used, success, hostname=None):
+    """Updates metrics that track the fallback devserver calls.
+
+    Args:
+      ssh_used: Indicates whether the devserver was ssh-ed into first.
+      success: Whether the command completed successfully or not.
+      hostname: Hostname of the devserver on which payloads are staged. If None,
+         then the hostname is derived from the self._staging_server URL.
+    """
+    if not hostname:
+      hostname = urllib.parse.urlparse(self._staging_server).hostname
+
+    self._fallback_metrics.increment(fields={'dev_server': hostname,
+                                             'ssh_used': ssh_used,
+                                             'success': success})
+
   def _RemoteDevserverCall(self, cmd, stdout=False):
     """Runs a command on a remote devserver by sshing into it.
 
@@ -370,11 +389,15 @@
     """
     ip = urllib.parse.urlparse(self._staging_server).hostname
     try:
+      success = True
       return cros_build_lib.run(['ssh', ip] + cmd, log_output=True,
                                 stdout=stdout)
     except cros_build_lib.RunCommandError:
+      success = False
       logging.error('Remote devserver call failed.')
       raise
+    finally:
+      self._UpdateFallbackMetrics(ssh_used=True, success=success, hostname=ip)
 
   def _CheckPayloads(self, payload_name):
     """Runs the curl command that checks if payloads have been staged."""
@@ -386,10 +409,11 @@
     except cros_build_lib.RunCommandError as e:
       logging.error('Could not verify if %s was staged at %s. Received '
                     'exception: %s', payload_name, payload_url, e)
-      # TODO(crbug.com/1059008): The following code blck is a fallback. It
+      # TODO(crbug.com/1059008): The following code block is a fallback. It
       # should be removed once _RemoteDevserverCall has been proven to be
       # reliable.
       try:
+        success = True
         # TODO(crbug.com/1033187): Remove log_output parameter passed to
         # retry_util.RunCurl after the bug is fixed. The log_output=True option
         # has been added to correct what seems to be a timing issue in
@@ -399,8 +423,11 @@
         # open, thus avoiding the failure.
         retry_util.RunCurl(curl_args=cmd[1:], log_output=True)
       except retry_util.DownloadError as e:
+        success = False
         raise ChromiumOSTransferError('Payload %s does not exist at %s: %s' %
                                       (payload_name, payload_url, e))
+      finally:
+        self._UpdateFallbackMetrics(ssh_used=False, success=success)
 
   def CheckPayloads(self):
     """Verify that all required payloads are staged on staging server."""
@@ -564,10 +591,14 @@
               payload_props_filename[len(prefix):])
 
         try:
+          success = True
           retry_util.RunCurl(cmd[1:])
         except retry_util.DownloadError as e:
+          success = False
           raise ChromiumOSTransferError('Unable to download %s: %s' %
                                         (payload_props_filename, e))
+        finally:
+          self._UpdateFallbackMetrics(ssh_used=False, success=success)
 
       self._local_payload_props_path = payload_props_path
     return self._local_payload_props_path
@@ -589,9 +620,13 @@
       # TODO(crbug.com/1059008): Fallback in case the try block above fails.
       # Should be removed once reliable.
       try:
+        success = True
         proc = retry_util.RunCurl(cmd[1:], stdout=True)
       except cros_build_lib.RunCommandError as e:
+        success = False
         raise ChromiumOSTransferError('Unable to get payload size: %s' % e)
+      finally:
+        self._UpdateFallbackMetrics(ssh_used=False, success=success)
 
     pattern = re.compile(r'Content-Length: [0-9]+', re.I)
     match = pattern.findall(proc.output)