cheets_{CTS,GTS}: Configure retry count based on branch number.

The previous implementation, which parsed CHROMEOS_RELEASE_DESCRIPTION, did
not return the right channel, and hence we had to add a hack for release
branches to adjust the retry count.

Instead, this new implementation uses the "branch number" field of
CHROMEOS_RELEASE_VERSION to determine the count. In addition, it splits
the configuration into a default retry count and a maximum retry count.
(We have control files with max_retry=12 or max_retry=30 that are
intended to do that many retries, but the count was always capped at 9.)

BUG=b:119521922
TEST=cheets_CTS_N, cheets_CTS_P, cheets_GTS, cheets_CTS_Instant on
  dev/beta/stable devices all run without problems
CQ-DEPEND=CL:*905295

Change-Id: Ic76baf55456988ba988a78fb7f8ae57848f6f6d4
Reviewed-on: https://chromium-review.googlesource.com/1491092
Commit-Ready: Kazuhiro Inaba <kinaba@chromium.org>
Tested-by: Kazuhiro Inaba <kinaba@chromium.org>
Reviewed-by: Ilja H. Friedel <ihf@chromium.org>
(cherry picked from commit 56a9a474fa47c34bcd3f30fa2a5cd8ec5a65ec9b)
Reviewed-on: https://chromium-review.googlesource.com/c/1494727
Reviewed-by: Kazuhiro Inaba <kinaba@chromium.org>
Commit-Queue: Kazuhiro Inaba <kinaba@chromium.org>
Trybot-Ready: Kazuhiro Inaba <kinaba@chromium.org>
diff --git a/server/cros/tradefed_constants.py b/server/cros/tradefed_constants.py
index 79a6837..8bb1120 100644
--- a/server/cros/tradefed_constants.py
+++ b/server/cros/tradefed_constants.py
@@ -48,3 +48,6 @@
 # longer than on hardware or bare metal.
 LOGIN_BOARD_TIMEOUT = {'betty': 300}
 LOGIN_DEFAULT_TIMEOUT = 90
+
+# Approximately assume ChromeOS revision Rdd-xxxxx.y.z with y>=45 as stable.
+APPROXIMATE_STABLE_BRANCH_NUMBER = 45
diff --git a/server/cros/tradefed_test.py b/server/cros/tradefed_test.py
index 6eeb5f2..3f5f2ae 100644
--- a/server/cros/tradefed_test.py
+++ b/server/cros/tradefed_test.py
@@ -47,14 +47,22 @@
     """Base class to prepare DUT to run tests via tradefed."""
     version = 1
 
-    # Default max_retry based on board and channel.
-    _BOARD_RETRY = {}
-    _CHANNEL_RETRY = {'dev': 5}
+    # Default and upperbounds of max_retry, based on board and revision
+    # after branching (that is, 'y' of R74-12345.y.z).
+    #
+    # By default, 0<=y<1 does 5 retries and 1<=y does 10. The |max_retry|
+    # parameter in control files can override the count, within the
+    # _BRANCH_MAX_RETRY limit below.
+    _BRANCH_DEFAULT_RETRY = [(0, 5), (1, 10)]  # dev=5, beta=stable=10
+    _BRANCH_MAX_RETRY = [(0, 5), (1, 10),      # dev=5, beta=10, stable=99
+        (constants.APPROXIMATE_STABLE_BRANCH_NUMBER, 99)]
+    # TODO(kinaba): betty-arcnext
+    _BOARD_MAX_RETRY = {'betty': 0}
 
     _SHARD_CMD = None
     _board_arch = None
     _board_name = None
-    _release_channel = None
+    _release_branch_number = None  # The 'y' of OS version Rxx-xxxxx.y.z
     _android_version = None
     _num_media_bundles = 0
     _perf_results = []
@@ -778,11 +786,12 @@
                       'ro.product.cpu.abilist')).stdout.split(',')
         return self._abilist
 
-    def _get_release_channel(self):
-        """Returns the DUT channel of the image ('dev', 'beta', 'stable')."""
-        if not self._release_channel:
-            self._release_channel = self._hosts[0].get_channel() or 'dev'
-        return self._release_channel
+    def _get_release_branch_number(self):
+        """Returns the DUT branch number (z of Rxx-yyyyy.z.w) or 0 on error."""
+        if not self._release_branch_number:
+            ver = (self._hosts[0].get_release_version() or '').split('.')
+            self._release_branch_number = (int(ver[1]) if len(ver) >= 3 else 0)
+        return self._release_branch_number
 
     def _get_board_arch(self):
         """Return target DUT arch name."""
@@ -812,9 +821,11 @@
         @param max_retry: max_retry specified in the control file.
         @return: number of retries for this specific host.
         """
+        if max_retry is None:
+            max_retry = self._get_branch_retry(self._BRANCH_DEFAULT_RETRY)
         candidate = [max_retry]
         candidate.append(self._get_board_retry())
-        candidate.append(self._get_channel_retry())
+        candidate.append(self._get_branch_retry(self._BRANCH_MAX_RETRY))
         return min(x for x in candidate if x is not None)
 
     def _get_board_retry(self):
@@ -823,19 +834,19 @@
         @return: number of max_retry or None.
         """
         board = self._get_board_name()
-        if board in self._BOARD_RETRY:
-            return self._BOARD_RETRY[board]
+        if board in self._BOARD_MAX_RETRY:
+            return self._BOARD_MAX_RETRY[board]
         logging.info('No board retry specified for board: %s', board)
         return None
 
-    def _get_channel_retry(self):
-        """Returns the maximum number of retries for DUT image channel."""
-        channel = self._get_release_channel()
-        if channel in self._CHANNEL_RETRY:
-            return self._CHANNEL_RETRY[channel]
-        retry = self._CHANNEL_RETRY['dev']
-        logging.warning('Could not establish channel. Using retry=%d.', retry)
-        return retry
+    def _get_branch_retry(self, table):
+        """Returns the retry count for DUT branch number defined in |table|."""
+        number = self._get_release_branch_number()
+        for lowerbound, retry in reversed(table):
+            if lowerbound <= number:
+                return retry
+        logging.warning('Could not establish channel. Using retry=0.')
+        return 0
 
     def _run_precondition_scripts(self, commands, steps):
         """Run precondition scripts on all the hosts."""
@@ -990,6 +1001,7 @@
                                    test_name,
                                    run_template,
                                    retry_template,
+                                   timeout,
                                    needs_push_media=False,
                                    target_module=None,
                                    target_plan=None,
@@ -1003,6 +1015,12 @@
         We first kick off the specified module. Then rerun just the failures
         on the next MAX_RETRY iterations.
         """
+        # On dev and beta channels timeouts are sharp, lenient on stable.
+        self._timeout = timeout
+        if (self._get_release_branch_number() >=
+                constants.APPROXIMATE_STABLE_BRANCH_NUMBER):
+            self._timeout += 3600
+
         if self._should_skip_test(bundle):
             logging.warning('Skipped test %s', ' '.join(test_name))
             return
diff --git a/server/site_tests/cheets_CTS_N/cheets_CTS_N.py b/server/site_tests/cheets_CTS_N/cheets_CTS_N.py
index 135abf6..d391a13 100644
--- a/server/site_tests/cheets_CTS_N/cheets_CTS_N.py
+++ b/server/site_tests/cheets_CTS_N/cheets_CTS_N.py
@@ -19,6 +19,7 @@
 from autotest_lib.server import hosts
 from autotest_lib.server import utils
 from autotest_lib.server.cros import camerabox_utils
+from autotest_lib.server.cros import tradefed_constants as constants
 from autotest_lib.server.cros import tradefed_test
 
 # Maximum default time allowed for each individual CTS module.
@@ -37,8 +38,9 @@
     """Sets up tradefed to run CTS tests."""
     version = 1
 
-    _BOARD_RETRY = {'betty': 0}
-    _CHANNEL_RETRY = {'dev': 9, 'beta': 9, 'stable': 9}
+    _BRANCH_DEFAULT_RETRY = [(0, 10)]  # dev=beta=stable=10
+    _BRANCH_MAX_RETRY = [(0, 10),      # dev=beta=10, stable=99
+        (constants.APPROXIMATE_STABLE_BRANCH_NUMBER, 99)]
     _SHARD_CMD = '--shards'
     # TODO(pwang): b/110966363, remove it once scarlet is fixed.
     _NEED_DEVICE_INFO_BOARDS = ['scarlet', 'veyron_tiger']
@@ -196,16 +198,11 @@
         dut before the log-in for the test is performed.
         @param timeout: time after which tradefed can be interrupted.
         """
-
-        # On dev and beta channels timeouts are sharp, lenient on stable.
-        self._timeout = timeout
-        if self._get_release_channel() == 'stable':
-            self._timeout += 3600
-
         self._run_tradefed_with_retries(
             test_name=test_name,
             run_template=run_template,
             retry_template=retry_template,
+            timeout=timeout,
             target_module=target_module,
             target_plan=target_plan,
             needs_push_media=needs_push_media,
diff --git a/server/site_tests/cheets_GTS/cheets_GTS.py b/server/site_tests/cheets_GTS/cheets_GTS.py
index ac180ee..a7e93a6 100644
--- a/server/site_tests/cheets_GTS/cheets_GTS.py
+++ b/server/site_tests/cheets_GTS/cheets_GTS.py
@@ -31,8 +31,6 @@
     """Sets up tradefed to run GTS tests."""
     version = 1
 
-    _BOARD_RETRY = {'betty': 0}
-    _CHANNEL_RETRY = {'dev': 5, 'beta': 5, 'stable': 5}
     _SHARD_CMD = '--shard-count'
 
     def _tradefed_retry_command(self, template, session_id):
@@ -128,12 +126,6 @@
         @param login_precondition_commands: a list of scripts to be run on the
         dut before the log-in for the test is performed.
         """
-
-        # On dev and beta channels timeouts are sharp, lenient on stable.
-        self._timeout = timeout
-        if self._get_release_channel() == 'stable':
-            self._timeout += 3600
-
         # Download the GTS auth key to the local temp directory.
         tmpdir = tempfile.mkdtemp()
         try:
@@ -144,6 +136,7 @@
                 test_name=test_name,
                 run_template=run_template,
                 retry_template=retry_template,
+                timeout=timeout,
                 target_module=target_module,
                 target_plan=target_plan,
                 needs_push_media=needs_push_media,