# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import datetime
import logging
import threading

import common
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.server import site_utils
from autotest_lib.server.cros import provision
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

try:
    from chromite.lib import metrics
except ImportError:
    metrics = site_utils.metrics_mock


CONFIG = global_config.global_config

# Default maximum job runtime, in minutes. Read from the AUTOTEST_WEB section
# of the global config; falls back to 72 hours when the option is not set.
JOB_MAX_RUNTIME_MINS_DEFAULT = CONFIG.get_config_value(
        'AUTOTEST_WEB',
        'job_max_runtime_mins_default',
        type=int,
        default=72 * 60)

# Minimum RPC timeout for calls that are expected to take a long time, e.g.,
# create_suite_job. If the default socket timeout
# (socket.getdefaulttimeout()) is None or greater than this value, the default
# is used instead.
# The value is meant to match the timeout of the RetryingAFE object, so that
# long running RPCs can wait long enough before being aborted.
_MIN_RPC_TIMEOUT = 600

# How many days back to look when searching for an existing job.
SEARCH_JOB_MAX_DAYS = 14

# Step, in minutes, by which DedupingScheduler.delay_minutes is changed.
# Stepping the delay lets all suite jobs created in the same event start their
# provision jobs at different times. With a 5 minute step, 40 boards have
# their provision jobs started within about 200 minutes. That way we don't
# add too much delay to test jobs and do not keep suite jobs running for too
# long. Note that suite jobs created by the suite scheduler do not wait for
# test jobs to finish, which helps to reduce the load on the drone.
DELAY_MINUTES_INTERVAL = 5
# Cap the delay at 24 hours, to prevent suite jobs from running for too long.
# Nightly and new_build tasks won't create so many suites that they need such
# a long delay. Weekly tasks, however, can create several hundred suites, as
# most of them are required to run on all branches.
MAX_DELAY_MINUTES = 1440

class DedupingSchedulerException(Exception):
    """Root of the exception hierarchy defined by this module."""


class ScheduleException(DedupingSchedulerException):
    """Signals that scheduling a suite through the AFE failed."""


class DedupException(DedupingSchedulerException):
    """Signals a failure while looking for already-scheduled duplicate jobs."""


class DedupingScheduler(object):
    """A class that will schedule suites to run on a given board, build.

    Includes logic to check whether or not a given (suite, board, build)
    has already been run.  If so, it will skip scheduling that suite.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _file_bug: the `file_bug` value passed to the constructor.
    @var delay_minutes: delay, in minutes, handed to the next suite job that
                        is scheduled without `no_delay`.
    @var delay_minutes_interval: signed step applied to delay_minutes after
                                 each such suite job.
    """

    # Counter incremented once for every suite job successfully created.
    _SUITE_SCHEDULER_SUITE_COUNT = metrics.Counter(
            'chromeos/autotest/suite_scheduler/suite/created')

    def __init__(self, afe=None, file_bug=False):
        """Constructor

        @param afe: an instance of AFE as defined in server/frontend.py.
                    Defaults to a frontend_wrappers.RetryingAFE instance.
        @param file_bug: Stored as self._file_bug; it is not read by any
                         method of this class. Default is False.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._file_bug = file_bug

        # Number of minutes to delay a suite job from creating test jobs.
        self.delay_minutes = 0
        # Number of minutes to increase or decrease self.delay_minutes. When
        # self.delay_minutes reaches MAX_DELAY_MINUTES, it should wind down
        # to allow even distribution of test job creation.
        self.delay_minutes_interval = DELAY_MINUTES_INTERVAL
        # Lock to make sure each suite created with different delay_minutes.
        self._lock = threading.Lock()


    def _ShouldScheduleSuite(self, suite, board, test_source_build):
        """Return True if |suite| has not yet been run for |build| on |board|.

        True if |suite| has not been run for |build| on |board|, and
        the lab is open for this particular request.  False otherwise.

        Only jobs created within the last SEARCH_JOB_MAX_DAYS days are
        considered when looking for an earlier run.

        @param suite: the name of the suite to run, e.g. 'bvt'
        @param board: the board to run the suite on, e.g. x86-alex. Only used
                      for logging here.
        @param test_source_build: Build with the source of tests.

        @return False if the suite was already scheduled or the lab status
                check fails, True if not
        @raise DedupException if the AFE raises while searching for jobs.

        """
        try:
            site_utils.check_lab_status(test_source_build)
        except site_utils.TestLabException as ex:
            logging.debug('Skipping suite %s, board %s, build %s:  %s',
                          suite, board, test_source_build, str(ex))
            return False
        try:
            start_time = str(datetime.datetime.now() -
                             datetime.timedelta(days=SEARCH_JOB_MAX_DAYS))
            # An empty result means no matching suite job exists yet.
            return not self._afe.get_jobs(
                    name__istartswith=test_source_build,
                    name__iendswith='control.'+suite,
                    created_on__gte=start_time,
                    min_rpc_timeout=_MIN_RPC_TIMEOUT)
        except Exception as e:
            raise DedupException(e)


    def _Schedule(self, suite, board, build, pool, num, priority, timeout,
                  file_bugs=False, cheets_build=None, firmware_rw_build=None,
                  firmware_ro_build=None, test_source_build=None,
                  job_retry=False, launch_control_build=None,
                  run_prod_code=False, testbed_dut_count=None, no_delay=False):
        """Schedule |suite| unconditionally, without checking for duplicates.

        @param suite: the name of the suite to run, e.g. 'bvt'
        @param board: the board to run the suite on, e.g. x86-alex
        @param build: the ChromeOS build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param pool: the pool of machines to use for scheduling purposes.
                     Default: None
        @param num: the number of devices across which to shard the test suite.
                    Type: integer or None
                    Default: None (uses sharding factor in global_config.ini).
        @param priority: One of the values from
                         client.common_lib.priorities.Priority.
        @param timeout: The max lifetime of the suite in hours. A false value
                        selects JOB_MAX_RUNTIME_MINS_DEFAULT. The suite's
                        delay is added on top of it.
        @param file_bugs: True if bug filing is desired for this suite. The
                          same value is passed as `wait_for_results`.
        @param cheets_build: CrOS Android build to be used for testing.
                             Default to None.
        @param firmware_rw_build: Firmware build to update RW firmware. Default
                                  to None.
        @param firmware_ro_build: Firmware build to update RO firmware. Default
                                  to None.
        @param test_source_build: Build that contains the server-side test code.
                                  Default to None to use the ChromeOS build
                                  (defined by `build`).
        @param job_retry: Set to True to enable job-level retry. Default is
                          False.
        @param launch_control_build: Name of a Launch Control build, e.g.,
                                     'git_mnc_release/shamu-eng/123'. When
                                     given, it replaces all other builds.
        @param run_prod_code: If True, the suite will run the test code that
                              lives in prod aka the test code currently on the
                              lab servers. If False, the control files and test
                              code for this suite run will be retrieved from the
                              build artifacts. Default is False.
        @param testbed_dut_count: Number of duts to test when using a testbed.
                                  Here it only selects which provision prefix
                                  is used for `launch_control_build`.
        @param no_delay: Set to True to allow suite to be created without
                         configuring delay_minutes. Default is False.

        @return True if the suite got scheduled
        @raise ScheduleException if an error occurs while scheduling, or if
                                 no build was specified at all.

        """
        try:
            # Start from an empty mapping so that the optional builds below
            # can be recorded even when no ChromeOS `build` is given, instead
            # of failing on an unbound local variable.
            builds = {}
            if build:
                builds[provision.CROS_VERSION_PREFIX] = build
            if cheets_build:
                builds[provision.CROS_ANDROID_VERSION_PREFIX] = cheets_build
            if firmware_rw_build:
                builds[provision.FW_RW_VERSION_PREFIX] = firmware_rw_build
            if firmware_ro_build:
                builds[provision.FW_RO_VERSION_PREFIX] = firmware_ro_build
            if launch_control_build:
                # A Launch Control build replaces anything collected above.
                if testbed_dut_count is None:
                    builds = {provision.ANDROID_BUILD_VERSION_PREFIX:
                              launch_control_build}
                else:
                    builds = {provision.TESTBED_BUILD_VERSION_PREFIX:
                              launch_control_build}
            if not builds:
                raise ScheduleException(
                        "Can't schedule %s on %s: no build was specified." %
                        (suite, board))

            # Suite scheduler handles all boards in parallel, to guarantee each
            # call of `create_suite_job` use different value of delay_minutes,
            # we need a lock around get/set attempts of self.delay_minutes.
            # To prevent suite jobs from running too long, the value for
            # self.delay_minutes is limited between 0 and MAX_DELAY_MINUTES.
            # The value starts at 0 and is increased by
            # DELAY_MINUTES_INTERVAL, when it reaches MAX_DELAY_MINUTES, the
            # logic here allows its value to step back by DELAY_MINUTES_INTERVAL
            # at each call of this method. When the value drops back to 0, it
            # will increase again in the next call of this method.
            # Such logic allows the values of delay_minutes for all calls
            # of `create_suite_job` running in parallel to be evenly distributed
            # between 0 and MAX_DELAY_MINUTES.
            delay_minutes = 0
            if not no_delay:
                with self._lock:
                    # This suite takes the current delay; the update below
                    # only affects suites scheduled after it.
                    delay_minutes = self.delay_minutes
                    if ((self.delay_minutes < MAX_DELAY_MINUTES and
                         self.delay_minutes_interval > 0) or
                        (self.delay_minutes >= DELAY_MINUTES_INTERVAL and
                         self.delay_minutes_interval < 0)):
                        self.delay_minutes += self.delay_minutes_interval
                    else:
                        # A bound was hit: keep the delay as is for one more
                        # call and reverse the stepping direction.
                        limit = ('Maximum' if self.delay_minutes_interval > 0
                                 else 'Minimum')
                        logging.info(
                                '%s delay minutes reached when scheduling '
                                '%s on %s against %s (pool: %s)',
                                limit, suite, builds, board, pool)
                        self.delay_minutes_interval = (
                                -self.delay_minutes_interval)

            # Update timeout settings for the suite job with delay_minutes.
            # `timeout` is in hours.
            if not timeout:
                timeout = JOB_MAX_RUNTIME_MINS_DEFAULT / 60.0
            timeout += delay_minutes / 60.0
            # Both minute-based limits are derived from the configured default
            # rather than from the `timeout` argument.
            max_runtime_mins = JOB_MAX_RUNTIME_MINS_DEFAULT + delay_minutes
            timeout_mins = JOB_MAX_RUNTIME_MINS_DEFAULT + delay_minutes

            logging.info('Scheduling %s on %s against %s (pool: %s)...',
                         suite, builds, board, pool)
            job_id = self._afe.run('create_suite_job', name=suite, board=board,
                                   builds=builds, check_hosts=False, num=num,
                                   pool=pool, priority=priority,
                                   timeout=timeout,
                                   max_runtime_mins=max_runtime_mins,
                                   timeout_mins=timeout_mins,
                                   file_bugs=file_bugs,
                                   wait_for_results=file_bugs,
                                   test_source_build=test_source_build,
                                   job_retry=job_retry,
                                   delay_minutes=delay_minutes,
                                   run_prod_code=run_prod_code,
                                   min_rpc_timeout=_MIN_RPC_TIMEOUT)
            if job_id is not None:
                logging.info('... created as suite job id %s', job_id)
                # Report data to metrics.
                fields = {'suite': suite,
                          'board': board,
                          'pool': pool,
                          'priority': str(priority)}
                self._SUITE_SCHEDULER_SUITE_COUNT.increment(fields=fields)
                return True
            else:
                raise ScheduleException(
                        "Can't schedule %s for %s." % (suite, builds))
        except ScheduleException:
            # Already the advertised exception type; don't wrap it again.
            raise
        except Exception as e:
            raise ScheduleException(e)


    def ScheduleSuite(self, suite, board, build, pool, num, priority, timeout,
                      force=False, file_bugs=False, cheets_build=None,
                      firmware_rw_build=None, firmware_ro_build=None,
                      test_source_build=None, job_retry=False,
                      launch_control_build=None, run_prod_code=False,
                      testbed_dut_count=None, no_delay=False):
        """Schedule |suite|, if it hasn't already been run.

        If |suite| has not already been run against |build| on |board|,
        schedule it and return True.  If it has, return False.

        The duplicate check uses the first non-empty value among
        |test_source_build|, |build| and |launch_control_build|.

        @param suite: the name of the suite to run, e.g. 'bvt'
        @param board: the board to run the suite on, e.g. x86-alex
        @param build: the ChromeOS build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param pool: the pool of machines to use for scheduling purposes.
        @param num: the number of devices across which to shard the test suite.
                    Type: integer or None
        @param priority: One of the values from
                         client.common_lib.priorities.Priority.
        @param timeout: The max lifetime of the suite in hours.
        @param force: Always schedule the suite; skips the duplicate and lab
                      status checks.
        @param file_bugs: True if bug filing is desired for this suite.
        @param cheets_build: CrOS Android build to be used for testing.
                             Default to None.
        @param firmware_rw_build: Firmware build to update RW firmware. Default
                                  to None.
        @param firmware_ro_build: Firmware build to update RO firmware. Default
                                  to None.
        @param test_source_build: Build with the source of tests. Default to
                                  None to use the ChromeOS build.
        @param job_retry: Set to True to enable job-level retry. Default is
                          False.
        @param launch_control_build: Name of a Launch Control build, e.g.,
                                     'git_mnc_release/shamu-eng/123'
        @param run_prod_code: If True, the suite will run the test code that
                              lives in prod aka the test code currently on the
                              lab servers. If False, the control files and test
                              code for this suite run will be retrieved from the
                              build artifacts. Default is False.
        @param testbed_dut_count: Number of duts to test when using a testbed.
        @param no_delay: Set to True to allow suite to be created without
                configuring delay_minutes. Default is False.

        @return True if the suite got scheduled, False if not
        @raise DedupException if we can't check for dups.
        @raise ScheduleException if the suite cannot be scheduled.

        """
        if (force or self._ShouldScheduleSuite(
                suite, board,
                test_source_build or build or launch_control_build)):
            return self._Schedule(suite, board, build, pool, num, priority,
                                  timeout, file_bugs=file_bugs,
                                  cheets_build=cheets_build,
                                  firmware_rw_build=firmware_rw_build,
                                  firmware_ro_build=firmware_ro_build,
                                  test_source_build=test_source_build,
                                  job_retry=job_retry,
                                  launch_control_build=launch_control_build,
                                  run_prod_code=run_prod_code,
                                  testbed_dut_count=testbed_dut_count,
                                  no_delay=no_delay)
        return False


    def CheckHostsExist(self, *args, **kwargs):
        """Forward a request to check if hosts matching args, kwargs exist.

        `min_rpc_timeout` is always set to _MIN_RPC_TIMEOUT, overriding any
        value supplied by the caller.

        @param args: positional arguments forwarded to the AFE's
                     get_hostnames.
        @param kwargs: keyword arguments forwarded to the AFE's
                       get_hostnames.

        @return whatever get_hostnames returns, or an empty list if the
                request times out.
        """
        try:
            kwargs['min_rpc_timeout'] = _MIN_RPC_TIMEOUT
            return self._afe.get_hostnames(*args, **kwargs)
        except error.TimeoutException as e:
            # Best effort: a timeout is logged and reported as "no hosts".
            logging.exception(e)
            return []
